/[pcre]/code/trunk/sljit/sljitNativeX86_common.c
ViewVC logotype

Contents of /code/trunk/sljit/sljitNativeX86_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 847 - (hide annotations) (download)
Tue Jan 3 17:49:03 2012 UTC (2 years, 6 months ago) by zherczeg
File MIME type: text/plain
File size: 78831 byte(s)
fix signed/unsigned half load mismatches and JIT compiler update
1 ph10 662 /*
2     * Stack-less Just-In-Time compiler
3     *
4 ph10 836 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 ph10 662 *
6     * Redistribution and use in source and binary forms, with or without modification, are
7     * permitted provided that the following conditions are met:
8     *
9     * 1. Redistributions of source code must retain the above copyright notice, this list of
10     * conditions and the following disclaimer.
11     *
12     * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13     * of conditions and the following disclaimer in the documentation and/or other materials
14     * provided with the distribution.
15     *
16     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17     * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19     * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21     * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22     * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23     * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24     * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25     */
26    
27 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
28 ph10 662 {
29     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
30     return "x86-32";
31     #else
32     return "x86-64";
33     #endif
34     }
35    
36     /*
37     32b register indexes:
38     0 - EAX
39     1 - ECX
40     2 - EDX
41     3 - EBX
42     4 - none
43     5 - EBP
44     6 - ESI
45     7 - EDI
46     */
47    
48     /*
49     64b register indexes:
50     0 - RAX
51     1 - RCX
52     2 - RDX
53     3 - RBX
54     4 - none
55     5 - RBP
56     6 - RSI
57     7 - RDI
58     8 - R8 - From now on REX prefix is required
59     9 - R9
60     10 - R10
61     11 - R11
62     12 - R12
63     13 - R13
64     14 - R14
65     15 - R15
66     */
67    
68     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
69    
70     /* Last register + 1. */
71     #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
72    
73     static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
74     0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
75     };
76    
77     #define CHECK_EXTRA_REGS(p, w, do) \
78     if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
79     w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
80     p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
81     do; \
82     } \
83     else if (p >= SLJIT_GENERAL_EREG1 && p <= SLJIT_GENERAL_EREG2) { \
84     w = compiler->generals_start + (p - SLJIT_GENERAL_EREG1) * sizeof(sljit_w); \
85     p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
86     do; \
87     }
88    
89     #else /* SLJIT_CONFIG_X86_32 */
90    
91     /* Last register + 1. */
92     #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
93     #define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
94     #define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
95    
96     /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
97     Note: avoid to use r12 and r13 for memory addessing
98     therefore r12 is better for GENERAL_EREG than GENERAL_REG. */
99     #ifndef _WIN64
100     /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
101     static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
102     0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
103     };
104     /* low-map. reg_map & 0x7. */
105     static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
106     0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
107     };
108     #else
109     /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
110     static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
111     0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
112     };
113     /* low-map. reg_map & 0x7. */
114     static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
115     0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 4, 7, 2, 0, 1
116     };
117     #endif
118    
119     #define REX_W 0x48
120     #define REX_R 0x44
121     #define REX_X 0x42
122     #define REX_B 0x41
123     #define REX 0x40
124    
125     typedef unsigned int sljit_uhw;
126     typedef int sljit_hw;
127    
128     #define IS_HALFWORD(x) ((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
129     #define NOT_HALFWORD(x) ((x) > 0x7fffffffll || (x) < -0x80000000ll)
130    
131     #define CHECK_EXTRA_REGS(p, w, do)
132    
133     #endif /* SLJIT_CONFIG_X86_32 */
134    
135     #if (defined SLJIT_SSE2 && SLJIT_SSE2)
136     #define TMP_FREG (SLJIT_FLOAT_REG4 + 1)
137     #endif
138    
139     /* Size flags for emit_x86_instruction: */
140     #define EX86_BIN_INS 0x0010
141     #define EX86_SHIFT_INS 0x0020
142     #define EX86_REX 0x0040
143     #define EX86_NO_REXW 0x0080
144     #define EX86_BYTE_ARG 0x0100
145     #define EX86_HALF_ARG 0x0200
146     #define EX86_PREF_66 0x0400
147    
148     #if (defined SLJIT_SSE2 && SLJIT_SSE2)
149     #define EX86_PREF_F2 0x0800
150     #define EX86_SSE2 0x1000
151     #endif
152    
153     #define INC_SIZE(s) (*buf++ = (s), compiler->size += (s))
154     #define INC_CSIZE(s) (*code++ = (s), compiler->size += (s))
155    
156     #define PUSH_REG(r) (*buf++ = (0x50 + (r)))
157     #define POP_REG(r) (*buf++ = (0x58 + (r)))
158     #define RET() (*buf++ = (0xc3))
159     #define RETN(n) (*buf++ = (0xc2), *buf++ = n, *buf++ = 0)
160     /* r32, r/m32 */
161     #define MOV_RM(mod, reg, rm) (*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
162    
163     static sljit_ub get_jump_code(int type)
164     {
165     switch (type) {
166     case SLJIT_C_EQUAL:
167     case SLJIT_C_FLOAT_EQUAL:
168     return 0x84;
169    
170     case SLJIT_C_NOT_EQUAL:
171     case SLJIT_C_FLOAT_NOT_EQUAL:
172     return 0x85;
173    
174     case SLJIT_C_LESS:
175     case SLJIT_C_FLOAT_LESS:
176     return 0x82;
177    
178     case SLJIT_C_GREATER_EQUAL:
179     case SLJIT_C_FLOAT_GREATER_EQUAL:
180     return 0x83;
181    
182     case SLJIT_C_GREATER:
183     case SLJIT_C_FLOAT_GREATER:
184     return 0x87;
185    
186     case SLJIT_C_LESS_EQUAL:
187     case SLJIT_C_FLOAT_LESS_EQUAL:
188     return 0x86;
189    
190     case SLJIT_C_SIG_LESS:
191     return 0x8c;
192    
193     case SLJIT_C_SIG_GREATER_EQUAL:
194     return 0x8d;
195    
196     case SLJIT_C_SIG_GREATER:
197     return 0x8f;
198    
199     case SLJIT_C_SIG_LESS_EQUAL:
200     return 0x8e;
201    
202     case SLJIT_C_OVERFLOW:
203     case SLJIT_C_MUL_OVERFLOW:
204     return 0x80;
205    
206     case SLJIT_C_NOT_OVERFLOW:
207     case SLJIT_C_MUL_NOT_OVERFLOW:
208     return 0x81;
209    
210     case SLJIT_C_FLOAT_NAN:
211     return 0x8a;
212    
213     case SLJIT_C_FLOAT_NOT_NAN:
214     return 0x8b;
215     }
216     return 0;
217     }
218    
219     static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);
220    
221     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
222     static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
223     #endif
224    
225     static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
226     {
227     int short_jump;
228     sljit_uw label_addr;
229    
230     if (jump->flags & JUMP_LABEL)
231     label_addr = (sljit_uw)(code + jump->u.label->size);
232     else
233     label_addr = jump->u.target;
234     short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;
235    
236     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
237     if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
238     return generate_far_jump_code(jump, code_ptr, type);
239     #endif
240    
241     if (type == SLJIT_JUMP) {
242     if (short_jump)
243     *code_ptr++ = 0xeb;
244     else
245     *code_ptr++ = 0xe9;
246     jump->addr++;
247     }
248 zherczeg 722 else if (type >= SLJIT_FAST_CALL) {
249 ph10 662 short_jump = 0;
250     *code_ptr++ = 0xe8;
251     jump->addr++;
252     }
253     else if (short_jump) {
254     *code_ptr++ = get_jump_code(type) - 0x10;
255     jump->addr++;
256     }
257     else {
258     *code_ptr++ = 0x0f;
259     *code_ptr++ = get_jump_code(type);
260     jump->addr += 2;
261     }
262    
263     if (short_jump) {
264     jump->flags |= PATCH_MB;
265     code_ptr += sizeof(sljit_b);
266     } else {
267     jump->flags |= PATCH_MW;
268     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
269     code_ptr += sizeof(sljit_w);
270     #else
271     code_ptr += sizeof(sljit_hw);
272     #endif
273     }
274    
275     return code_ptr;
276     }
277    
278 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
279 ph10 662 {
280     struct sljit_memory_fragment *buf;
281     sljit_ub *code;
282     sljit_ub *code_ptr;
283     sljit_ub *buf_ptr;
284     sljit_ub *buf_end;
285     sljit_ub len;
286    
287     struct sljit_label *label;
288     struct sljit_jump *jump;
289     struct sljit_const *const_;
290    
291     CHECK_ERROR_PTR();
292     check_sljit_generate_code(compiler);
293     reverse_buf(compiler);
294    
295     /* Second code generation pass. */
296     code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
297     PTR_FAIL_WITH_EXEC_IF(code);
298     buf = compiler->buf;
299    
300     code_ptr = code;
301     label = compiler->labels;
302     jump = compiler->jumps;
303     const_ = compiler->consts;
304     do {
305     buf_ptr = buf->memory;
306     buf_end = buf_ptr + buf->used_size;
307     do {
308     len = *buf_ptr++;
309     if (len > 0) {
310     /* The code is already generated. */
311     SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
312     code_ptr += len;
313     buf_ptr += len;
314     }
315     else {
316     if (*buf_ptr >= 4) {
317     jump->addr = (sljit_uw)code_ptr;
318     if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
319     code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
320     else
321     code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
322     jump = jump->next;
323     }
324     else if (*buf_ptr == 0) {
325     label->addr = (sljit_uw)code_ptr;
326     label->size = code_ptr - code;
327     label = label->next;
328     }
329     else if (*buf_ptr == 1) {
330     const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
331     const_ = const_->next;
332     }
333     else {
334     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
335     *code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
336     buf_ptr++;
337     *(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
338     code_ptr += sizeof(sljit_w);
339     buf_ptr += sizeof(sljit_w) - 1;
340     #else
341     code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
342     buf_ptr += sizeof(sljit_w);
343     #endif
344     }
345     buf_ptr++;
346     }
347     } while (buf_ptr < buf_end);
348     SLJIT_ASSERT(buf_ptr == buf_end);
349     buf = buf->next;
350     } while (buf);
351    
352     SLJIT_ASSERT(!label);
353     SLJIT_ASSERT(!jump);
354     SLJIT_ASSERT(!const_);
355    
356     jump = compiler->jumps;
357     while (jump) {
358     if (jump->flags & PATCH_MB) {
359     SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
360 ph10 836 *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
361 ph10 662 } else if (jump->flags & PATCH_MW) {
362     if (jump->flags & JUMP_LABEL) {
363     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
364 ph10 836 *(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
365 ph10 662 #else
366     SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
367 ph10 836 *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
368 ph10 662 #endif
369     }
370     else {
371     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
372 ph10 836 *(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
373 ph10 662 #else
374     SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
375 ph10 836 *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
376 ph10 662 #endif
377     }
378     }
379     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
380     else if (jump->flags & PATCH_MD)
381     *(sljit_w*)jump->addr = jump->u.label->addr;
382     #endif
383    
384     jump = jump->next;
385     }
386    
387     /* Maybe we waste some space because of short jumps. */
388     SLJIT_ASSERT(code_ptr <= code + compiler->size);
389     compiler->error = SLJIT_ERR_COMPILED;
390 ph10 836 compiler->executable_size = compiler->size;
391 ph10 662 return (void*)code;
392     }
393    
394     /* --------------------------------------------------------------------- */
395     /* Operators */
396     /* --------------------------------------------------------------------- */
397    
398     static int emit_cum_binary(struct sljit_compiler *compiler,
399     sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
400     int dst, sljit_w dstw,
401     int src1, sljit_w src1w,
402     int src2, sljit_w src2w);
403    
404     static int emit_non_cum_binary(struct sljit_compiler *compiler,
405     sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
406     int dst, sljit_w dstw,
407     int src1, sljit_w src1w,
408     int src2, sljit_w src2w);
409    
410     static int emit_mov(struct sljit_compiler *compiler,
411     int dst, sljit_w dstw,
412     int src, sljit_w srcw);
413    
414     static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
415     {
416     sljit_ub *buf;
417    
418     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
419     buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
420     FAIL_IF(!buf);
421     INC_SIZE(5);
422     *buf++ = 0x9c; /* pushfd */
423     #else
424     buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
425     FAIL_IF(!buf);
426     INC_SIZE(6);
427     *buf++ = 0x9c; /* pushfq */
428     *buf++ = 0x48;
429     #endif
430     *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
431     *buf++ = 0x64;
432     *buf++ = 0x24;
433     *buf++ = sizeof(sljit_w);
434     compiler->flags_saved = 1;
435     return SLJIT_SUCCESS;
436     }
437    
438     static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
439     {
440     sljit_ub *buf;
441    
442     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
443     buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
444     FAIL_IF(!buf);
445     INC_SIZE(5);
446     #else
447     buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
448     FAIL_IF(!buf);
449     INC_SIZE(6);
450     *buf++ = 0x48;
451     #endif
452     *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
453     *buf++ = 0x64;
454     *buf++ = 0x24;
455     *buf++ = (sljit_ub)-(int)sizeof(sljit_w);
456     *buf++ = 0x9d; /* popfd / popfq */
457     compiler->flags_saved = keep_flags;
458     return SLJIT_SUCCESS;
459     }
460    
461     #ifdef _WIN32
462     #include <malloc.h>
463    
464     static void SLJIT_CALL sljit_touch_stack(sljit_w local_size)
465     {
466     /* Workaround for calling _chkstk. */
467     alloca(local_size);
468     }
469     #endif
470    
471     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
472     #include "sljitNativeX86_32.c"
473     #else
474     #include "sljitNativeX86_64.c"
475     #endif
476    
477     static int emit_mov(struct sljit_compiler *compiler,
478     int dst, sljit_w dstw,
479     int src, sljit_w srcw)
480     {
481     sljit_ub* code;
482    
483     if (dst == SLJIT_UNUSED) {
484     /* No destination, doesn't need to setup flags. */
485     if (src & SLJIT_MEM) {
486     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
487     FAIL_IF(!code);
488     *code = 0x8b;
489     }
490     return SLJIT_SUCCESS;
491     }
492     if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
493     code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
494     FAIL_IF(!code);
495     *code = 0x89;
496     return SLJIT_SUCCESS;
497     }
498     if (src & SLJIT_IMM) {
499     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
500     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
501     return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
502     #else
503     if (!compiler->mode32) {
504     if (NOT_HALFWORD(srcw))
505     return emit_load_imm64(compiler, dst, srcw);
506     }
507     else
508     return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
509     #endif
510     }
511     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
512     if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
513     FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
514     code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
515     FAIL_IF(!code);
516     *code = 0x89;
517     return SLJIT_SUCCESS;
518     }
519     #endif
520     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
521     FAIL_IF(!code);
522     *code = 0xc7;
523     return SLJIT_SUCCESS;
524     }
525     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
526     code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
527     FAIL_IF(!code);
528     *code = 0x8b;
529     return SLJIT_SUCCESS;
530     }
531    
532     /* Memory to memory move. Requires two instruction. */
533     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
534     FAIL_IF(!code);
535     *code = 0x8b;
536     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
537     FAIL_IF(!code);
538     *code = 0x89;
539     return SLJIT_SUCCESS;
540     }
541    
542     #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
543     FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
544    
545 zherczeg 847 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
546     {
547     sljit_ub *buf;
548     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
549     int size;
550     #endif
551    
552     CHECK_ERROR();
553     check_sljit_emit_op0(compiler, op);
554    
555     switch (GET_OPCODE(op)) {
556     case SLJIT_BREAKPOINT:
557     buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
558     FAIL_IF(!buf);
559     INC_SIZE(1);
560     *buf = 0xcc;
561     break;
562     case SLJIT_NOP:
563     buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
564     FAIL_IF(!buf);
565     INC_SIZE(1);
566     *buf = 0x90;
567     break;
568     case SLJIT_UMUL:
569     case SLJIT_SMUL:
570     case SLJIT_UDIV:
571     case SLJIT_SDIV:
572     compiler->flags_saved = 0;
573     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
574     #ifdef _WIN64
575     SLJIT_COMPILE_ASSERT(
576     reg_map[SLJIT_TEMPORARY_REG1] == 0
577     && reg_map[SLJIT_TEMPORARY_REG2] == 2
578     && reg_map[TMP_REGISTER] > 7,
579     invalid_register_assignment_for_div_mul);
580     #else
581     SLJIT_COMPILE_ASSERT(
582     reg_map[SLJIT_TEMPORARY_REG1] == 0
583     && reg_map[SLJIT_TEMPORARY_REG2] < 7
584     && reg_map[TMP_REGISTER] == 2,
585     invalid_register_assignment_for_div_mul);
586     #endif
587     compiler->mode32 = op & SLJIT_INT_OP;
588     #endif
589    
590     op = GET_OPCODE(op);
591     if (op == SLJIT_UDIV) {
592     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
593     EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
594     buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
595     #else
596     buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
597     #endif
598     FAIL_IF(!buf);
599     *buf = 0x33;
600     }
601    
602     if (op == SLJIT_SDIV) {
603     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
604     EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
605     EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG1, 0);
606     #else
607     EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
608     #endif
609    
610     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
611     buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
612     FAIL_IF(!buf);
613     INC_SIZE(3);
614     *buf++ = 0xc1;
615     *buf++ = 0xfa;
616     *buf = 0x1f;
617     #else
618     if (compiler->mode32) {
619     buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
620     FAIL_IF(!buf);
621     INC_SIZE(3);
622     *buf++ = 0xc1;
623     *buf++ = 0xfa;
624     *buf = 0x1f;
625     } else {
626     buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
627     FAIL_IF(!buf);
628     INC_SIZE(4);
629     *buf++ = REX_W;
630     *buf++ = 0xc1;
631     *buf++ = 0xfa;
632     *buf = 0x3f;
633     }
634     #endif
635     }
636    
637     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
638     buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
639     FAIL_IF(!buf);
640     INC_SIZE(2);
641     *buf++ = 0xf7;
642     *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
643     #else
644     #ifdef _WIN64
645     size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
646     #else
647     size = (!compiler->mode32) ? 3 : 2;
648     #endif
649     buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
650     FAIL_IF(!buf);
651     INC_SIZE(size);
652     #ifdef _WIN64
653     if (!compiler->mode32)
654     *buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
655     else if (op >= SLJIT_UDIV)
656     *buf++ = REX_B;
657     *buf++ = 0xf7;
658     *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
659     #else
660     if (!compiler->mode32)
661     *buf++ = REX_W;
662     *buf++ = 0xf7;
663     *buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
664     #endif
665     #endif
666     switch (op) {
667     case SLJIT_UMUL:
668     *buf |= 4 << 3;
669     break;
670     case SLJIT_SMUL:
671     *buf |= 5 << 3;
672     break;
673     case SLJIT_UDIV:
674     *buf |= 6 << 3;
675     break;
676     case SLJIT_SDIV:
677     *buf |= 7 << 3;
678     break;
679     }
680     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
681     EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
682     #endif
683     break;
684     }
685    
686     return SLJIT_SUCCESS;
687     }
688    
/* Emit a single prefix/opcode byte straight into the instruction
   stream. Note: relies on a local "code" pointer being in scope
   (see INC_CSIZE). */
#define ENCODE_PREFIX(prefix) \
	do { \
		code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!code); \
		INC_CSIZE(1); \
		*code = (prefix); \
	} while (0)
696    
697     static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
698     int dst, sljit_w dstw,
699     int src, sljit_w srcw)
700     {
701     sljit_ub* code;
702     int dst_r;
703     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
704     int work_r;
705     #endif
706    
707     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
708     compiler->mode32 = 0;
709     #endif
710    
711     if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
712     return SLJIT_SUCCESS; /* Empty instruction. */
713    
714     if (src & SLJIT_IMM) {
715     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
716     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
717     return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
718     #else
719     return emit_load_imm64(compiler, dst, srcw);
720     #endif
721     }
722     code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
723     FAIL_IF(!code);
724     *code = 0xc6;
725     return SLJIT_SUCCESS;
726     }
727    
728     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
729    
730     if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
731     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
732     if (reg_map[src] >= 4) {
733     SLJIT_ASSERT(dst_r == TMP_REGISTER);
734     EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
735     } else
736     dst_r = src;
737     #else
738     dst_r = src;
739     #endif
740     }
741     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
742     else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
743     /* src, dst are registers. */
744     SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
745     if (reg_map[dst] < 4) {
746     if (dst != src)
747     EMIT_MOV(compiler, dst, 0, src, 0);
748     code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
749     FAIL_IF(!code);
750     *code++ = 0x0f;
751     *code = sign ? 0xbe : 0xb6;
752     }
753     else {
754     if (dst != src)
755     EMIT_MOV(compiler, dst, 0, src, 0);
756     if (sign) {
757     /* shl reg, 24 */
758     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
759     FAIL_IF(!code);
760     *code |= 0x4 << 3;
761     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
762     FAIL_IF(!code);
763     /* shr/sar reg, 24 */
764     *code |= 0x7 << 3;
765     }
766     else {
767     /* and dst, 0xff */
768     code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
769     FAIL_IF(!code);
770     *(code + 1) |= 0x4 << 3;
771     }
772     }
773     return SLJIT_SUCCESS;
774     }
775     #endif
776     else {
777     /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
778     code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
779     FAIL_IF(!code);
780     *code++ = 0x0f;
781     *code = sign ? 0xbe : 0xb6;
782     }
783    
784     if (dst & SLJIT_MEM) {
785     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
786     if (dst_r == TMP_REGISTER) {
787     /* Find a non-used register, whose reg_map[src] < 4. */
788     if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
789     if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
790     work_r = SLJIT_TEMPORARY_REG3;
791     else
792     work_r = SLJIT_TEMPORARY_REG2;
793     }
794     else {
795     if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
796     work_r = SLJIT_TEMPORARY_REG1;
797     else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
798     work_r = SLJIT_TEMPORARY_REG3;
799     else
800     work_r = SLJIT_TEMPORARY_REG2;
801     }
802    
803     if (work_r == SLJIT_TEMPORARY_REG1) {
804     ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
805     }
806     else {
807     code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
808     FAIL_IF(!code);
809     *code = 0x87;
810     }
811    
812     code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
813     FAIL_IF(!code);
814     *code = 0x88;
815    
816     if (work_r == SLJIT_TEMPORARY_REG1) {
817     ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
818     }
819     else {
820     code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
821     FAIL_IF(!code);
822     *code = 0x87;
823     }
824     }
825     else {
826     code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
827     FAIL_IF(!code);
828     *code = 0x88;
829     }
830     #else
831     code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
832     FAIL_IF(!code);
833     *code = 0x88;
834     #endif
835     }
836    
837     return SLJIT_SUCCESS;
838     }
839    
840     static int emit_mov_half(struct sljit_compiler *compiler, int sign,
841     int dst, sljit_w dstw,
842     int src, sljit_w srcw)
843     {
844     sljit_ub* code;
845     int dst_r;
846    
847     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
848     compiler->mode32 = 0;
849     #endif
850    
851     if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
852     return SLJIT_SUCCESS; /* Empty instruction. */
853    
854     if (src & SLJIT_IMM) {
855     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
856     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
857     return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
858     #else
859     return emit_load_imm64(compiler, dst, srcw);
860     #endif
861     }
862     code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
863     FAIL_IF(!code);
864     *code = 0xc7;
865     return SLJIT_SUCCESS;
866     }
867    
868     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
869    
870     if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
871     dst_r = src;
872     else {
873     code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
874     FAIL_IF(!code);
875     *code++ = 0x0f;
876     *code = sign ? 0xbf : 0xb7;
877     }
878    
879     if (dst & SLJIT_MEM) {
880     code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
881     FAIL_IF(!code);
882     *code = 0x89;
883     }
884    
885     return SLJIT_SUCCESS;
886     }
887    
888     static int emit_unary(struct sljit_compiler *compiler, int un_index,
889     int dst, sljit_w dstw,
890     int src, sljit_w srcw)
891     {
892     sljit_ub* code;
893    
894     if (dst == SLJIT_UNUSED) {
895     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
896     code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
897     FAIL_IF(!code);
898     *code++ = 0xf7;
899     *code |= (un_index) << 3;
900     return SLJIT_SUCCESS;
901     }
902     if (dst == src && dstw == srcw) {
903     /* Same input and output */
904     code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
905     FAIL_IF(!code);
906     *code++ = 0xf7;
907     *code |= (un_index) << 3;
908     return SLJIT_SUCCESS;
909     }
910     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
911     EMIT_MOV(compiler, dst, 0, src, srcw);
912     code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
913     FAIL_IF(!code);
914     *code++ = 0xf7;
915     *code |= (un_index) << 3;
916     return SLJIT_SUCCESS;
917     }
918     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
919     code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
920     FAIL_IF(!code);
921     *code++ = 0xf7;
922     *code |= (un_index) << 3;
923     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
924     return SLJIT_SUCCESS;
925     }
926    
/* NOT (0xF7 /2) does not update the status flags on x86, so when the
   caller requested flag setting (SLJIT_NOT | SLJIT_SET_E, see
   sljit_emit_op1) we follow the NOT with OR reg, reg (0x0B), which sets
   the zero flag according to the result without changing the value. */
static int emit_not_with_flags(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: NOT + OR on TMP_REGISTER just for the flags. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= 0x2 << 3;	/* /2 = NOT. */
		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code = 0x0b;	/* OR reg, reg: value unchanged, ZF updated. */
		return SLJIT_SUCCESS;
	}
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		/* Register destination: operate in place. */
		EMIT_MOV(compiler, dst, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= 0x2 << 3;
		code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!code);
		*code = 0x0b;
		return SLJIT_SUCCESS;
	}
	/* Memory destination: compute in TMP_REGISTER, then store.  The store
	   must come after the OR so the flags reflect the NOT result. */
	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
	code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0xf7;
	*code |= 0x2 << 3;
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code = 0x0b;
	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	return SLJIT_SUCCESS;
}
966    
/* Emits a count-leading-zeros sequence:
     BSR tmp, src        (0x0F 0xBD; ZF set when src == 0, result undefined)
     MOV dst_r, 2n-1     (63 on 32 bit, 127 on 64 bit: the src == 0 fallback)
     CMOVNE dst_r, tmp   (0x0F 0x45; keep the fallback only when src == 0)
     XOR dst_r, n-1      (group-1 /6; 31 ^ bsr == 31 - bsr for bsr in 0..31,
                          and 63 ^ 31 == 32, the correct CLZ of zero)
   op carries SLJIT_INT_OP, which selects 32-bit operation on x86-64. */
static int emit_clz(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;

	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= 0x2 << 3;	/* /2 = NOT. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
#else
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
#endif
		FAIL_IF(!code);
		*code |= 0x5 << 3;	/* /5 = SHR: ZF from (~src) >> (width-1). */
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		/* BSR needs an r/m operand; materialize the immediate. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		src = TMP_REGISTER;
		srcw = 0;
	}

	code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0xbd;	/* BSR TMP_REGISTER, src. */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
			dst_r = SLJIT_TEMPORARY_REG1;
		else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
			dst_r = SLJIT_TEMPORARY_REG2;
		else
			dst_r = SLJIT_TEMPORARY_REG3;
		/* Park dst_r's current value in dst; the XCHG below restores it. */
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
	compiler->mode32 = 0;	/* The 127/63 constant must be loaded full width. */
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0x45;	/* CMOVNE dst_r, TMP_REGISTER. */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!code);
	*(code + 1) |= 0x6 << 3;	/* /6 = XOR. */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* XCHG (0x87): store the result and restore dst_r's saved value. */
		code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x87;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
1048    
/* Single-operand sljit operation entry point.
   Handles the whole MOV family (with optional zero/sign extension and,
   for the SLJIT_MOVU_* variants, base-register update via LEA) plus the
   unary operations NOT, NEG and CLZ, which it dispatches to the helpers
   above.  dst/dstw and src/srcw are sljit operand pairs. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int update = 0;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* "ereg" operands are the extra virtual registers kept in memory on
	   x86-32 (see CHECK_EXTRA_REGS); they need special store handling. */
	int dst_is_ereg = 0;
	int src_is_ereg = 0;
#else
	#define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);

	if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
		op = GET_OPCODE(op);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;	/* Moves always operate full width here. */
#endif

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			/* MOVU_* == MOV_* + base register update. */
			update = 1;
			op -= 7;
		}

		if (src & SLJIT_IMM) {
			/* Pre-truncate the immediate to the move's operand size. */
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (unsigned char)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (signed char)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (unsigned short)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (signed short)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (unsigned int)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (signed int)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		/* Pre-update: LEA (0x8D) folds the displacement/index into the
		   base register, then the access uses plain (base). */
		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8d;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Sized/extending moves cannot target an ereg memory slot
		   directly; stage the result in TMP_REGISTER instead. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
			dst = TMP_REGISTER;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Flush the staged result into the ereg memory slot. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
#endif

		/* Post-update for the destination base register, same LEA trick. */
		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x8d;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op)))
		compiler->flags_saved = 0;

	switch (GET_OPCODE(op)) {
	case SLJIT_NOT:
		/* NOT does not set flags; use the OR-augmented variant for SET_E. */
		if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, 0x2, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, 0x3, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	#undef src_is_ereg
#endif
}
1196    
/* Helpers for the two-operand ALU emitters below.

   BINARY_IMM emits "op r/m, imm" using the group-1 immediate encoding:
   _op_imm_ is the pre-shifted ModRM /digit field (callers pass values
   such as 0x7 << 3) OR-ed into the byte after the opcode.  On x86-64 an
   immediate that does not fit in 32 bits cannot be encoded directly, so
   it is first loaded into TMP_REG2 and the register form _op_mr_
   ("op r/m, reg") is used instead.

   BINARY_EAX_IMM emits the short accumulator form "op eax, imm32"
   (_op_eax_imm_), adding REX.W on x86-64 when not in 32-bit mode. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!code); \
		*(code + 1) |= (_op_imm_); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!code); \
		*code = (_op_mr_); \
	}

#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))

#else

#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!code); \
	*(code + 1) |= (_op_imm_);

#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))

#endif
1226    
/* Emits a commutative two-operand ALU operation (the caller supplies the
   opcode set): op_rm = "op reg, r/m", op_mr = "op r/m, reg", op_imm =
   pre-shifted /digit for the immediate form, op_eax_imm = short
   "op eax, imm32".  Because the operation is commutative, dst == src2 is
   handled by operating src1 into dst directly. */
static int emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute in TMP_REGISTER for the flags only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* In-place: op dst, src2. */
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				/* Short eax form only pays off when imm8 cannot be used. */
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
			/* Special exception for sljit_emit_cond_value. */
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Memory-to-memory: stage src2 in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		/* Mirror of the dst == src1 case with src1 as the other operand. */
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1342    
/* Emits a non-commutative two-operand ALU operation (e.g. subtraction;
   opcode set supplied by the caller exactly as for emit_cum_binary).
   Unlike the cumulative variant there is no dst == src2 shortcut, and
   the general path must not pick dst as the working register when
   dst == src2, since src1 must be loaded first. */
static int emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute in TMP_REGISTER for the flags only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* In-place: op dst, src2. */
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				/* Short eax form only pays off when imm8 cannot be used. */
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Memory-to-memory: stage src2 in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1424    
1425     static int emit_mul(struct sljit_compiler *compiler,
1426     int dst, sljit_w dstw,
1427     int src1, sljit_w src1w,
1428     int src2, sljit_w src2w)
1429     {
1430     sljit_ub* code;
1431     int dst_r;
1432    
1433     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
1434    
1435     /* Register destination. */
1436     if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1437     code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1438     FAIL_IF(!code);
1439     *code++ = 0x0f;
1440     *code = 0xaf;
1441     }
1442     else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1443     code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1444     FAIL_IF(!code);
1445     *code++ = 0x0f;
1446     *code = 0xaf;
1447     }
1448     else if (src1 & SLJIT_IMM) {
1449     if (src2 & SLJIT_IMM) {
1450     EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1451     src2 = dst_r;
1452     src2w = 0;
1453     }
1454    
1455     if (src1w <= 127 && src1w >= -128) {
1456     code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1457     FAIL_IF(!code);
1458     *code = 0x6b;
1459     code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1460     FAIL_IF(!code);
1461     INC_CSIZE(1);
1462     *code = (sljit_b)src1w;
1463     }
1464     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1465     else {
1466     code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1467     FAIL_IF(!code);
1468     *code = 0x69;
1469     code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1470     FAIL_IF(!code);
1471     INC_CSIZE(4);
1472     *(sljit_w*)code = src1w;
1473     }
1474     #else
1475     else if (IS_HALFWORD(src1w)) {
1476     code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1477     FAIL_IF(!code);
1478     *code = 0x69;
1479     code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1480     FAIL_IF(!code);
1481     INC_CSIZE(4);
1482 ph10 836 *(sljit_hw*)code = (sljit_hw)src1w;
1483 ph10 662 }
1484     else {
1485     EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1486     if (dst_r != src2)
1487     EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1488     code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1489     FAIL_IF(!code);
1490     *code++ = 0x0f;
1491     *code = 0xaf;
1492     }
1493     #endif
1494     }
1495     else if (src2 & SLJIT_IMM) {
1496     /* Note: src1 is NOT immediate. */
1497    
1498     if (src2w <= 127 && src2w >= -128) {
1499     code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1500     FAIL_IF(!code);
1501     *code = 0x6b;
1502     code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1503     FAIL_IF(!code);
1504     INC_CSIZE(1);
1505     *code = (sljit_b)src2w;
1506     }
1507     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1508     else {
1509     code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1510     FAIL_IF(!code);
1511     *code = 0x69;
1512     code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1513     FAIL_IF(!code);
1514     INC_CSIZE(4);
1515     *(sljit_w*)code = src2w;
1516     }
1517     #else
1518     else if (IS_HALFWORD(src2w)) {
1519     code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1520     FAIL_IF(!code);
1521     *code = 0x69;
1522     code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1523     FAIL_IF(!code);
1524     INC_CSIZE(4);
1525 ph10 836 *(sljit_hw*)code = (sljit_hw)src2w;
1526 ph10 662 }
1527     else {
1528     EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1529     if (dst_r != src1)
1530     EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1531     code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1532     FAIL_IF(!code);
1533     *code++ = 0x0f;
1534     *code = 0xaf;
1535     }
1536     #endif
1537     }
1538     else {
1539     /* Neither argument is immediate. */
1540 zherczeg 740 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1541 ph10 662 dst_r = TMP_REGISTER;
1542     EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1543     code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1544     FAIL_IF(!code);
1545     *code++ = 0x0f;
1546     *code = 0xaf;
1547     }
1548    
1549     if (dst_r == TMP_REGISTER)
1550     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1551    
1552     return SLJIT_SUCCESS;
1553     }
1554    
/* Tries to emit an addition as a single LEA (0x8D): reg+reg becomes
   LEA dst_r, [src1 + src2] and reg+imm becomes LEA dst_r, [src + disp].
   Returns SLJIT_ERR_UNSUPPORTED when the operand combination does not
   fit (the caller then falls back to the normal ADD path); in-place
   forms are deliberately rejected since plain ADD is better there. */
static int emit_lea_binary(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;
	int dst_r, done = 0;

	/* These cases better be left to handled by normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			/* It is not possible to be both SLJIT_LOCALS_REG. */
			if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
				code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
				FAIL_IF(!code);
				*code = 0x8d;
				done = 1;
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Displacements are limited to 32 bits on x86-64. */
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
#else
		if (src2 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}
	else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
		/* Mirror case: register + immediate with the operands swapped. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
#else
		if (src1 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}

	if (done) {
		/* Memory destination: the LEA targeted TMP_REGISTER; store it. */
		if (dst_r == TMP_REGISTER)
			return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
1614    
/* Emits CMP src1, src2 (flags only, no destination):
     0x3D  CMP eax, imm32        (short accumulator form)
     /7    CMP r/m, imm          (via BINARY_IMM)
     0x39  CMP r/m, reg
     0x3B  CMP reg, r/m */
static int emit_cmp_binary(struct sljit_compiler *compiler,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		/* Short eax form only pays off when imm8 cannot be used. */
		BINARY_EAX_IMM(0x3d, src2w);
		return SLJIT_SUCCESS;
	}

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!code);
			*code = 0x3b;
		}
		return SLJIT_SUCCESS;
	}

	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
		code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!code);
		*code = 0x39;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			/* Both immediate: materialize src1 first. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			src1 = TMP_REGISTER;
			src1w = 0;
		}
		BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
	}
	else {
		/* Memory-to-memory: stage src1 in TMP_REGISTER. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
		FAIL_IF(!code);
		*code = 0x3b;
	}
	return SLJIT_SUCCESS;
}
1665    
1666     static int emit_test_binary(struct sljit_compiler *compiler,
1667     int src1, sljit_w src1w,
1668     int src2, sljit_w src2w)
1669     {
1670     sljit_ub* code;
1671    
1672     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1673     if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1674     #else
1675     if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1676     #endif
1677     BINARY_EAX_IMM(0xa9, src2w);
1678     return SLJIT_SUCCESS;
1679     }
1680    
1681     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1682     if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1683     #else
1684     if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1685     #endif
1686     BINARY_EAX_IMM(0xa9, src1w);
1687     return SLJIT_SUCCESS;
1688     }
1689    
1690     if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1691     if (src2 & SLJIT_IMM) {
1692     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1693     if (IS_HALFWORD(src2w) || compiler->mode32) {
1694     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1695     FAIL_IF(!code);
1696     *code = 0xf7;
1697     }
1698     else {
1699     FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1700     code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1701     FAIL_IF(!code);
1702     *code = 0x85;
1703     }
1704     #else
1705     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1706     FAIL_IF(!code);
1707     *code = 0xf7;
1708     #endif
1709     }
1710     else {
1711     code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1712     FAIL_IF(!code);
1713     *code = 0x85;
1714     }
1715     return SLJIT_SUCCESS;
1716     }
1717    
1718     if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1719     if (src1 & SLJIT_IMM) {
1720     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1721     if (IS_HALFWORD(src1w) || compiler->mode32) {
1722     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1723     FAIL_IF(!code);
1724     *code = 0xf7;
1725     }
1726     else {
1727     FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1728     code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1729     FAIL_IF(!code);
1730     *code = 0x85;
1731     }
1732     #else
1733     code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1734     FAIL_IF(!code);
1735     *code = 0xf7;
1736     #endif
1737     }
1738     else {
1739     code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1740     FAIL_IF(!code);
1741     *code = 0x85;
1742     }
1743     return SLJIT_SUCCESS;
1744     }
1745    
1746     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1747     if (src2 & SLJIT_IMM) {
1748     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1749     if (IS_HALFWORD(src2w) || compiler->mode32) {
1750     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1751     FAIL_IF(!code);
1752     *code = 0xf7;
1753     }
1754     else {
1755     FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1756     code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
1757     FAIL_IF(!code);
1758     *code = 0x85;
1759     }
1760     #else
1761     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1762     FAIL_IF(!code);
1763     *code = 0xf7;
1764     #endif
1765     }
1766     else {
1767     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1768     FAIL_IF(!code);
1769     *code = 0x85;
1770     }
1771     return SLJIT_SUCCESS;
1772     }
1773    
/* Emits a shift instruction. 'mode' is the x86 /r opcode-extension field
   (already shifted into bits 3-5) that selects shl/shr/sar; it is OR-ed
   into the byte returned by emit_x86_instruction. A variable shift count
   must be in cl (SLJIT_PREF_SHIFT_REG) on x86, which forces the register
   shuffling in the second half of the function. */
static int emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		/* Easy cases: count is an immediate or already in the shift register. */
		if (dst == src1 && dstw == src1w) {
			/* Shift the destination in place. */
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			/* Result is discarded; shift a scratch copy (flags are still produced). */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* ecx is both destination and count: shift in TMP, then move back. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			return SLJIT_SUCCESS;
		}
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			/* Destination is a plain register: copy src1 there and shift. */
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}

		/* Destination is memory: shift in TMP_REGISTER and store. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}

	/* From here the count must first be moved into SLJIT_PREF_SHIFT_REG (ecx). */
	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* dst is ecx itself: compute in TMP, load count into ecx, copy result back. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* dst is a register not involved in src2: save ecx in TMP, shift dst, restore ecx. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else {
		/* This case is really difficult, since ecx can be used for
		   addressing as well, and we must ensure to work even in that case. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Save ecx in a spare register on x86-64. */
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
#endif

		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1865    
/* Public entry point for two-operand integer operations (add/sub/mul/logic/
   shift). Dispatches to the dedicated emitter for each opcode. The four hex
   constants passed to emit_cum_binary/emit_non_cum_binary look like the x86
   opcode quadruple (reg<-r/m form, r/m<-reg form, immediate /r extension,
   eax short immediate form) -- confirm against those helpers' definitions,
   which are outside this chunk. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* 32 bit operation when SLJIT_INT_OP is requested. */
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	/* Map the emulated "extra" registers to their memory locations (x86-32). */
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		/* These opcodes clobber flags; save them first if the caller asked to keep them. */
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			/* lea does not touch flags; use it when no flags are requested. */
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		/* adc encodings. */
		return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			/* sub with immediate can be a flag-free lea with the negated value. */
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			/* Subtraction only for flags: emit cmp instead. */
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		/* sbb encodings. */
		return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			/* AND only for flags: emit test instead. */
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		/* /4 = shl. */
		return emit_shift(compiler, 0x4 << 3,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		/* /5 = shr. */
		return emit_shift(compiler, 0x5 << 3,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		/* /7 = sar. */
		return emit_shift(compiler, 0x7 << 3,
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
1957    
1958 zherczeg 839 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
1959     {
1960     check_sljit_get_register_index(reg);
1961     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1962     if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
1963     || reg == SLJIT_GENERAL_EREG1 || reg == SLJIT_GENERAL_EREG2)
1964     return -1;
1965     #endif
1966     return reg_map[reg];
1967     }
1968    
/* Copies a caller-supplied, pre-encoded machine instruction (1..15 bytes)
   verbatim into the code buffer. Note: INC_SIZE expands using the local
   'buf' and also bumps compiler->size. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, int size)
{
	sljit_ub *buf;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	/* 15 bytes is the maximum length of an x86 instruction. */
	SLJIT_ASSERT(size > 0 && size < 16);

	buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!buf);
	INC_SIZE(size);
	SLJIT_MEMMOVE(buf, instruction, size);
	return SLJIT_SUCCESS;
}
1984    
1985 ph10 662 /* --------------------------------------------------------------------- */
1986     /* Floating point operators */
1987     /* --------------------------------------------------------------------- */
1988    
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
/* Set by init_compiler() from CPUID; selects between the SSE2 and x87
   floating point code paths at runtime. */
static int sse2_available = 0;
#endif

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Alignment + 2 * 16 bytes. */
/* Backing storage for two 16-byte-aligned constant masks (sign-flip and
   sign-clear); sse2_buffer points at the first aligned slot inside it. */
static sljit_i sse2_data[3 + 4 + 4];
static sljit_i *sse2_buffer;
1998 ph10 662
1999     static void init_compiler()
2000     {
2001     #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2002     int features = 0;
2003     #endif
2004    
2005 zherczeg 704 sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
2006 ph10 662 sse2_buffer[0] = 0;
2007     sse2_buffer[1] = 0x80000000;
2008     sse2_buffer[4] = 0xffffffff;
2009     sse2_buffer[5] = 0x7fffffff;
2010    
2011     #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2012     #ifdef __GNUC__
2013     /* AT&T syntax. */
2014     asm (
2015     "pushl %%ebx\n"
2016     "movl $0x1, %%eax\n"
2017     "cpuid\n"
2018     "popl %%ebx\n"
2019     "movl %%edx, %0\n"
2020     : "=g" (features)
2021     :
2022     : "%eax", "%ecx", "%edx"
2023     );
2024 zherczeg 704 #elif defined(_MSC_VER) || defined(__BORLANDC__)
2025 ph10 662 /* Intel syntax. */
2026     __asm {
2027     mov eax, 1
2028     push ebx
2029     cpuid
2030     pop ebx
2031     mov features, edx
2032     }
2033     #else
2034     #error "SLJIT_SSE2_AUTO is not implemented for this C compiler"
2035     #endif
2036     sse2_available = (features >> 26) & 0x1;
2037     #endif
2038     }
2039    
2040     #endif
2041    
/* Floating point support is unconditional on x86 (x87 or SSE2 is always
   present on the targets this backend supports). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
{
	/* Always available. */
	return 1;
}
2047    
2048     #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2049    
2050     static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2051     int xmm1, int xmm2, sljit_w xmm2w)
2052     {
2053     sljit_ub *buf;
2054    
2055     buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2056     FAIL_IF(!buf);
2057     *buf++ = 0x0f;
2058     *buf = opcode;
2059     return SLJIT_SUCCESS;
2060     }
2061    
2062     static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2063     int xmm1, int xmm2, sljit_w xmm2w)
2064     {
2065     sljit_ub *buf;
2066    
2067     buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2068     FAIL_IF(!buf);
2069     *buf++ = 0x0f;
2070     *buf = opcode;
2071     return SLJIT_SUCCESS;
2072     }
2073    
/* movsd xmm, xmm/m64 (opcode 0x10): load a double into an xmm register. */
static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
	int dst, int src, sljit_w srcw)
{
	return emit_sse2(compiler, 0x10, dst, src, srcw);
}
2079    
/* movsd xmm/m64, xmm (opcode 0x11): store a double from an xmm register. */
static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
	int dst, sljit_w dstw, int src)
{
	return emit_sse2(compiler, 0x11, src, dst, dstw);
}
2085    
/* Single-operand floating point operation (fcmp/fmov/fneg/fabs) using SSE2.
   When SLJIT_SSE2_AUTO is set this is an internal helper dispatched by the
   public sljit_emit_fop1; otherwise it IS the public entry point. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		/* ucomisd clobbers eflags, so any saved flags are invalidated. */
		compiler->flags_saved = 0;
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			dst_r = dst;
		else {
			/* First operand must be a register for ucomisd. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
		}
		/* 66 0F 2E = ucomisd. */
		return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
	}

	if (op == SLJIT_FMOV) {
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			return emit_sse2_load(compiler, dst, src, srcw);
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
			return emit_sse2_store(compiler, dst, dstw, src);
		/* Memory to memory: stage through TMP_FREG. */
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	}

	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}

	switch (op) {
	case SLJIT_FNEG:
		/* xorpd with the sign mask built by init_compiler(). */
		FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
		break;

	case SLJIT_FABS:
		/* andpd with the sign-clearing mask (second 16 byte slot). */
		FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2147    
2148     #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2149 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2150 ph10 662 #else
2151     static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op,
2152     #endif
2153     int dst, sljit_w dstw,
2154     int src1, sljit_w src1w,
2155     int src2, sljit_w src2w)
2156     {
2157     int dst_r;
2158    
2159     CHECK_ERROR();
2160     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2161    
2162     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2163     compiler->mode32 = 1;
2164     #endif
2165    
2166     if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
2167     dst_r = dst;
2168     if (dst == src1)
2169     ; /* Do nothing here. */
2170     else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
2171     /* Swap arguments. */
2172     src2 = src1;
2173     src2w = src1w;
2174     }
2175     else if (dst != src2)
2176     FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
2177     else {
2178     dst_r = TMP_FREG;
2179     FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
2180     }
2181     }
2182     else {
2183     dst_r = TMP_FREG;
2184     FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
2185     }
2186    
2187     switch (op) {
2188     case SLJIT_FADD:
2189     FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w));
2190     break;
2191    
2192     case SLJIT_FSUB:
2193     FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w));
2194     break;
2195    
2196     case SLJIT_FMUL:
2197     FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w));
2198     break;
2199    
2200     case SLJIT_FDIV:
2201     FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w));
2202     break;
2203     }
2204    
2205     if (dst_r == TMP_FREG)
2206     return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
2207     return SLJIT_SUCCESS;
2208     }
2209    
2210     #endif
2211    
2212     #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2)
2213    
/* Pushes 'src' onto the x87 register stack: fld st(i) for FPU registers,
   fld m64fp otherwise. INC_SIZE expands using the local 'buf'. */
static int emit_fld(struct sljit_compiler *compiler,
	int src, sljit_w srcw)
{
	sljit_ub *buf;

	if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!buf);
		INC_SIZE(2);
		/* d9 c0+i = fld st(i); SLJIT float regs are 1-based, hence the -1. */
		*buf++ = 0xd9;
		*buf = 0xc0 + src - 1;
		return SLJIT_SUCCESS;
	}

	/* dd /0 = fld m64fp. */
	buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
	FAIL_IF(!buf);
	*buf = 0xdd;
	return SLJIT_SUCCESS;
}
2233    
/* Emits one x87 instruction whose operand may be an FPU register or memory.
   Register form: the two bytes (st_arg, st_arg2 + src). Memory form: opcode
   m64fp_arg with m64fp_arg2 OR-ed into the /r field of the ModRM byte that
   emit_x86_instruction produced. */
static int emit_fop(struct sljit_compiler *compiler,
	sljit_ub st_arg, sljit_ub st_arg2,
	sljit_ub m64fp_arg, sljit_ub m64fp_arg2,
	int src, sljit_w srcw)
{
	sljit_ub *buf;

	if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!buf);
		INC_SIZE(2);
		*buf++ = st_arg;
		*buf = st_arg2 + src;
		return SLJIT_SUCCESS;
	}

	buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
	FAIL_IF(!buf);
	*buf++ = m64fp_arg;
	/* Merge the /r extension into the already-encoded ModRM byte. */
	*buf |= m64fp_arg2;
	return SLJIT_SUCCESS;
}
2256    
/* Emits a two-byte register-only x87 instruction: (st_arg, st_arg2 + src). */
static int emit_fop_regs(struct sljit_compiler *compiler,
	sljit_ub st_arg, sljit_ub st_arg2,
	int src)
{
	sljit_ub *buf;

	buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!buf);
	INC_SIZE(2);
	*buf++ = st_arg;
	*buf = st_arg2 + src;
	return SLJIT_SUCCESS;
}
2270    
/* Single-operand floating point operation using the legacy x87 FPU.
   Internal helper under SLJIT_SSE2_AUTO, public entry point otherwise. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_ub *buf;
#endif

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		compiler->flags_saved = 0;
#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* x86-32: fcomp then copy the FPU status word into eflags via
		   fnstsw ax + sahf. */
		FAIL_IF(emit_fld(compiler, dst, dstw));
		/* d8 /3 = fcomp. */
		FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw));

		/* Copy flags. */
		/* eax is clobbered by fnstsw/sahf: preserve it in TMP_REGISTER. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
		FAIL_IF(!buf);
		INC_SIZE(3);
		/* df e0 = fnstsw ax. */
		*buf++ = 0xdf;
		*buf++ = 0xe0;
		/* Note: lahf is not supported on all x86-64 architectures. */
		/* 9e = sahf. */
		*buf++ = 0x9e;
		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
#else
		/* x86-64: fucomip sets eflags directly. */
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
			FAIL_IF(emit_fld(compiler, dst, dstw));
			/* df e8+i = fucomip st(i). */
			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
		} else {
			FAIL_IF(emit_fld(compiler, src, srcw));
			FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw));
			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
			/* dd d8 = fstp st(0), discarding the extra stack entry. */
			FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0));
		}
#endif
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_fld(compiler, src, srcw));

	switch (op) {
	case SLJIT_FNEG:
		/* d9 e0 = fchs. */
		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0));
		break;
	case SLJIT_FABS:
		/* d9 e1 = fabs. */
		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0));
		break;
	}

	/* Pop the result into dst (fstp). */
	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));

	return SLJIT_SUCCESS;
}
2335    
/* Two-operand floating point arithmetic using the legacy x87 FPU.
   The first two branches optimize the cases where dst already aliases one
   of the sources (operate directly on the register stack); the general
   path loads src1, applies src2, and pops the result into dst. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) {
		/* dst == src1 (an FPU register): push src2 and use the
		   "de" reverse forms that store into st(i) and pop. */
		FAIL_IF(emit_fld(compiler, src2, src2w));

		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1));
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1));
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1));
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1));
			break;
		}
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_fld(compiler, src1, src1w));

	if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) {
		/* dst == src2: operand order is reversed, so sub/div use the
		   non-reversed "de" encodings here. */
		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2));
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2));
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2));
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2));
			break;
		}
		return SLJIT_SUCCESS;
	}

	/* General case: st(0) = src1, apply src2 (register or m64fp form). */
	switch (op) {
	case SLJIT_FADD:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w));
		break;
	case SLJIT_FSUB:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w));
		break;
	case SLJIT_FMUL:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w));
		break;
	case SLJIT_FDIV:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w));
		break;
	}

	/* Pop the result into dst (fstp). */
	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));

	return SLJIT_SUCCESS;
}
2411     #endif
2412    
2413     #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2414    
2415 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2416 ph10 662 int dst, sljit_w dstw,
2417     int src, sljit_w srcw)
2418     {
2419     if (sse2_available)
2420     return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw);
2421     else
2422     return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw);
2423     }
2424    
2425 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2426 ph10 662 int dst, sljit_w dstw,
2427     int src1, sljit_w src1w,
2428     int src2, sljit_w src2w)
2429     {
2430     if (sse2_available)
2431     return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2432     else
2433     return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2434     }
2435    
2436     #endif
2437    
2438     /* --------------------------------------------------------------------- */
2439     /* Conditional instructions */
2440     /* --------------------------------------------------------------------- */
2441    
/* Creates a label at the current code position. A (0, 0) byte pair is
   appended to the instruction stream as an in-band marker; presumably the
   final code generation pass interprets it as "label here" -- confirm
   against sljit_generate_code, which is outside this chunk. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *buf;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	/* We should restore the flags before the label,
	   since other taken jumps has their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	/* Reuse the previous label if no code was emitted since. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	*buf++ = 0;
	*buf++ = 0;

	return label;
}
2470    
/* Creates a (conditional) jump or call with a target to be resolved later.
   A (0, type + 4) byte pair is appended as an in-band marker for the final
   code generation pass; compiler->size is bumped by the worst-case encoded
   length of the jump. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
{
	sljit_ub *buf;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		/* Conditional jumps and plain jumps consume the real flags;
		   restore them first. */
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Calls with arguments must set up the argument registers/stack first. */
	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(buf);

	*buf++ = 0;
	*buf++ = type + 4;
	return jump;
}
2507    
/* Indirect jump or call through 'src' (register, memory, or immediate
   address). The x86-32 blocks compensate for argument pushes that
   call_with_args will perform: if src is stack-relative, its offset must be
   adjusted, and if src sits in a register that argument setup clobbers, it
   is moved to TMP_REGISTER first. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
{
	sljit_ub *code;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		/* Fastcall: ecx (TEMPORARY_REG3) carries an argument; rescue src. */
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
		/* Stack-relative src: the third argument is pushed, shifting esp. */
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
			if (src & 0xf0) {
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				srcw += sizeof(sljit_w);
		}
#else
		/* cdecl: all arguments are pushed; adjust by one word per argument. */
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
			if (src & 0xf0) {
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
		}
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		/* Win64 ABI: TEMPORARY_REG3 is an argument register; rescue src. */
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		/* Absolute target: record a jump to be patched at code generation. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		code = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(code);

		*code++ = 0;
		*code++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		/* ff /2 = call r/m, ff /4 = jmp r/m. */
		code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0xff;
		*code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
	}
	return SLJIT_SUCCESS;
}
2589    
/* Materializes the current condition flags as a 0/1 value: emits SETcc
   (0f 9x) followed by a zero-extending MOVZX (0f b6), then either moves the
   result to dst (op == SLJIT_MOV) or combines it with dst via sljit_emit_op2
   (e.g. accumulating with OR). On x86-32 SETcc can only target a byte
   register (eax..edx), which forces the register shuffling in the second
   half. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
{
	sljit_ub *buf;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR();
	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, 0));

	/* Map the SLJIT condition to the SETcc opcode second byte (0f 9x). */
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		cond_set = 0x94; /* sete */
		break;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		cond_set = 0x95; /* setne */
		break;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		cond_set = 0x92; /* setb */
		break;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		cond_set = 0x93; /* setae */
		break;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		cond_set = 0x97; /* seta */
		break;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		cond_set = 0x96; /* setbe */
		break;

	case SLJIT_C_SIG_LESS:
		cond_set = 0x9c; /* setl */
		break;

	case SLJIT_C_SIG_GREATER_EQUAL:
		cond_set = 0x9d; /* setge */
		break;

	case SLJIT_C_SIG_GREATER:
		cond_set = 0x9f; /* setg */
		break;

	case SLJIT_C_SIG_LESS_EQUAL:
		cond_set = 0x9e; /* setle */
		break;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		cond_set = 0x90; /* seto */
		break;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		cond_set = 0x91; /* setno */
		break;

	case SLJIT_C_FLOAT_NAN:
		cond_set = 0x9a; /* setp */
		break;

	case SLJIT_C_FLOAT_NOT_NAN:
		cond_set = 0x9b; /* setnp */
		break;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Any register has a byte form on x86-64 (with a REX prefix). */
	reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!buf);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
	*buf++ = 0x0f;
	*buf++ = cond_set;
	*buf++ = 0xC0 | reg_lmap[reg];
	/* movzx reg, reg8 to clear the upper bits. */
	*buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*buf++ = 0x0f;
	*buf++ = 0xb6;
	*buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg == TMP_REGISTER) {
		if (op == SLJIT_MOV) {
			compiler->mode32 = 0;
			EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		}
		else {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
			compiler->skip_checks = 1;
#endif
			/* Combine the 0/1 value with dst using the requested op. */
			return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
		}
	}
#else
	if (op == SLJIT_MOV) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			/* dst itself has a byte form (eax/ecx/edx): setcc + movzx in place. */
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
		}
		else {
			/* Use al: save eax (TEMPORARY_REG1) in TMP_REGISTER first. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			if (dst >= SLJIT_GENERAL_REG1 && dst <= SLJIT_NO_REGISTERS)
				/* movzx dst, al. */
				*buf = 0xC0 | (reg_map[dst] << 3);
			else {
				/* movzx eax, al, then store eax to the memory destination. */
				*buf = 0xC0;
				EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
			}

			/* Restore eax. */
			EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
		}
	}
	else {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			/* Save dst's old value, setcc into dst's low byte; the op2 below
			   merges the saved value back in. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3);

			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];
		}
		else {
			/* Use al, with eax temporarily swapped into TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3 + 1);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			/* movzx eax, al. */
			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf++ = 0xC0;

			/* xchg eax, TMP_REGISTER (90+r): 0/1 lands in TMP, eax is restored. */
			*buf++ = 0x90 + reg_map[TMP_REGISTER];
		}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
		compiler->skip_checks = 1;
#endif
		/* Combine the 0/1 value with dst using the requested op. */
		return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
	}
#endif

	return SLJIT_SUCCESS;
}
2777    
/* Emit code that loads a patchable constant 'init_value' into 'dst', and
   record its code location so sljit_set_const() can rewrite the immediate
   after compilation.  Returns the const record, or NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
{
	sljit_ub *buf;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);

	/* Remap virtual "extra" registers to their backing operands where the
	   target configuration needs it. */
	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	/* Allocate and link the const record before emitting any code, so its
	   recorded position corresponds to the instruction emitted below. */
	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	/* A 64 bit immediate can only be loaded into a register: use dst when
	   it is a register, otherwise stage through TMP_REGISTER. */
	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	/* On x86-32 the immediate can be moved directly to memory; only an
	   unused destination needs a register stand-in. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REGISTER;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	/* Append a 0,1 byte pair to the instruction stream.  NOTE(review):
	   presumably a marker record tagging the just-emitted immediate as a
	   patchable const target for code generation — confirm against the
	   buffer consumer in sljit_generate_code. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	*buf++ = 0;
	*buf++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* If the value was staged in TMP_REGISTER, copy it on to the real
	   (non-register) destination now. */
	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
			return NULL;
#endif

	return const_;
}
2823    
2824 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2825 ph10 662 {
2826     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2827     *(sljit_w*)addr = new_addr - (addr + 4);
2828     #else
2829     *(sljit_uw*)addr = new_addr;
2830     #endif
2831     }
2832    
2833 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
2834 ph10 662 {
2835     *(sljit_w*)addr = new_constant;
2836     }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12