/[pcre]/code/trunk/sljit/sljitNativeX86_common.c
ViewVC logotype

Contents of /code/trunk/sljit/sljitNativeX86_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 880 - (hide annotations) (download)
Sun Jan 15 17:23:37 2012 UTC (16 months ago) by zherczeg
File MIME type: text/plain
File size: 79885 byte(s)
JIT compiler update (rename GENERAL to SAVED)
1 ph10 662 /*
2     * Stack-less Just-In-Time compiler
3     *
4 ph10 836 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 ph10 662 *
6     * Redistribution and use in source and binary forms, with or without modification, are
7     * permitted provided that the following conditions are met:
8     *
9     * 1. Redistributions of source code must retain the above copyright notice, this list of
10     * conditions and the following disclaimer.
11     *
12     * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13     * of conditions and the following disclaimer in the documentation and/or other materials
14     * provided with the distribution.
15     *
16     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17     * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19     * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21     * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22     * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23     * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24     * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25     */
26    
27 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
28 ph10 662 {
29     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
30     return "x86-32";
31     #else
32     return "x86-64";
33     #endif
34     }
35    
36     /*
37     32b register indexes:
38     0 - EAX
39     1 - ECX
40     2 - EDX
41     3 - EBX
42     4 - none
43     5 - EBP
44     6 - ESI
45     7 - EDI
46     */
47    
48     /*
49     64b register indexes:
50     0 - RAX
51     1 - RCX
52     2 - RDX
53     3 - RBX
54     4 - none
55     5 - RBP
56     6 - RSI
57     7 - RDI
58     8 - R8 - From now on REX prefix is required
59     9 - R9
60     10 - R10
61     11 - R11
62     12 - R12
63     13 - R13
64     14 - R14
65     15 - R15
66     */
67    
68     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
69    
70     /* Last register + 1. */
71     #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
72    
73     static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
74     0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
75     };
76    
77     #define CHECK_EXTRA_REGS(p, w, do) \
78     if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
79     w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
80     p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
81     do; \
82     } \
83 zherczeg 880 else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
84     w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_w); \
85 ph10 662 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
86     do; \
87     }
88    
89     #else /* SLJIT_CONFIG_X86_32 */
90    
91     /* Last register + 1. */
92     #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
93     #define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
94     #define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
95    
96     /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
97     Note: avoid to use r12 and r13 for memory addessing
98 zherczeg 880 therefore r12 is better for SAVED_EREG than SAVED_REG. */
99 ph10 662 #ifndef _WIN64
100     /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
101     static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
102     0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
103     };
104     /* low-map. reg_map & 0x7. */
105     static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
106     0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
107     };
108     #else
109     /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
110     static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
111     0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
112     };
113     /* low-map. reg_map & 0x7. */
114     static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
115     0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 4, 7, 2, 0, 1
116     };
117     #endif
118    
119     #define REX_W 0x48
120     #define REX_R 0x44
121     #define REX_X 0x42
122     #define REX_B 0x41
123     #define REX 0x40
124    
125     typedef unsigned int sljit_uhw;
126     typedef int sljit_hw;
127    
128     #define IS_HALFWORD(x) ((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
129     #define NOT_HALFWORD(x) ((x) > 0x7fffffffll || (x) < -0x80000000ll)
130    
131     #define CHECK_EXTRA_REGS(p, w, do)
132    
133     #endif /* SLJIT_CONFIG_X86_32 */
134    
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
/* Float register index one past SLJIT_FLOAT_REG4. */
#define TMP_FREG		(SLJIT_FLOAT_REG4 + 1)
#endif

/* Size flags for emit_x86_instruction; they are OR-ed into its size argument. */
#define EX86_BIN_INS		(1 << 4)
#define EX86_SHIFT_INS		(1 << 5)
#define EX86_REX		(1 << 6)
#define EX86_NO_REXW		(1 << 7)
#define EX86_BYTE_ARG		(1 << 8)
#define EX86_HALF_ARG		(1 << 9)
#define EX86_PREF_66		(1 << 10)

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define EX86_PREF_F2		(1 << 11)
#define EX86_SSE2		(1 << 12)
#endif

/* Store the record length s through buf (resp. code), advance that pointer,
   and add s to compiler->size. Both expect `compiler` and the named pointer
   to be in scope; s is evaluated twice. */
#define INC_SIZE(s)		(*buf++ = (s), compiler->size += (s))
#define INC_CSIZE(s)		(*code++ = (s), compiler->size += (s))

/* Single instruction emitters; each one writes through and advances `buf`. */
#define PUSH_REG(r)		(*buf++ = (0x50 + (r)))
#define POP_REG(r)		(*buf++ = (0x58 + (r)))
#define RET()			(*buf++ = (0xc3))
/* ret imm16: only the low byte is taken from n, the high byte is zero. */
#define RETN(n)			(*buf++ = (0xc2), *buf++ = (n), *buf++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)	(*buf++ = (0x8b), *buf++ = (((mod) << 6) | ((reg) << 3) | (rm)))
162    
163     static sljit_ub get_jump_code(int type)
164     {
165     switch (type) {
166     case SLJIT_C_EQUAL:
167     case SLJIT_C_FLOAT_EQUAL:
168     return 0x84;
169    
170     case SLJIT_C_NOT_EQUAL:
171     case SLJIT_C_FLOAT_NOT_EQUAL:
172     return 0x85;
173    
174     case SLJIT_C_LESS:
175     case SLJIT_C_FLOAT_LESS:
176     return 0x82;
177    
178     case SLJIT_C_GREATER_EQUAL:
179     case SLJIT_C_FLOAT_GREATER_EQUAL:
180     return 0x83;
181    
182     case SLJIT_C_GREATER:
183     case SLJIT_C_FLOAT_GREATER:
184     return 0x87;
185    
186     case SLJIT_C_LESS_EQUAL:
187     case SLJIT_C_FLOAT_LESS_EQUAL:
188     return 0x86;
189    
190     case SLJIT_C_SIG_LESS:
191     return 0x8c;
192    
193     case SLJIT_C_SIG_GREATER_EQUAL:
194     return 0x8d;
195    
196     case SLJIT_C_SIG_GREATER:
197     return 0x8f;
198    
199     case SLJIT_C_SIG_LESS_EQUAL:
200     return 0x8e;
201    
202     case SLJIT_C_OVERFLOW:
203     case SLJIT_C_MUL_OVERFLOW:
204     return 0x80;
205    
206     case SLJIT_C_NOT_OVERFLOW:
207     case SLJIT_C_MUL_NOT_OVERFLOW:
208     return 0x81;
209    
210     case SLJIT_C_FLOAT_NAN:
211     return 0x8a;
212    
213     case SLJIT_C_FLOAT_NOT_NAN:
214     return 0x8b;
215     }
216     return 0;
217     }
218    
219     static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);
220    
221     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
222     static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
223     #endif
224    
225     static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
226     {
227     int short_jump;
228     sljit_uw label_addr;
229    
230     if (jump->flags & JUMP_LABEL)
231     label_addr = (sljit_uw)(code + jump->u.label->size);
232     else
233     label_addr = jump->u.target;
234     short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;
235    
236     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
237     if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
238     return generate_far_jump_code(jump, code_ptr, type);
239     #endif
240    
241     if (type == SLJIT_JUMP) {
242     if (short_jump)
243     *code_ptr++ = 0xeb;
244     else
245     *code_ptr++ = 0xe9;
246     jump->addr++;
247     }
248 zherczeg 722 else if (type >= SLJIT_FAST_CALL) {
249 ph10 662 short_jump = 0;
250     *code_ptr++ = 0xe8;
251     jump->addr++;
252     }
253     else if (short_jump) {
254     *code_ptr++ = get_jump_code(type) - 0x10;
255     jump->addr++;
256     }
257     else {
258     *code_ptr++ = 0x0f;
259     *code_ptr++ = get_jump_code(type);
260     jump->addr += 2;
261     }
262    
263     if (short_jump) {
264     jump->flags |= PATCH_MB;
265     code_ptr += sizeof(sljit_b);
266     } else {
267     jump->flags |= PATCH_MW;
268     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
269     code_ptr += sizeof(sljit_w);
270     #else
271     code_ptr += sizeof(sljit_hw);
272     #endif
273     }
274    
275     return code_ptr;
276     }
277    
278 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
279 ph10 662 {
280     struct sljit_memory_fragment *buf;
281     sljit_ub *code;
282     sljit_ub *code_ptr;
283     sljit_ub *buf_ptr;
284     sljit_ub *buf_end;
285     sljit_ub len;
286    
287     struct sljit_label *label;
288     struct sljit_jump *jump;
289     struct sljit_const *const_;
290    
291     CHECK_ERROR_PTR();
292     check_sljit_generate_code(compiler);
293     reverse_buf(compiler);
294    
295     /* Second code generation pass. */
296     code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
297     PTR_FAIL_WITH_EXEC_IF(code);
298     buf = compiler->buf;
299    
300     code_ptr = code;
301     label = compiler->labels;
302     jump = compiler->jumps;
303     const_ = compiler->consts;
304     do {
305     buf_ptr = buf->memory;
306     buf_end = buf_ptr + buf->used_size;
307     do {
308     len = *buf_ptr++;
309     if (len > 0) {
310     /* The code is already generated. */
311     SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
312     code_ptr += len;
313     buf_ptr += len;
314     }
315     else {
316     if (*buf_ptr >= 4) {
317     jump->addr = (sljit_uw)code_ptr;
318     if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
319     code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
320     else
321     code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
322     jump = jump->next;
323     }
324     else if (*buf_ptr == 0) {
325     label->addr = (sljit_uw)code_ptr;
326     label->size = code_ptr - code;
327     label = label->next;
328     }
329     else if (*buf_ptr == 1) {
330     const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
331     const_ = const_->next;
332     }
333     else {
334     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
335     *code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
336     buf_ptr++;
337     *(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
338     code_ptr += sizeof(sljit_w);
339     buf_ptr += sizeof(sljit_w) - 1;
340     #else
341     code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
342     buf_ptr += sizeof(sljit_w);
343     #endif
344     }
345     buf_ptr++;
346     }
347     } while (buf_ptr < buf_end);
348     SLJIT_ASSERT(buf_ptr == buf_end);
349     buf = buf->next;
350     } while (buf);
351    
352     SLJIT_ASSERT(!label);
353     SLJIT_ASSERT(!jump);
354     SLJIT_ASSERT(!const_);
355    
356     jump = compiler->jumps;
357     while (jump) {
358     if (jump->flags & PATCH_MB) {
359     SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
360 ph10 836 *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
361 ph10 662 } else if (jump->flags & PATCH_MW) {
362     if (jump->flags & JUMP_LABEL) {
363     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
364 ph10 836 *(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
365 ph10 662 #else
366     SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
367 ph10 836 *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
368 ph10 662 #endif
369     }
370     else {
371     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
372 ph10 836 *(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
373 ph10 662 #else
374     SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
375 ph10 836 *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
376 ph10 662 #endif
377     }
378     }
379     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
380     else if (jump->flags & PATCH_MD)
381     *(sljit_w*)jump->addr = jump->u.label->addr;
382     #endif
383    
384     jump = jump->next;
385     }
386    
387     /* Maybe we waste some space because of short jumps. */
388     SLJIT_ASSERT(code_ptr <= code + compiler->size);
389     compiler->error = SLJIT_ERR_COMPILED;
390 ph10 836 compiler->executable_size = compiler->size;
391 ph10 662 return (void*)code;
392     }
393    
394     /* --------------------------------------------------------------------- */
395     /* Operators */
396     /* --------------------------------------------------------------------- */
397    
398     static int emit_cum_binary(struct sljit_compiler *compiler,
399     sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
400     int dst, sljit_w dstw,
401     int src1, sljit_w src1w,
402     int src2, sljit_w src2w);
403    
404     static int emit_non_cum_binary(struct sljit_compiler *compiler,
405     sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
406     int dst, sljit_w dstw,
407     int src1, sljit_w src1w,
408     int src2, sljit_w src2w);
409    
410     static int emit_mov(struct sljit_compiler *compiler,
411     int dst, sljit_w dstw,
412     int src, sljit_w srcw);
413    
414     static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
415     {
416     sljit_ub *buf;
417    
418     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
419     buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
420     FAIL_IF(!buf);
421     INC_SIZE(5);
422     *buf++ = 0x9c; /* pushfd */
423     #else
424     buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
425     FAIL_IF(!buf);
426     INC_SIZE(6);
427     *buf++ = 0x9c; /* pushfq */
428     *buf++ = 0x48;
429     #endif
430     *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
431     *buf++ = 0x64;
432     *buf++ = 0x24;
433     *buf++ = sizeof(sljit_w);
434     compiler->flags_saved = 1;
435     return SLJIT_SUCCESS;
436     }
437    
438     static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
439     {
440     sljit_ub *buf;
441    
442     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
443     buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
444     FAIL_IF(!buf);
445     INC_SIZE(5);
446     #else
447     buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
448     FAIL_IF(!buf);
449     INC_SIZE(6);
450     *buf++ = 0x48;
451     #endif
452     *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
453     *buf++ = 0x64;
454     *buf++ = 0x24;
455     *buf++ = (sljit_ub)-(int)sizeof(sljit_w);
456     *buf++ = 0x9d; /* popfd / popfq */
457     compiler->flags_saved = keep_flags;
458     return SLJIT_SUCCESS;
459     }
460    
#ifdef _WIN32
#include <malloc.h>

/* Windows only helper: allocates local_size bytes with alloca and discards
   the result. The allocation itself is the point of the call. */
static void SLJIT_CALL sljit_touch_stack(sljit_w local_size)
{
	/* Workaround for calling _chkstk. */
	alloca(local_size);
}
#endif
470    
471     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
472     #include "sljitNativeX86_32.c"
473     #else
474     #include "sljitNativeX86_64.c"
475     #endif
476    
477     static int emit_mov(struct sljit_compiler *compiler,
478     int dst, sljit_w dstw,
479     int src, sljit_w srcw)
480     {
481     sljit_ub* code;
482    
483     if (dst == SLJIT_UNUSED) {
484     /* No destination, doesn't need to setup flags. */
485     if (src & SLJIT_MEM) {
486     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
487     FAIL_IF(!code);
488     *code = 0x8b;
489     }
490     return SLJIT_SUCCESS;
491     }
492     if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
493     code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
494     FAIL_IF(!code);
495     *code = 0x89;
496     return SLJIT_SUCCESS;
497     }
498     if (src & SLJIT_IMM) {
499     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
500     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
501     return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
502     #else
503     if (!compiler->mode32) {
504     if (NOT_HALFWORD(srcw))
505     return emit_load_imm64(compiler, dst, srcw);
506     }
507     else
508     return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
509     #endif
510     }
511     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
512     if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
513     FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
514     code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
515     FAIL_IF(!code);
516     *code = 0x89;
517     return SLJIT_SUCCESS;
518     }
519     #endif
520     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
521     FAIL_IF(!code);
522     *code = 0xc7;
523     return SLJIT_SUCCESS;
524     }
525     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
526     code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
527     FAIL_IF(!code);
528     *code = 0x8b;
529     return SLJIT_SUCCESS;
530     }
531    
532     /* Memory to memory move. Requires two instruction. */
533     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
534     FAIL_IF(!code);
535     *code = 0x8b;
536     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
537     FAIL_IF(!code);
538     *code = 0x89;
539     return SLJIT_SUCCESS;
540     }
541    
542     #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
543     FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
544    
545 zherczeg 847 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
546     {
547     sljit_ub *buf;
548     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
549     int size;
550     #endif
551    
552     CHECK_ERROR();
553     check_sljit_emit_op0(compiler, op);
554    
555     switch (GET_OPCODE(op)) {
556     case SLJIT_BREAKPOINT:
557     buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
558     FAIL_IF(!buf);
559     INC_SIZE(1);
560     *buf = 0xcc;
561     break;
562     case SLJIT_NOP:
563     buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
564     FAIL_IF(!buf);
565     INC_SIZE(1);
566     *buf = 0x90;
567     break;
568     case SLJIT_UMUL:
569     case SLJIT_SMUL:
570     case SLJIT_UDIV:
571     case SLJIT_SDIV:
572     compiler->flags_saved = 0;
573     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
574     #ifdef _WIN64
575     SLJIT_COMPILE_ASSERT(
576     reg_map[SLJIT_TEMPORARY_REG1] == 0
577     && reg_map[SLJIT_TEMPORARY_REG2] == 2
578     && reg_map[TMP_REGISTER] > 7,
579     invalid_register_assignment_for_div_mul);
580     #else
581     SLJIT_COMPILE_ASSERT(
582     reg_map[SLJIT_TEMPORARY_REG1] == 0
583     && reg_map[SLJIT_TEMPORARY_REG2] < 7
584     && reg_map[TMP_REGISTER] == 2,
585     invalid_register_assignment_for_div_mul);
586     #endif
587     compiler->mode32 = op & SLJIT_INT_OP;
588     #endif
589    
590     op = GET_OPCODE(op);
591     if (op == SLJIT_UDIV) {
592     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
593     EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
594     buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
595     #else
596     buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
597     #endif
598     FAIL_IF(!buf);
599     *buf = 0x33;
600     }
601    
602     if (op == SLJIT_SDIV) {
603     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
604     EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
605     #endif
606    
607 zherczeg 867 /* CDQ instruction */
608 zherczeg 847 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
609 zherczeg 867 buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
610 zherczeg 847 FAIL_IF(!buf);
611 zherczeg 867 INC_SIZE(1);
612     *buf = 0x99;
613 zherczeg 847 #else
614     if (compiler->mode32) {
615 zherczeg 867 buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
616 zherczeg 847 FAIL_IF(!buf);
617 zherczeg 867 INC_SIZE(1);
618     *buf = 0x99;
619 zherczeg 847 } else {
620 zherczeg 867 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
621 zherczeg 847 FAIL_IF(!buf);
622 zherczeg 867 INC_SIZE(2);
623 zherczeg 847 *buf++ = REX_W;
624 zherczeg 867 *buf = 0x99;
625 zherczeg 847 }
626     #endif
627     }
628    
629     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
630     buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
631     FAIL_IF(!buf);
632     INC_SIZE(2);
633     *buf++ = 0xf7;
634     *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
635     #else
636     #ifdef _WIN64
637     size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
638     #else
639     size = (!compiler->mode32) ? 3 : 2;
640     #endif
641     buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
642     FAIL_IF(!buf);
643     INC_SIZE(size);
644     #ifdef _WIN64
645     if (!compiler->mode32)
646     *buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
647     else if (op >= SLJIT_UDIV)
648     *buf++ = REX_B;
649     *buf++ = 0xf7;
650     *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
651     #else
652     if (!compiler->mode32)
653     *buf++ = REX_W;
654     *buf++ = 0xf7;
655     *buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
656     #endif
657     #endif
658     switch (op) {
659     case SLJIT_UMUL:
660     *buf |= 4 << 3;
661     break;
662     case SLJIT_SMUL:
663     *buf |= 5 << 3;
664     break;
665     case SLJIT_UDIV:
666     *buf |= 6 << 3;
667     break;
668     case SLJIT_SDIV:
669     *buf |= 7 << 3;
670     break;
671     }
672     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
673     EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
674     #endif
675     break;
676     }
677    
678     return SLJIT_SUCCESS;
679     }
680    
681 ph10 662 #define ENCODE_PREFIX(prefix) \
682     do { \
683     code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
684     FAIL_IF(!code); \
685     INC_CSIZE(1); \
686     *code = (prefix); \
687     } while (0)
688    
689     static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
690     int dst, sljit_w dstw,
691     int src, sljit_w srcw)
692     {
693     sljit_ub* code;
694     int dst_r;
695     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
696     int work_r;
697     #endif
698    
699     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
700     compiler->mode32 = 0;
701     #endif
702    
703     if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
704     return SLJIT_SUCCESS; /* Empty instruction. */
705    
706     if (src & SLJIT_IMM) {
707     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
708     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
709     return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
710     #else
711     return emit_load_imm64(compiler, dst, srcw);
712     #endif
713     }
714     code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
715     FAIL_IF(!code);
716     *code = 0xc6;
717     return SLJIT_SUCCESS;
718     }
719    
720     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
721    
722     if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
723     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
724     if (reg_map[src] >= 4) {
725     SLJIT_ASSERT(dst_r == TMP_REGISTER);
726     EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
727     } else
728     dst_r = src;
729     #else
730     dst_r = src;
731     #endif
732     }
733     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
734     else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
735     /* src, dst are registers. */
736     SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
737     if (reg_map[dst] < 4) {
738     if (dst != src)
739     EMIT_MOV(compiler, dst, 0, src, 0);
740     code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
741     FAIL_IF(!code);
742     *code++ = 0x0f;
743     *code = sign ? 0xbe : 0xb6;
744     }
745     else {
746     if (dst != src)
747     EMIT_MOV(compiler, dst, 0, src, 0);
748     if (sign) {
749     /* shl reg, 24 */
750     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
751     FAIL_IF(!code);
752     *code |= 0x4 << 3;
753     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
754     FAIL_IF(!code);
755     /* shr/sar reg, 24 */
756     *code |= 0x7 << 3;
757     }
758     else {
759     /* and dst, 0xff */
760     code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
761     FAIL_IF(!code);
762     *(code + 1) |= 0x4 << 3;
763     }
764     }
765     return SLJIT_SUCCESS;
766     }
767     #endif
768     else {
769     /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
770     code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
771     FAIL_IF(!code);
772     *code++ = 0x0f;
773     *code = sign ? 0xbe : 0xb6;
774     }
775    
776     if (dst & SLJIT_MEM) {
777     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
778     if (dst_r == TMP_REGISTER) {
779     /* Find a non-used register, whose reg_map[src] < 4. */
780     if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
781     if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
782     work_r = SLJIT_TEMPORARY_REG3;
783     else
784     work_r = SLJIT_TEMPORARY_REG2;
785     }
786     else {
787     if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
788     work_r = SLJIT_TEMPORARY_REG1;
789     else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
790     work_r = SLJIT_TEMPORARY_REG3;
791     else
792     work_r = SLJIT_TEMPORARY_REG2;
793     }
794    
795     if (work_r == SLJIT_TEMPORARY_REG1) {
796     ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
797     }
798     else {
799     code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
800     FAIL_IF(!code);
801     *code = 0x87;
802     }
803    
804     code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
805     FAIL_IF(!code);
806     *code = 0x88;
807    
808     if (work_r == SLJIT_TEMPORARY_REG1) {
809     ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
810     }
811     else {
812     code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
813     FAIL_IF(!code);
814     *code = 0x87;
815     }
816     }
817     else {
818     code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
819     FAIL_IF(!code);
820     *code = 0x88;
821     }
822     #else
823     code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
824     FAIL_IF(!code);
825     *code = 0x88;
826     #endif
827     }
828    
829     return SLJIT_SUCCESS;
830     }
831    
832     static int emit_mov_half(struct sljit_compiler *compiler, int sign,
833     int dst, sljit_w dstw,
834     int src, sljit_w srcw)
835     {
836     sljit_ub* code;
837     int dst_r;
838    
839     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
840     compiler->mode32 = 0;
841     #endif
842    
843     if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
844     return SLJIT_SUCCESS; /* Empty instruction. */
845    
846     if (src & SLJIT_IMM) {
847     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
848     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
849     return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
850     #else
851     return emit_load_imm64(compiler, dst, srcw);
852     #endif
853     }
854     code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
855     FAIL_IF(!code);
856     *code = 0xc7;
857     return SLJIT_SUCCESS;
858     }
859    
860     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
861    
862     if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
863     dst_r = src;
864     else {
865     code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
866     FAIL_IF(!code);
867     *code++ = 0x0f;
868     *code = sign ? 0xbf : 0xb7;
869     }
870    
871     if (dst & SLJIT_MEM) {
872     code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
873     FAIL_IF(!code);
874     *code = 0x89;
875     }
876    
877     return SLJIT_SUCCESS;
878     }
879    
880     static int emit_unary(struct sljit_compiler *compiler, int un_index,
881     int dst, sljit_w dstw,
882     int src, sljit_w srcw)
883     {
884     sljit_ub* code;
885    
886     if (dst == SLJIT_UNUSED) {
887     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
888     code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
889     FAIL_IF(!code);
890     *code++ = 0xf7;
891     *code |= (un_index) << 3;
892     return SLJIT_SUCCESS;
893     }
894     if (dst == src && dstw == srcw) {
895     /* Same input and output */
896     code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
897     FAIL_IF(!code);
898     *code++ = 0xf7;
899     *code |= (un_index) << 3;
900     return SLJIT_SUCCESS;
901     }
902     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
903     EMIT_MOV(compiler, dst, 0, src, srcw);
904     code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
905     FAIL_IF(!code);
906     *code++ = 0xf7;
907     *code |= (un_index) << 3;
908     return SLJIT_SUCCESS;
909     }
910     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
911     code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
912     FAIL_IF(!code);
913     *code++ = 0xf7;
914     *code |= (un_index) << 3;
915     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
916     return SLJIT_SUCCESS;
917     }
918    
919     static int emit_not_with_flags(struct sljit_compiler *compiler,
920     int dst, sljit_w dstw,
921     int src, sljit_w srcw)
922     {
923     sljit_ub* code;
924    
925     if (dst == SLJIT_UNUSED) {
926     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
927     code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
928     FAIL_IF(!code);
929     *code++ = 0xf7;
930     *code |= 0x2 << 3;
931     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
932     FAIL_IF(!code);
933     *code = 0x0b;
934     return SLJIT_SUCCESS;
935     }
936     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
937     EMIT_MOV(compiler, dst, 0, src, srcw);
938     code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
939     FAIL_IF(!code);
940     *code++ = 0xf7;
941     *code |= 0x2 << 3;
942     code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
943     FAIL_IF(!code);
944     *code = 0x0b;
945     return SLJIT_SUCCESS;
946     }
947     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
948     code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
949     FAIL_IF(!code);
950     *code++ = 0xf7;
951     *code |= 0x2 << 3;
952     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
953     FAIL_IF(!code);
954     *code = 0x0b;
955     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
956     return SLJIT_SUCCESS;
957     }
958    
959     static int emit_clz(struct sljit_compiler *compiler, int op,
960     int dst, sljit_w dstw,
961     int src, sljit_w srcw)
962     {
963     sljit_ub* code;
964     int dst_r;
965    
966     if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
967     /* Just set the zero flag. */
968     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
969     code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
970     FAIL_IF(!code);
971     *code++ = 0xf7;
972     *code |= 0x2 << 3;
973     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
974     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
975     #else
976     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
977     #endif
978     FAIL_IF(!code);
979     *code |= 0x5 << 3;
980     return SLJIT_SUCCESS;
981     }
982    
983     if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
984     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
985     src = TMP_REGISTER;
986     srcw = 0;
987     }
988    
989     code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
990     FAIL_IF(!code);
991     *code++ = 0x0f;
992     *code = 0xbd;
993    
994     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
995     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
996     dst_r = dst;
997     else {
998     /* Find an unused temporary register. */
999     if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
1000     dst_r = SLJIT_TEMPORARY_REG1;
1001     else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
1002     dst_r = SLJIT_TEMPORARY_REG2;
1003     else
1004     dst_r = SLJIT_TEMPORARY_REG3;
1005     EMIT_MOV(compiler, dst, dstw, dst_r, 0);
1006     }
1007     EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
1008     #else
1009     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
1010     compiler->mode32 = 0;
1011     EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
1012     compiler->mode32 = op & SLJIT_INT_OP;
1013     #endif
1014    
1015     code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
1016     FAIL_IF(!code);
1017     *code++ = 0x0f;
1018     *code = 0x45;
1019    
1020     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1021     code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1022     #else
1023     code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
1024     #endif
1025     FAIL_IF(!code);
1026     *(code + 1) |= 0x6 << 3;
1027    
1028     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1029     if (dst & SLJIT_MEM) {
1030     code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1031     FAIL_IF(!code);
1032     *code = 0x87;
1033     }
1034     #else
1035     if (dst & SLJIT_MEM)
1036     EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
1037     #endif
1038     return SLJIT_SUCCESS;
1039     }
1040    
1041 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
1042 ph10 662 int dst, sljit_w dstw,
1043     int src, sljit_w srcw)
1044     {
1045     sljit_ub* code;
1046     int update = 0;
1047     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1048     int dst_is_ereg = 0;
1049     int src_is_ereg = 0;
1050     #else
1051     #define src_is_ereg 0
1052     #endif
1053    
1054     CHECK_ERROR();
1055     check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
1056    
1057     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1058     compiler->mode32 = op & SLJIT_INT_OP;
1059     #endif
1060     CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1061     CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1062    
1063     if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
1064     op = GET_OPCODE(op);
1065     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1066     compiler->mode32 = 0;
1067     #endif
1068    
1069 zherczeg 740 SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
1070 ph10 662 if (op >= SLJIT_MOVU) {
1071     update = 1;
1072     op -= 7;
1073     }
1074    
1075     if (src & SLJIT_IMM) {
1076     switch (op) {
1077     case SLJIT_MOV_UB:
1078     srcw = (unsigned char)srcw;
1079     break;
1080     case SLJIT_MOV_SB:
1081     srcw = (signed char)srcw;
1082     break;
1083     case SLJIT_MOV_UH:
1084     srcw = (unsigned short)srcw;
1085     break;
1086     case SLJIT_MOV_SH:
1087     srcw = (signed short)srcw;
1088     break;
1089     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1090     case SLJIT_MOV_UI:
1091     srcw = (unsigned int)srcw;
1092     break;
1093     case SLJIT_MOV_SI:
1094     srcw = (signed int)srcw;
1095     break;
1096     #endif
1097     }
1098     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1099     if (SLJIT_UNLIKELY(dst_is_ereg))
1100     return emit_mov(compiler, dst, dstw, src, srcw);
1101     #endif
1102     }
1103    
1104     if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
1105     code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
1106     FAIL_IF(!code);
1107     *code = 0x8d;
1108     src &= SLJIT_MEM | 0xf;
1109     srcw = 0;
1110     }
1111    
1112     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1113     if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
1114     SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
1115     dst = TMP_REGISTER;
1116     }
1117     #endif
1118    
1119     switch (op) {
1120     case SLJIT_MOV:
1121     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1122     case SLJIT_MOV_UI:
1123     case SLJIT_MOV_SI:
1124     #endif
1125     FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1126     break;
1127     case SLJIT_MOV_UB:
1128     FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
1129     break;
1130     case SLJIT_MOV_SB:
1131     FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
1132     break;
1133     case SLJIT_MOV_UH:
1134     FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
1135     break;
1136     case SLJIT_MOV_SH:
1137     FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
1138     break;
1139     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1140     case SLJIT_MOV_UI:
1141     FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
1142     break;
1143     case SLJIT_MOV_SI:
1144     FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
1145     break;
1146     #endif
1147     }
1148    
1149     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1150     if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
1151     return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
1152     #endif
1153    
1154     if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
1155     code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
1156     FAIL_IF(!code);
1157     *code = 0x8d;
1158     }
1159     return SLJIT_SUCCESS;
1160     }
1161    
1162     if (SLJIT_UNLIKELY(GET_FLAGS(op)))
1163     compiler->flags_saved = 0;
1164    
1165     switch (GET_OPCODE(op)) {
1166     case SLJIT_NOT:
1167     if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
1168     return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1169     return emit_unary(compiler, 0x2, dst, dstw, src, srcw);
1170    
1171     case SLJIT_NEG:
1172     if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1173     FAIL_IF(emit_save_flags(compiler));
1174     return emit_unary(compiler, 0x3, dst, dstw, src, srcw);
1175    
1176     case SLJIT_CLZ:
1177     if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1178     FAIL_IF(emit_save_flags(compiler));
1179     return emit_clz(compiler, op, dst, dstw, src, srcw);
1180     }
1181    
1182     return SLJIT_SUCCESS;
1183    
1184     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1185     #undef src_is_ereg
1186     #endif
1187     }
1188    
/* Helper macros for binary instructions with an immediate operand.

   Both macros rely on names from the expansion site: `compiler`, and for
   BINARY_IMM also a local `sljit_ub* code`. They return from the enclosing
   function through FAIL_IF on failure.

   BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw)
     Applies the operation selected by _op_imm_ (OR-ed into the byte after
     the opcode) to arg/argw with immediate immw. On x86-64 an immediate
     that is not a halfword, outside mode32, is first loaded into TMP_REG2
     and the register form _op_mr_ is used instead.
     NOTE: expands to several statements (or a bare if/else), so it must
     only be used inside a braced block.

   BINARY_EAX_IMM(_op_eax_imm_, immw)
     Emits the opcode _op_eax_imm_ followed by a 32 bit immediate; on x86-64
     a REX_W prefix is requested unless mode32 is set. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!code); \
	code[1] |= (_op_imm_);

#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))

#else

#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	if (compiler->mode32 || IS_HALFWORD(immw)) { \
		code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!code); \
		code[1] |= (_op_imm_); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!code); \
		*code = (_op_mr_); \
	}

#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))

#endif
1218    
1219     static int emit_cum_binary(struct sljit_compiler *compiler,
1220     sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1221     int dst, sljit_w dstw,
1222     int src1, sljit_w src1w,
1223     int src2, sljit_w src2w)
1224     {
1225     sljit_ub* code;
1226    
1227     if (dst == SLJIT_UNUSED) {
1228     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1229     if (src2 & SLJIT_IMM) {
1230     BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1231     }
1232     else {
1233     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1234     FAIL_IF(!code);
1235     *code = op_rm;
1236     }
1237     return SLJIT_SUCCESS;
1238     }
1239    
1240     if (dst == src1 && dstw == src1w) {
1241     if (src2 & SLJIT_IMM) {
1242     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1243     if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1244     #else
1245     if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
1246     #endif
1247     BINARY_EAX_IMM(op_eax_imm, src2w);
1248     }
1249     else {
1250     BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1251     }
1252     }
1253     else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
1254     code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1255     FAIL_IF(!code);
1256     *code = op_rm;
1257     }
1258     else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
1259     /* Special exception for sljit_emit_cond_value. */
1260     code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1261     FAIL_IF(!code);
1262     *code = op_mr;
1263     }
1264     else {
1265     EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
1266     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
1267     FAIL_IF(!code);
1268     *code = op_mr;
1269     }
1270     return SLJIT_SUCCESS;
1271     }
1272    
1273     /* Only for cumulative operations. */
1274     if (dst == src2 && dstw == src2w) {
1275     if (src1 & SLJIT_IMM) {
1276     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1277     if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1278     #else
1279     if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
1280     #endif
1281     BINARY_EAX_IMM(op_eax_imm, src1w);
1282     }
1283     else {
1284     BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1285     }
1286     }
1287     else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
1288     code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1289     FAIL_IF(!code);
1290     *code = op_rm;
1291     }
1292     else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1293     code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1294     FAIL_IF(!code);
1295     *code = op_mr;
1296     }
1297     else {
1298     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1299     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
1300     FAIL_IF(!code);
1301     *code = op_mr;
1302     }
1303     return SLJIT_SUCCESS;
1304     }
1305    
1306     /* General version. */
1307     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
1308     EMIT_MOV(compiler, dst, 0, src1, src1w);
1309     if (src2 & SLJIT_IMM) {
1310     BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1311     }
1312     else {
1313     code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1314     FAIL_IF(!code);
1315     *code = op_rm;
1316     }
1317     }
1318     else {
1319     /* This version requires less memory writing. */
1320     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1321     if (src2 & SLJIT_IMM) {
1322     BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1323     }
1324     else {
1325     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1326     FAIL_IF(!code);
1327     *code = op_rm;
1328     }
1329     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1330     }
1331    
1332     return SLJIT_SUCCESS;
1333     }
1334    
1335     static int emit_non_cum_binary(struct sljit_compiler *compiler,
1336     sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
1337     int dst, sljit_w dstw,
1338     int src1, sljit_w src1w,
1339     int src2, sljit_w src2w)
1340     {
1341     sljit_ub* code;
1342    
1343     if (dst == SLJIT_UNUSED) {
1344     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1345     if (src2 & SLJIT_IMM) {
1346     BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1347     }
1348     else {
1349     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1350     FAIL_IF(!code);
1351     *code = op_rm;
1352     }
1353     return SLJIT_SUCCESS;
1354     }
1355    
1356     if (dst == src1 && dstw == src1w) {
1357     if (src2 & SLJIT_IMM) {
1358     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1359     if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1360     #else
1361     if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
1362     #endif
1363     BINARY_EAX_IMM(op_eax_imm, src2w);
1364     }
1365     else {
1366     BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1367     }
1368     }
1369     else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
1370     code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1371     FAIL_IF(!code);
1372     *code = op_rm;
1373     }
1374     else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1375     code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1376     FAIL_IF(!code);
1377     *code = op_mr;
1378     }
1379     else {
1380     EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
1381     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
1382     FAIL_IF(!code);
1383     *code = op_mr;
1384     }
1385     return SLJIT_SUCCESS;
1386     }
1387    
1388     /* General version. */
1389     if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
1390     EMIT_MOV(compiler, dst, 0, src1, src1w);
1391     if (src2 & SLJIT_IMM) {
1392     BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1393     }
1394     else {
1395     code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1396     FAIL_IF(!code);
1397     *code = op_rm;
1398     }
1399     }
1400     else {
1401     /* This version requires less memory writing. */
1402     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1403     if (src2 & SLJIT_IMM) {
1404     BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
1405     }
1406     else {
1407     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1408     FAIL_IF(!code);
1409     *code = op_rm;
1410     }
1411     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1412     }
1413    
1414     return SLJIT_SUCCESS;
1415     }
1416    
1417     static int emit_mul(struct sljit_compiler *compiler,
1418     int dst, sljit_w dstw,
1419     int src1, sljit_w src1w,
1420     int src2, sljit_w src2w)
1421     {
1422     sljit_ub* code;
1423     int dst_r;
1424    
1425     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
1426    
1427     /* Register destination. */
1428     if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1429     code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1430     FAIL_IF(!code);
1431     *code++ = 0x0f;
1432     *code = 0xaf;
1433     }
1434     else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1435     code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1436     FAIL_IF(!code);
1437     *code++ = 0x0f;
1438     *code = 0xaf;
1439     }
1440     else if (src1 & SLJIT_IMM) {
1441     if (src2 & SLJIT_IMM) {
1442     EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1443     src2 = dst_r;
1444     src2w = 0;
1445     }
1446    
1447     if (src1w <= 127 && src1w >= -128) {
1448     code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1449     FAIL_IF(!code);
1450     *code = 0x6b;
1451     code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1452     FAIL_IF(!code);
1453     INC_CSIZE(1);
1454     *code = (sljit_b)src1w;
1455     }
1456     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1457     else {
1458     code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1459     FAIL_IF(!code);
1460     *code = 0x69;
1461     code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1462     FAIL_IF(!code);
1463     INC_CSIZE(4);
1464     *(sljit_w*)code = src1w;
1465     }
1466     #else
1467     else if (IS_HALFWORD(src1w)) {
1468     code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1469     FAIL_IF(!code);
1470     *code = 0x69;
1471     code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1472     FAIL_IF(!code);
1473     INC_CSIZE(4);
1474 ph10 836 *(sljit_hw*)code = (sljit_hw)src1w;
1475 ph10 662 }
1476     else {
1477     EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1478     if (dst_r != src2)
1479     EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1480     code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1481     FAIL_IF(!code);
1482     *code++ = 0x0f;
1483     *code = 0xaf;
1484     }
1485     #endif
1486     }
1487     else if (src2 & SLJIT_IMM) {
1488     /* Note: src1 is NOT immediate. */
1489    
1490     if (src2w <= 127 && src2w >= -128) {
1491     code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1492     FAIL_IF(!code);
1493     *code = 0x6b;
1494     code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1495     FAIL_IF(!code);
1496     INC_CSIZE(1);
1497     *code = (sljit_b)src2w;
1498     }
1499     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1500     else {
1501     code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1502     FAIL_IF(!code);
1503     *code = 0x69;
1504     code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1505     FAIL_IF(!code);
1506     INC_CSIZE(4);
1507     *(sljit_w*)code = src2w;
1508     }
1509     #else
1510     else if (IS_HALFWORD(src2w)) {
1511     code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1512     FAIL_IF(!code);
1513     *code = 0x69;
1514     code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1515     FAIL_IF(!code);
1516     INC_CSIZE(4);
1517 ph10 836 *(sljit_hw*)code = (sljit_hw)src2w;
1518 ph10 662 }
1519     else {
1520     EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1521     if (dst_r != src1)
1522     EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1523     code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1524     FAIL_IF(!code);
1525     *code++ = 0x0f;
1526     *code = 0xaf;
1527     }
1528     #endif
1529     }
1530     else {
1531     /* Neither argument is immediate. */
1532 zherczeg 740 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1533 ph10 662 dst_r = TMP_REGISTER;
1534     EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1535     code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1536     FAIL_IF(!code);
1537     *code++ = 0x0f;
1538     *code = 0xaf;
1539     }
1540    
1541     if (dst_r == TMP_REGISTER)
1542     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1543    
1544     return SLJIT_SUCCESS;
1545     }
1546    
1547     static int emit_lea_binary(struct sljit_compiler *compiler,
1548     int dst, sljit_w dstw,
1549     int src1, sljit_w src1w,
1550     int src2, sljit_w src2w)
1551     {
1552     sljit_ub* code;
1553     int dst_r, done = 0;
1554    
1555     /* These cases better be left to handled by normal way. */
1556     if (dst == src1 && dstw == src1w)
1557     return SLJIT_ERR_UNSUPPORTED;
1558     if (dst == src2 && dstw == src2w)
1559     return SLJIT_ERR_UNSUPPORTED;
1560    
1561     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
1562    
1563     if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1564     if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1565     /* It is not possible to be both SLJIT_LOCALS_REG. */
1566     if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
1567     code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1568     FAIL_IF(!code);
1569     *code = 0x8d;
1570     done = 1;
1571     }
1572     }
1573     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1574     if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1575     code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
1576     #else
1577     if (src2 & SLJIT_IMM) {
1578     code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1579     #endif
1580     FAIL_IF(!code);
1581     *code = 0x8d;
1582     done = 1;
1583     }
1584     }
1585     else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1586     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1587     if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1588     code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
1589     #else
1590     if (src1 & SLJIT_IMM) {
1591     code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1592     #endif
1593     FAIL_IF(!code);
1594     *code = 0x8d;
1595     done = 1;
1596     }
1597     }
1598    
1599     if (done) {
1600     if (dst_r == TMP_REGISTER)
1601     return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
1602     return SLJIT_SUCCESS;
1603     }
1604     return SLJIT_ERR_UNSUPPORTED;
1605     }
1606    
1607     static int emit_cmp_binary(struct sljit_compiler *compiler,
1608     int src1, sljit_w src1w,
1609     int src2, sljit_w src2w)
1610     {
1611     sljit_ub* code;
1612    
1613     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1614     if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1615     #else
1616     if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1617     #endif
1618     BINARY_EAX_IMM(0x3d, src2w);
1619     return SLJIT_SUCCESS;
1620     }
1621    
1622     if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1623     if (src2 & SLJIT_IMM) {
1624     BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
1625     }
1626     else {
1627     code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1628     FAIL_IF(!code);
1629     *code = 0x3b;
1630     }
1631     return SLJIT_SUCCESS;
1632     }
1633    
1634     if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
1635     code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1636     FAIL_IF(!code);
1637     *code = 0x39;
1638     return SLJIT_SUCCESS;
1639     }
1640    
1641     if (src2 & SLJIT_IMM) {
1642     if (src1 & SLJIT_IMM) {
1643     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1644     src1 = TMP_REGISTER;
1645     src1w = 0;
1646     }
1647     BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
1648     }
1649     else {
1650     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1651     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1652     FAIL_IF(!code);
1653     *code = 0x3b;
1654     }
1655     return SLJIT_SUCCESS;
1656     }
1657    
1658     static int emit_test_binary(struct sljit_compiler *compiler,
1659     int src1, sljit_w src1w,
1660     int src2, sljit_w src2w)
1661     {
1662     sljit_ub* code;
1663    
1664     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1665     if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1666     #else
1667     if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1668     #endif
1669     BINARY_EAX_IMM(0xa9, src2w);
1670     return SLJIT_SUCCESS;
1671     }
1672    
1673     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1674     if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1675     #else
1676     if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1677     #endif
1678     BINARY_EAX_IMM(0xa9, src1w);
1679     return SLJIT_SUCCESS;
1680     }
1681    
1682     if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1683     if (src2 & SLJIT_IMM) {
1684     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1685     if (IS_HALFWORD(src2w) || compiler->mode32) {
1686     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1687     FAIL_IF(!code);
1688     *code = 0xf7;
1689     }
1690     else {
1691     FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1692     code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1693     FAIL_IF(!code);
1694     *code = 0x85;
1695     }
1696     #else
1697     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1698     FAIL_IF(!code);
1699     *code = 0xf7;
1700     #endif
1701     }
1702     else {
1703     code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1704     FAIL_IF(!code);
1705     *code = 0x85;
1706     }
1707     return SLJIT_SUCCESS;
1708     }
1709    
1710     if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1711     if (src1 & SLJIT_IMM) {
1712     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1713     if (IS_HALFWORD(src1w) || compiler->mode32) {
1714     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1715     FAIL_IF(!code);
1716     *code = 0xf7;
1717     }
1718     else {
1719     FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1720     code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1721     FAIL_IF(!code);
1722     *code = 0x85;
1723     }
1724     #else
1725     code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1726     FAIL_IF(!code);
1727     *code = 0xf7;
1728     #endif
1729     }
1730     else {
1731     code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1732     FAIL_IF(!code);
1733     *code = 0x85;
1734     }
1735     return SLJIT_SUCCESS;
1736     }
1737    
1738     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1739     if (src2 & SLJIT_IMM) {
1740     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1741     if (IS_HALFWORD(src2w) || compiler->mode32) {
1742     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1743     FAIL_IF(!code);
1744     *code = 0xf7;
1745     }
1746     else {
1747     FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1748     code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
1749     FAIL_IF(!code);
1750     *code = 0x85;
1751     }
1752     #else
1753     code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1754     FAIL_IF(!code);
1755     *code = 0xf7;
1756     #endif
1757     }
1758     else {
1759     code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1760     FAIL_IF(!code);
1761     *code = 0x85;
1762     }
1763     return SLJIT_SUCCESS;
1764     }
1765    
1766     static int emit_shift(struct sljit_compiler *compiler,
1767     sljit_ub mode,
1768     int dst, sljit_w dstw,
1769     int src1, sljit_w src1w,
1770     int src2, sljit_w src2w)
1771     {
1772     sljit_ub* code;
1773    
1774     if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
1775     if (dst == src1 && dstw == src1w) {
1776     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
1777     FAIL_IF(!code);
1778     *code |= mode;
1779     return SLJIT_SUCCESS;
1780     }
1781     if (dst == SLJIT_UNUSED) {
1782     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1783     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
1784     FAIL_IF(!code);
1785     *code |= mode;
1786     return SLJIT_SUCCESS;
1787     }
1788     if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
1789     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1790     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1791     FAIL_IF(!code);
1792     *code |= mode;
1793     EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1794     return SLJIT_SUCCESS;
1795     }
1796     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
1797     EMIT_MOV(compiler, dst, 0, src1, src1w);
1798     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
1799     FAIL_IF(!code);
1800     *code |= mode;
1801     return SLJIT_SUCCESS;
1802     }
1803    
1804     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1805     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
1806     FAIL_IF(!code);
1807     *code |= mode;
1808     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1809     return SLJIT_SUCCESS;
1810     }
1811    
1812     if (dst == SLJIT_PREF_SHIFT_REG) {
1813     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1814     EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
1815     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1816     FAIL_IF(!code);
1817     *code |= mode;
1818     EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1819     }
1820 zherczeg 740 else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
1821 ph10 662 if (src1 != dst)
1822     EMIT_MOV(compiler, dst, 0, src1, src1w);
1823     EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
1824     EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
1825     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
1826     FAIL_IF(!code);
1827     *code |= mode;
1828     EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1829     }
1830     else {
1831 zherczeg 860 /* This case is really difficult, since ecx itself may used for
1832     addressing, and we must ensure to work even in that case. */
1833     EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1834 ph10 662 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1835     EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
1836     #else
1837     /* [esp - 4] is reserved for eflags. */
1838     EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
1839     #endif
1840     EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
1841     code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
1842     FAIL_IF(!code);
1843     *code |= mode;
1844     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1845     EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
1846     #else
1847     /* [esp - 4] is reserved for eflags. */
1848     EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
1849     #endif
1850     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1851     }
1852    
1853     return SLJIT_SUCCESS;
1854     }
1855    
1856 zherczeg 860 static int emit_shift_with_flags(struct sljit_compiler *compiler,
1857     sljit_ub mode, int set_flags,
1858     int dst, sljit_w dstw,
1859     int src1, sljit_w src1w,
1860     int src2, sljit_w src2w)
1861     {
1862     /* The CPU does not set flags if the shift count is 0. */
1863     if (src2 & SLJIT_IMM) {
1864     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1865     if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
1866     return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1867     #else
1868     if ((src2w & 0x1f) != 0)
1869     return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1870     #endif
1871     if (!set_flags)
1872     return emit_mov(compiler, dst, dstw, src1, src1w);
1873     /* OR dst, src, 0 */
1874     return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
1875     dst, dstw, src1, src1w, SLJIT_IMM, 0);
1876     }
1877    
1878     if (!set_flags)
1879     return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1880    
1881     if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS))
1882     FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
1883    
1884     FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
1885    
1886     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
1887     return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
1888     return SLJIT_SUCCESS;
1889     }
1890    
1891 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
1892 ph10 662 int dst, sljit_w dstw,
1893     int src1, sljit_w src1w,
1894     int src2, sljit_w src2w)
1895     {
1896     CHECK_ERROR();
1897     check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1898    
1899     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1900     compiler->mode32 = op & SLJIT_INT_OP;
1901     #endif
1902     CHECK_EXTRA_REGS(dst, dstw, (void)0);
1903     CHECK_EXTRA_REGS(src1, src1w, (void)0);
1904     CHECK_EXTRA_REGS(src2, src2w, (void)0);
1905    
1906     if (GET_OPCODE(op) >= SLJIT_MUL) {
1907     if (SLJIT_UNLIKELY(GET_FLAGS(op)))
1908     compiler->flags_saved = 0;
1909     else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1910     FAIL_IF(emit_save_flags(compiler));
1911     }
1912    
1913     switch (GET_OPCODE(op)) {
1914     case SLJIT_ADD:
1915     if (!GET_FLAGS(op)) {
1916     if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
1917     return compiler->error;
1918     }
1919     else
1920     compiler->flags_saved = 0;
1921     if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1922     FAIL_IF(emit_save_flags(compiler));
1923     return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
1924     dst, dstw, src1, src1w, src2, src2w);
1925     case SLJIT_ADDC:
1926     if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
1927     FAIL_IF(emit_restore_flags(compiler, 1));
1928     else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
1929     FAIL_IF(emit_save_flags(compiler));
1930     if (SLJIT_UNLIKELY(GET_FLAGS(op)))
1931     compiler->flags_saved = 0;
1932     return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
1933     dst, dstw, src1, src1w, src2, src2w);
1934     case SLJIT_SUB:
1935     if (!GET_FLAGS(op)) {
1936     if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
1937     return compiler->error;
1938     }
1939     else
1940     compiler->flags_saved = 0;
1941     if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
1942     FAIL_IF(emit_save_flags(compiler));
1943     if (dst == SLJIT_UNUSED)
1944     return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
1945     return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
1946     dst, dstw, src1, src1w, src2, src2w);
1947     case SLJIT_SUBC:
1948     if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
1949     FAIL_IF(emit_restore_flags(compiler, 1));
1950     else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
1951     FAIL_IF(emit_save_flags(compiler));
1952     if (SLJIT_UNLIKELY(GET_FLAGS(op)))
1953     compiler->flags_saved = 0;
1954     return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
1955     dst, dstw, src1, src1w, src2, src2w);
1956     case SLJIT_MUL:
1957     return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
1958     case SLJIT_AND:
1959     if (dst == SLJIT_UNUSED)
1960     return emit_test_binary(compiler, src1, src1w, src2, src2w);
1961     return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
1962     dst, dstw, src1, src1w, src2, src2w);
1963     case SLJIT_OR:
1964     return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
1965     dst, dstw, src1, src1w, src2, src2w);
1966     case SLJIT_XOR:
1967     return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
1968     dst, dstw, src1, src1w, src2, src2w);
1969     case SLJIT_SHL:
1970 zherczeg 860 return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op),
1971 ph10 662 dst, dstw, src1, src1w, src2, src2w);
1972     case SLJIT_LSHR:
1973 zherczeg 860 return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op),
1974 ph10 662 dst, dstw, src1, src1w, src2, src2w);
1975     case SLJIT_ASHR:
1976 zherczeg 860 return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op),
1977 ph10 662 dst, dstw, src1, src1w, src2, src2w);
1978     }
1979    
1980     return SLJIT_SUCCESS;
1981     }
1982    
1983 zherczeg 839 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
1984     {
1985     check_sljit_get_register_index(reg);
1986     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1987     if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
1988 zherczeg 880 || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
1989 zherczeg 839 return -1;
1990     #endif
1991     return reg_map[reg];
1992     }
1993    
1994     SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
1995     void *instruction, int size)
1996     {
1997     sljit_ub *buf;
1998    
1999     CHECK_ERROR();
2000     check_sljit_emit_op_custom(compiler, instruction, size);
2001     SLJIT_ASSERT(size > 0 && size < 16);
2002    
2003     buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
2004     FAIL_IF(!buf);
2005     INC_SIZE(size);
2006     SLJIT_MEMMOVE(buf, instruction, size);
2007     return SLJIT_SUCCESS;
2008     }
2009    
2010 ph10 662 /* --------------------------------------------------------------------- */
2011     /* Floating point operators */
2012     /* --------------------------------------------------------------------- */
2013    
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
/* Set by init_compiler() from bit 26 of the EDX value returned by CPUID
   leaf 1; selects between the SSE2 and the x87 code paths at run time. */
static int sse2_available = 0;
#endif

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Alignment + 2 * 16 bytes. */
static sljit_i sse2_data[3 + 4 + 4];
/* 16 byte aligned view of sse2_data, set up by init_compiler(). */
static sljit_i *sse2_buffer;

/* One-time initialisation of the SSE2 constants: aligns the buffer, fills in
   the two masks used by the FNEG / FABS emitters and, when SLJIT_SSE2_AUTO
   is enabled, queries CPUID for SSE2 support. */
static void init_compiler()
{
	sljit_uw aligned_addr;
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
	int features = 0;
#endif

	/* Round the address of sse2_data up to the next multiple of 16. */
	aligned_addr = ((sljit_uw)sse2_data + 15) & ~0xf;
	sse2_buffer = (sljit_i*)aligned_addr;

	/* First 16 byte slot: only the top bit of the low 64 bit lane is set
	   (operand of the FNEG emitter). */
	sse2_buffer[0] = 0;
	sse2_buffer[1] = 0x80000000;
	/* Second 16 byte slot: every bit except the top one of the low 64 bit
	   lane is set (operand of the FABS emitter). */
	sse2_buffer[4] = 0xffffffff;
	sse2_buffer[5] = 0x7fffffff;

#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
#ifdef __GNUC__
	/* AT&T syntax. */
	asm (
		"pushl %%ebx\n"
		"movl $0x1, %%eax\n"
		"cpuid\n"
		"popl %%ebx\n"
		"movl %%edx, %0\n"
		: "=g" (features)
		:
		: "%eax", "%ecx", "%edx"
	);
#elif defined(_MSC_VER) || defined(__BORLANDC__)
	/* Intel syntax. */
	__asm {
		mov eax, 1
		push ebx
		cpuid
		pop ebx
		mov features, edx
	}
#else
	#error "SLJIT_SSE2_AUTO is not implemented for this C compiler"
#endif
	/* Bit 26 of EDX is the SSE2 feature flag. */
	sse2_available = (features >> 26) & 0x1;
#endif
}

#endif
2066    
2067 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
2068 ph10 662 {
2069     /* Always available. */
2070     return 1;
2071     }
2072    
2073     #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2074    
2075     static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2076     int xmm1, int xmm2, sljit_w xmm2w)
2077     {
2078     sljit_ub *buf;
2079    
2080     buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2081     FAIL_IF(!buf);
2082     *buf++ = 0x0f;
2083     *buf = opcode;
2084     return SLJIT_SUCCESS;
2085     }
2086    
2087     static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2088     int xmm1, int xmm2, sljit_w xmm2w)
2089     {
2090     sljit_ub *buf;
2091    
2092     buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2093     FAIL_IF(!buf);
2094     *buf++ = 0x0f;
2095     *buf = opcode;
2096     return SLJIT_SUCCESS;
2097     }
2098    
2099     static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
2100     int dst, int src, sljit_w srcw)
2101     {
2102     return emit_sse2(compiler, 0x10, dst, src, srcw);
2103     }
2104    
2105     static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
2106     int dst, sljit_w dstw, int src)
2107     {
2108     return emit_sse2(compiler, 0x11, src, dst, dstw);
2109     }
2110    
2111     #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2112 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2113 ph10 662 #else
2114     static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op,
2115     #endif
2116     int dst, sljit_w dstw,
2117     int src, sljit_w srcw)
2118     {
2119     int dst_r;
2120    
2121     CHECK_ERROR();
2122     check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
2123    
2124     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2125     compiler->mode32 = 1;
2126     #endif
2127    
2128     if (GET_OPCODE(op) == SLJIT_FCMP) {
2129     compiler->flags_saved = 0;
2130     if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
2131     dst_r = dst;
2132     else {
2133     dst_r = TMP_FREG;
2134     FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
2135     }
2136     return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
2137     }
2138    
2139     if (op == SLJIT_FMOV) {
2140     if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
2141     return emit_sse2_load(compiler, dst, src, srcw);
2142     if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
2143     return emit_sse2_store(compiler, dst, dstw, src);
2144     FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
2145     return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
2146     }
2147    
2148     if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
2149     dst_r = dst;
2150     if (dst != src)
2151     FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
2152     }
2153     else {
2154     dst_r = TMP_FREG;
2155     FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
2156     }
2157    
2158     switch (op) {
2159     case SLJIT_FNEG:
2160     FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
2161     break;
2162    
2163     case SLJIT_FABS:
2164     FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
2165     break;
2166     }
2167    
2168     if (dst_r == TMP_FREG)
2169     return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
2170     return SLJIT_SUCCESS;
2171     }
2172    
2173     #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2174 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2175 ph10 662 #else
2176     static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op,
2177     #endif
2178     int dst, sljit_w dstw,
2179     int src1, sljit_w src1w,
2180     int src2, sljit_w src2w)
2181     {
2182     int dst_r;
2183    
2184     CHECK_ERROR();
2185     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2186    
2187     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2188     compiler->mode32 = 1;
2189     #endif
2190    
2191     if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
2192     dst_r = dst;
2193     if (dst == src1)
2194     ; /* Do nothing here. */
2195     else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
2196     /* Swap arguments. */
2197     src2 = src1;
2198     src2w = src1w;
2199     }
2200     else if (dst != src2)
2201     FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
2202     else {
2203     dst_r = TMP_FREG;
2204     FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
2205     }
2206     }
2207     else {
2208     dst_r = TMP_FREG;
2209     FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
2210     }
2211    
2212     switch (op) {
2213     case SLJIT_FADD:
2214     FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w));
2215     break;
2216    
2217     case SLJIT_FSUB:
2218     FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w));
2219     break;
2220    
2221     case SLJIT_FMUL:
2222     FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w));
2223     break;
2224    
2225     case SLJIT_FDIV:
2226     FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w));
2227     break;
2228     }
2229    
2230     if (dst_r == TMP_FREG)
2231     return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
2232     return SLJIT_SUCCESS;
2233     }
2234    
2235     #endif
2236    
2237     #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2)
2238    
2239     static int emit_fld(struct sljit_compiler *compiler,
2240     int src, sljit_w srcw)
2241     {
2242     sljit_ub *buf;
2243    
2244     if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2245     buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2246     FAIL_IF(!buf);
2247     INC_SIZE(2);
2248     *buf++ = 0xd9;
2249     *buf = 0xc0 + src - 1;
2250     return SLJIT_SUCCESS;
2251     }
2252    
2253     buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2254     FAIL_IF(!buf);
2255     *buf = 0xdd;
2256     return SLJIT_SUCCESS;
2257     }
2258    
2259     static int emit_fop(struct sljit_compiler *compiler,
2260     sljit_ub st_arg, sljit_ub st_arg2,
2261     sljit_ub m64fp_arg, sljit_ub m64fp_arg2,
2262     int src, sljit_w srcw)
2263     {
2264     sljit_ub *buf;
2265    
2266     if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2267     buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2268     FAIL_IF(!buf);
2269     INC_SIZE(2);
2270     *buf++ = st_arg;
2271     *buf = st_arg2 + src;
2272     return SLJIT_SUCCESS;
2273     }
2274    
2275     buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2276     FAIL_IF(!buf);
2277     *buf++ = m64fp_arg;
2278     *buf |= m64fp_arg2;
2279     return SLJIT_SUCCESS;
2280     }
2281    
2282     static int emit_fop_regs(struct sljit_compiler *compiler,
2283     sljit_ub st_arg, sljit_ub st_arg2,
2284     int src)
2285     {
2286     sljit_ub *buf;
2287    
2288     buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2289     FAIL_IF(!buf);
2290     INC_SIZE(2);
2291     *buf++ = st_arg;
2292     *buf = st_arg2 + src;
2293     return SLJIT_SUCCESS;
2294     }
2295    
2296     #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2297 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2298 ph10 662 #else
2299     static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op,
2300     #endif
2301     int dst, sljit_w dstw,
2302     int src, sljit_w srcw)
2303     {
2304     #if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2305     sljit_ub *buf;
2306     #endif
2307    
2308     CHECK_ERROR();
2309     check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
2310    
2311     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2312     compiler->mode32 = 1;
2313     #endif
2314    
2315     if (GET_OPCODE(op) == SLJIT_FCMP) {
2316     compiler->flags_saved = 0;
2317     #if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2318     FAIL_IF(emit_fld(compiler, dst, dstw));
2319     FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw));
2320    
2321     /* Copy flags. */
2322     EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
2323     buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
2324     FAIL_IF(!buf);
2325     INC_SIZE(3);
2326     *buf++ = 0xdf;
2327     *buf++ = 0xe0;
2328     /* Note: lahf is not supported on all x86-64 architectures. */
2329     *buf++ = 0x9e;
2330     EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
2331     #else
2332     if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2333     FAIL_IF(emit_fld(compiler, dst, dstw));
2334     FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
2335     } else {
2336     FAIL_IF(emit_fld(compiler, src, srcw));
2337     FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw));
2338     FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
2339     FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0));
2340     }
2341     #endif
2342     return SLJIT_SUCCESS;
2343     }
2344    
2345     FAIL_IF(emit_fld(compiler, src, srcw));
2346    
2347     switch (op) {
2348     case SLJIT_FNEG:
2349     FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0));
2350     break;
2351     case SLJIT_FABS:
2352     FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0));
2353     break;
2354     }
2355    
2356     FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));
2357    
2358     return SLJIT_SUCCESS;
2359     }
2360    
2361     #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2362 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2363 ph10 662 #else
2364     static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op,
2365     #endif
2366     int dst, sljit_w dstw,
2367     int src1, sljit_w src1w,
2368     int src2, sljit_w src2w)
2369     {
2370     CHECK_ERROR();
2371     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2372    
2373     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2374     compiler->mode32 = 1;
2375     #endif
2376    
2377     if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) {
2378     FAIL_IF(emit_fld(compiler, src2, src2w));
2379    
2380     switch (op) {
2381     case SLJIT_FADD:
2382     FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1));
2383     break;
2384     case SLJIT_FSUB:
2385     FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1));
2386     break;
2387     case SLJIT_FMUL:
2388     FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1));
2389     break;
2390     case SLJIT_FDIV:
2391     FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1));
2392     break;
2393     }
2394     return SLJIT_SUCCESS;
2395     }
2396    
2397     FAIL_IF(emit_fld(compiler, src1, src1w));
2398    
2399     if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) {
2400     switch (op) {
2401     case SLJIT_FADD:
2402     FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2));
2403     break;
2404     case SLJIT_FSUB:
2405     FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2));
2406     break;
2407     case SLJIT_FMUL:
2408     FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2));
2409     break;
2410     case SLJIT_FDIV:
2411     FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2));
2412     break;
2413     }
2414     return SLJIT_SUCCESS;
2415     }
2416    
2417     switch (op) {
2418     case SLJIT_FADD:
2419     FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w));
2420     break;
2421     case SLJIT_FSUB:
2422     FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w));
2423     break;
2424     case SLJIT_FMUL:
2425     FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w));
2426     break;
2427     case SLJIT_FDIV:
2428     FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w));
2429     break;
2430     }
2431    
2432     FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));
2433    
2434     return SLJIT_SUCCESS;
2435     }
2436     #endif
2437    
2438     #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2439    
2440 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2441 ph10 662 int dst, sljit_w dstw,
2442     int src, sljit_w srcw)
2443     {
2444     if (sse2_available)
2445     return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw);
2446     else
2447     return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw);
2448     }
2449    
2450 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2451 ph10 662 int dst, sljit_w dstw,
2452     int src1, sljit_w src1w,
2453     int src2, sljit_w src2w)
2454     {
2455     if (sse2_available)
2456     return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2457     else
2458     return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2459     }
2460    
2461     #endif
2462    
2463     /* --------------------------------------------------------------------- */
2464     /* Conditional instructions */
2465     /* --------------------------------------------------------------------- */
2466    
2467 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2468 ph10 662 {
2469     sljit_ub *buf;
2470     struct sljit_label *label;
2471    
2472     CHECK_ERROR_PTR();
2473     check_sljit_emit_label(compiler);
2474    
2475     /* We should restore the flags before the label,
2476     since other taken jumps has their own flags as well. */
2477     if (SLJIT_UNLIKELY(compiler->flags_saved))
2478     PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2479    
2480     if (compiler->last_label && compiler->last_label->size == compiler->size)
2481     return compiler->last_label;
2482    
2483     label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2484     PTR_FAIL_IF(!label);
2485     set_label(label, compiler);
2486    
2487     buf = (sljit_ub*)ensure_buf(compiler, 2);
2488     PTR_FAIL_IF(!buf);
2489    
2490     *buf++ = 0;
2491     *buf++ = 0;
2492    
2493     return label;
2494     }
2495    
2496 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
2497 ph10 662 {
2498     sljit_ub *buf;
2499     struct sljit_jump *jump;
2500    
2501     CHECK_ERROR_PTR();
2502     check_sljit_emit_jump(compiler, type);
2503    
2504     if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2505     if ((type & 0xff) <= SLJIT_JUMP)
2506     PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2507     compiler->flags_saved = 0;
2508     }
2509    
2510     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2511     PTR_FAIL_IF_NULL(jump);
2512     set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2513     type &= 0xff;
2514    
2515     if (type >= SLJIT_CALL1)
2516     PTR_FAIL_IF(call_with_args(compiler, type));
2517    
2518     /* Worst case size. */
2519     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2520     compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2521     #else
2522     compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2523     #endif
2524    
2525     buf = (sljit_ub*)ensure_buf(compiler, 2);
2526     PTR_FAIL_IF_NULL(buf);
2527    
2528     *buf++ = 0;
2529     *buf++ = type + 4;
2530     return jump;
2531     }
2532    
2533 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
2534 ph10 662 {
2535     sljit_ub *code;
2536     struct sljit_jump *jump;
2537    
2538     CHECK_ERROR();
2539     check_sljit_emit_ijump(compiler, type, src, srcw);
2540    
2541     CHECK_EXTRA_REGS(src, srcw, (void)0);
2542     if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2543     if (type <= SLJIT_JUMP)
2544     FAIL_IF(emit_restore_flags(compiler, 0));
2545     compiler->flags_saved = 0;
2546     }
2547    
2548     if (type >= SLJIT_CALL1) {
2549     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2550     #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2551     if (src == SLJIT_TEMPORARY_REG3) {
2552     EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
2553     src = TMP_REGISTER;
2554     }
2555     if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
2556     if (src & 0xf0) {
2557     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
2558     src = TMP_REGISTER;
2559     }
2560     else
2561     srcw += sizeof(sljit_w);
2562     }
2563     #else
2564     if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
2565     if (src & 0xf0) {
2566     EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
2567     src = TMP_REGISTER;
2568     }
2569     else
2570     srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
2571     }
2572     #endif
2573     #endif
2574     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2575     if (src == SLJIT_TEMPORARY_REG3) {
2576     EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
2577     src = TMP_REGISTER;
2578     }
2579     #endif
2580     FAIL_IF(call_with_args(compiler, type));
2581     }
2582    
2583     if (src == SLJIT_IMM) {
2584     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2585     FAIL_IF_NULL(jump);
2586     set_jump(jump, compiler, JUMP_ADDR);
2587     jump->u.target = srcw;
2588    
2589     /* Worst case size. */
2590     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2591     compiler->size += 5;
2592     #else
2593     compiler->size += 10 + 3;
2594     #endif
2595    
2596     code = (sljit_ub*)ensure_buf(compiler, 2);
2597     FAIL_IF_NULL(code);
2598    
2599     *code++ = 0;
2600     *code++ = type + 4;
2601     }
2602     else {
2603     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2604     /* REX_W is not necessary (src is not immediate). */
2605     compiler->mode32 = 1;
2606     #endif
2607     code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2608     FAIL_IF(!code);
2609     *code++ = 0xff;
2610 zherczeg 722 *code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
2611 ph10 662 }
2612     return SLJIT_SUCCESS;
2613     }
2614    
2615 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
2616 ph10 662 {
2617     sljit_ub *buf;
2618     sljit_ub cond_set = 0;
2619     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2620     int reg;
2621     #endif
2622    
2623     CHECK_ERROR();
2624     check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
2625    
2626     if (dst == SLJIT_UNUSED)
2627     return SLJIT_SUCCESS;
2628    
2629     CHECK_EXTRA_REGS(dst, dstw, (void)0);
2630     if (SLJIT_UNLIKELY(compiler->flags_saved))
2631     FAIL_IF(emit_restore_flags(compiler, 0));
2632    
2633     switch (type) {
2634     case SLJIT_C_EQUAL:
2635     case SLJIT_C_FLOAT_EQUAL:
2636     cond_set = 0x94;
2637     break;
2638    
2639     case SLJIT_C_NOT_EQUAL:
2640     case SLJIT_C_FLOAT_NOT_EQUAL:
2641     cond_set = 0x95;
2642     break;
2643    
2644     case SLJIT_C_LESS:
2645     case SLJIT_C_FLOAT_LESS:
2646     cond_set = 0x92;
2647     break;
2648    
2649     case SLJIT_C_GREATER_EQUAL:
2650     case SLJIT_C_FLOAT_GREATER_EQUAL:
2651     cond_set = 0x93;
2652     break;
2653    
2654     case SLJIT_C_GREATER:
2655     case SLJIT_C_FLOAT_GREATER:
2656     cond_set = 0x97;
2657     break;
2658    
2659     case SLJIT_C_LESS_EQUAL:
2660     case SLJIT_C_FLOAT_LESS_EQUAL:
2661     cond_set = 0x96;
2662     break;
2663    
2664     case SLJIT_C_SIG_LESS:
2665     cond_set = 0x9c;
2666     break;
2667    
2668     case SLJIT_C_SIG_GREATER_EQUAL:
2669     cond_set = 0x9d;
2670     break;
2671    
2672     case SLJIT_C_SIG_GREATER:
2673     cond_set = 0x9f;
2674     break;
2675    
2676     case SLJIT_C_SIG_LESS_EQUAL:
2677     cond_set = 0x9e;
2678     break;
2679    
2680     case SLJIT_C_OVERFLOW:
2681     case SLJIT_C_MUL_OVERFLOW:
2682     cond_set = 0x90;
2683     break;
2684    
2685     case SLJIT_C_NOT_OVERFLOW:
2686     case SLJIT_C_MUL_NOT_OVERFLOW:
2687     cond_set = 0x91;
2688     break;
2689    
2690     case SLJIT_C_FLOAT_NAN:
2691     cond_set = 0x9a;
2692     break;
2693    
2694     case SLJIT_C_FLOAT_NOT_NAN:
2695     cond_set = 0x9b;
2696     break;
2697     }
2698    
2699     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2700     reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
2701    
2702     buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
2703     FAIL_IF(!buf);
2704     INC_SIZE(4 + 4);
2705     /* Set low register to conditional flag. */
2706     *buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
2707     *buf++ = 0x0f;
2708     *buf++ = cond_set;
2709     *buf++ = 0xC0 | reg_lmap[reg];
2710     *buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2711     *buf++ = 0x0f;
2712     *buf++ = 0xb6;
2713     *buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];
2714    
2715     if (reg == TMP_REGISTER) {
2716     if (op == SLJIT_MOV) {
2717     compiler->mode32 = 0;
2718     EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
2719     }
2720     else {
2721     #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2722     compiler->skip_checks = 1;
2723     #endif
2724     return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
2725     }
2726     }
2727     #else
2728     if (op == SLJIT_MOV) {
2729     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
2730     buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
2731     FAIL_IF(!buf);
2732     INC_SIZE(3 + 3);
2733     /* Set low byte to conditional flag. */
2734     *buf++ = 0x0f;
2735     *buf++ = cond_set;
2736     *buf++ = 0xC0 | reg_map[dst];
2737    
2738     *buf++ = 0x0f;
2739     *buf++ = 0xb6;
2740     *buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
2741     }
2742     else {
2743     EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
2744    
2745     buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
2746     FAIL_IF(!buf);
2747     INC_SIZE(3 + 3);
2748     /* Set al to conditional flag. */
2749     *buf++ = 0x0f;
2750     *buf++ = cond_set;
2751     *buf++ = 0xC0;
2752    
2753     *buf++ = 0x0f;
2754     *buf++ = 0xb6;
2755 zherczeg 880 if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS)
2756 ph10 662 *buf = 0xC0 | (reg_map[dst] << 3);
2757     else {
2758     *buf = 0xC0;
2759     EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
2760     }
2761    
2762     EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
2763     }
2764     }
2765     else {
2766     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
2767     EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
2768     buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
2769     FAIL_IF(!buf);
2770     INC_SIZE(3);
2771    
2772     *buf++ = 0x0f;
2773     *buf++ = cond_set;
2774     *buf++ = 0xC0 | reg_map[dst];
2775     }
2776     else {
2777     EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
2778    
2779     buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
2780     FAIL_IF(!buf);
2781     INC_SIZE(3 + 3 + 1);
2782     /* Set al to conditional flag. */
2783     *buf++ = 0x0f;
2784     *buf++ = cond_set;
2785     *buf++ = 0xC0;
2786    
2787     *buf++ = 0x0f;
2788     *buf++ = 0xb6;
2789     *buf++ = 0xC0;
2790    
2791     *buf++ = 0x90 + reg_map[TMP_REGISTER];
2792     }
2793     #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
2794     compiler->skip_checks = 1;
2795     #endif
2796     return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
2797     }
2798     #endif
2799    
2800     return SLJIT_SUCCESS;
2801     }
2802    
2803 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
2804 ph10 662 {
2805     sljit_ub *buf;
2806     struct sljit_const *const_;
2807     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2808     int reg;
2809     #endif
2810    
2811     CHECK_ERROR_PTR();
2812     check_sljit_emit_const(compiler, dst, dstw, init_value);
2813    
2814     CHECK_EXTRA_REGS(dst, dstw, (void)0);
2815    
2816     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2817     PTR_FAIL_IF(!const_);
2818     set_const(const_, compiler);
2819    
2820     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2821     compiler->mode32 = 0;
2822     reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
2823    
2824     if (emit_load_imm64(compiler, reg, init_value))
2825     return NULL;
2826     #else
2827     if (dst == SLJIT_UNUSED)
2828     dst = TMP_REGISTER;
2829    
2830     if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2831     return NULL;
2832     #endif
2833    
2834     buf = (sljit_ub*)ensure_buf(compiler, 2);
2835     PTR_FAIL_IF(!buf);
2836    
2837     *buf++ = 0;
2838     *buf++ = 1;
2839    
2840     #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2841     if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
2842     if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
2843     return NULL;
2844     #endif
2845    
2846     return const_;
2847     }
2848    
2849 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2850 ph10 662 {
2851     #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2852     *(sljit_w*)addr = new_addr - (addr + 4);
2853     #else
2854     *(sljit_uw*)addr = new_addr;
2855     #endif
2856     }
2857    
2858 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
2859 ph10 662 {
2860     *(sljit_w*)addr = new_constant;
2861     }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12