/[pcre]/code/trunk/sljit/sljitNativeX86_common.c
ViewVC logotype

Contents of /code/trunk/sljit/sljitNativeX86_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 906 - (show annotations) (download)
Tue Jan 24 09:55:16 2012 UTC (2 years, 3 months ago) by zherczeg
File MIME type: text/plain
File size: 79831 byte(s)
opcode lengths must be private for printing and fixed some compiler warnings
1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
28 {
29 return "x86" SLJIT_CPUINFO;
30 }
31
32 /*
33 32b register indexes:
34 0 - EAX
35 1 - ECX
36 2 - EDX
37 3 - EBX
38 4 - none
39 5 - EBP
40 6 - ESI
41 7 - EDI
42 */
43
44 /*
45 64b register indexes:
46 0 - RAX
47 1 - RCX
48 2 - RDX
49 3 - RBX
50 4 - none
51 5 - RBP
52 6 - RSI
53 7 - RDI
54 8 - R8 - From now on REX prefix is required
55 9 - R9
56 10 - R10
57 11 - R11
58 12 - R12
59 13 - R13
60 14 - R14
61 15 - R15
62 */
63
64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
65
66 /* Last register + 1. */
67 #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
68
69 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
70 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
71 };
72
73 #define CHECK_EXTRA_REGS(p, w, do) \
74 if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
75 w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
76 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
77 do; \
78 } \
79 else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
80 w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_w); \
81 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
82 do; \
83 }
84
85 #else /* SLJIT_CONFIG_X86_32 */
86
87 /* Last register + 1. */
88 #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
89 #define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
90 #define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
91
92 /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
93 Note: avoid to use r12 and r13 for memory addessing
94 therefore r12 is better for SAVED_EREG than SAVED_REG. */
95 #ifndef _WIN64
96 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
97 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
98 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
99 };
100 /* low-map. reg_map & 0x7. */
101 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
102 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
103 };
104 #else
105 /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
106 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
107 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
108 };
109 /* low-map. reg_map & 0x7. */
110 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
111 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 4, 7, 2, 0, 1
112 };
113 #endif
114
115 #define REX_W 0x48
116 #define REX_R 0x44
117 #define REX_X 0x42
118 #define REX_B 0x41
119 #define REX 0x40
120
121 typedef unsigned int sljit_uhw;
122 typedef int sljit_hw;
123
124 #define IS_HALFWORD(x) ((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
125 #define NOT_HALFWORD(x) ((x) > 0x7fffffffll || (x) < -0x80000000ll)
126
127 #define CHECK_EXTRA_REGS(p, w, do)
128
129 #endif /* SLJIT_CONFIG_X86_32 */
130
131 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
132 #define TMP_FREG (SLJIT_FLOAT_REG4 + 1)
133 #endif
134
135 /* Size flags for emit_x86_instruction: */
136 #define EX86_BIN_INS 0x0010
137 #define EX86_SHIFT_INS 0x0020
138 #define EX86_REX 0x0040
139 #define EX86_NO_REXW 0x0080
140 #define EX86_BYTE_ARG 0x0100
141 #define EX86_HALF_ARG 0x0200
142 #define EX86_PREF_66 0x0400
143
144 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
145 #define EX86_PREF_F2 0x0800
146 #define EX86_SSE2 0x1000
147 #endif
148
149 #define INC_SIZE(s) (*buf++ = (s), compiler->size += (s))
150 #define INC_CSIZE(s) (*code++ = (s), compiler->size += (s))
151
152 #define PUSH_REG(r) (*buf++ = (0x50 + (r)))
153 #define POP_REG(r) (*buf++ = (0x58 + (r)))
154 #define RET() (*buf++ = (0xc3))
155 #define RETN(n) (*buf++ = (0xc2), *buf++ = n, *buf++ = 0)
156 /* r32, r/m32 */
157 #define MOV_RM(mod, reg, rm) (*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
158
159 static sljit_ub get_jump_code(int type)
160 {
161 switch (type) {
162 case SLJIT_C_EQUAL:
163 case SLJIT_C_FLOAT_EQUAL:
164 return 0x84;
165
166 case SLJIT_C_NOT_EQUAL:
167 case SLJIT_C_FLOAT_NOT_EQUAL:
168 return 0x85;
169
170 case SLJIT_C_LESS:
171 case SLJIT_C_FLOAT_LESS:
172 return 0x82;
173
174 case SLJIT_C_GREATER_EQUAL:
175 case SLJIT_C_FLOAT_GREATER_EQUAL:
176 return 0x83;
177
178 case SLJIT_C_GREATER:
179 case SLJIT_C_FLOAT_GREATER:
180 return 0x87;
181
182 case SLJIT_C_LESS_EQUAL:
183 case SLJIT_C_FLOAT_LESS_EQUAL:
184 return 0x86;
185
186 case SLJIT_C_SIG_LESS:
187 return 0x8c;
188
189 case SLJIT_C_SIG_GREATER_EQUAL:
190 return 0x8d;
191
192 case SLJIT_C_SIG_GREATER:
193 return 0x8f;
194
195 case SLJIT_C_SIG_LESS_EQUAL:
196 return 0x8e;
197
198 case SLJIT_C_OVERFLOW:
199 case SLJIT_C_MUL_OVERFLOW:
200 return 0x80;
201
202 case SLJIT_C_NOT_OVERFLOW:
203 case SLJIT_C_MUL_NOT_OVERFLOW:
204 return 0x81;
205
206 case SLJIT_C_FLOAT_NAN:
207 return 0x8a;
208
209 case SLJIT_C_FLOAT_NOT_NAN:
210 return 0x8b;
211 }
212 return 0;
213 }
214
215 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);
216
217 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
218 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
219 #endif
220
/* Emits the opcode bytes of a relative jump/call for "jump" at code_ptr,
   choosing the short (8 bit displacement) form when the target is in
   range.  Only the opcode is written here; the displacement bytes are
   reserved and later filled in by the patch loop of
   sljit_generate_code (PATCH_MB / PATCH_MW flags).  jump->addr is
   advanced past the opcode so it points at the displacement field. */
static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
{
	int short_jump;
	sljit_uw label_addr;

	/* Resolve the destination: a label in this compilation unit or an
	   absolute target address. */
	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	/* +2 is the size of the short form (opcode byte + rel8). */
	short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* On x86-64 a 32 bit displacement may not reach the target; fall
	   back to an absolute (far) jump. */
	if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = 0xeb; /* jmp rel8 */
		else
			*code_ptr++ = 0xe9; /* jmp rel32 */
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		/* Calls have no short form. */
		short_jump = 0;
		*code_ptr++ = 0xe8; /* call rel32 */
		jump->addr++;
	}
	else if (short_jump) {
		/* The short jcc opcode is the long-form second byte - 0x10. */
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = 0x0f; /* two-byte jcc rel32 */
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	/* Reserve room for the displacement; patched later. */
	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_b);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_w);
#else
		code_ptr += sizeof(sljit_hw);
#endif
	}

	return code_ptr;
}
273
/* Second pass of code generation: walks the compiler's memory
   fragments, copies the already encoded machine code into one
   executable buffer, resolves label/jump/const records on the way,
   then patches every reserved jump displacement. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			/* Each record starts with a length byte: positive means
			   "len raw instruction bytes follow"; zero introduces a
			   tagged record whose kind is in the next byte. */
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					/* Jump record; tag - 4 is the jump type. */
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					/* Label record: bind it to the current address. */
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					/* Constant record: the immediate was emitted just
					   before this point; remember where it lives. */
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
					const_ = const_->next;
				}
				else {
					/* Fixed call/jump to an absolute address stored
					   inline after the tag byte (tags 2 and 3). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
					buf_ptr++;
					*(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
					code_ptr += sizeof(sljit_w);
					buf_ptr += sizeof(sljit_w) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_w);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	/* All records must have been consumed exactly. */
	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	/* Patch pass: every label address is now known, so fill in the
	   displacements reserved by generate_near_jump_code. */
	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			/* 8 bit displacement. */
			SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
		} else if (jump->flags & PATCH_MW) {
			/* Word-sized (32 bit on x86-64) displacement. */
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
#else
				SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
#else
				SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Absolute machine-word address (rewritable jumps). */
		else if (jump->flags & PATCH_MD)
			*(sljit_w*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = compiler->size;
	return (void*)code;
}
389
390 /* --------------------------------------------------------------------- */
391 /* Operators */
392 /* --------------------------------------------------------------------- */
393
394 static int emit_cum_binary(struct sljit_compiler *compiler,
395 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
396 int dst, sljit_w dstw,
397 int src1, sljit_w src1w,
398 int src2, sljit_w src2w);
399
400 static int emit_non_cum_binary(struct sljit_compiler *compiler,
401 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
402 int dst, sljit_w dstw,
403 int src1, sljit_w src1w,
404 int src2, sljit_w src2w);
405
406 static int emit_mov(struct sljit_compiler *compiler,
407 int dst, sljit_w dstw,
408 int src, sljit_w srcw);
409
/* Saves the status flags without permanently moving the stack pointer:
   pushf stores the flags, then lea moves esp/rsp back up by one word,
   leaving the saved flags in the slot just below the stack pointer.
   Undone by emit_restore_flags. */
static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
	*buf++ = 0x9c; /* pushfd */
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = 0x9c; /* pushfq */
	*buf++ = 0x48; /* REX.W prefix for the following lea */
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
	*buf++ = 0x64;
	*buf++ = 0x24;
	*buf++ = sizeof(sljit_w);
	/* Record that a later emit_restore_flags is required. */
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}
433
/* Restores the flags stored below the stack pointer by
   emit_save_flags: lea moves esp/rsp back down onto the saved word,
   then popf reloads it.  keep_flags becomes the new flags_saved
   state. */
static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = 0x48; /* REX.W prefix for the lea */
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
	*buf++ = 0x64;
	*buf++ = 0x24;
	*buf++ = (sljit_ub)-(int)sizeof(sljit_w);
	*buf++ = 0x9d; /* popfd / popfq */
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
456
#ifdef _WIN32
#include <malloc.h>

/* Windows-only helper: forces the compiler to emit a _chkstk call,
   which touches the stack pages of the requested area so the guard
   pages get committed in order.  NOTE(review): relies on alloca()
   lowering to _chkstk on MSVC-compatible toolchains. */
static void SLJIT_CALL sljit_touch_stack(sljit_w local_size)
{
	/* Workaround for calling _chkstk. */
	alloca(local_size);
}
#endif
466
467 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
468 #include "sljitNativeX86_32.c"
469 #else
470 #include "sljitNativeX86_64.c"
471 #endif
472
/* Emits a machine-word sized move between any combination of register,
   memory and immediate operands.  dst == SLJIT_UNUSED discards the
   result (only a load into TMP_REGISTER is kept if src is memory).
   Memory-to-memory moves go through TMP_REGISTER. */
static int emit_mov(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* No destination, doesn't need to setup flags. */
		if (src & SLJIT_MEM) {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8b; /* mov r, r/m */
		}
		return SLJIT_SUCCESS;
	}
	if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
		code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x89; /* mov r/m, r */
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); /* mov r32, imm32 */
#else
			if (!compiler->mode32) {
				/* Immediate does not fit in 32 bits: 64 bit load. */
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			/* 64 bit immediate to memory: stage it in TMP_REG2. */
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x89;
			return SLJIT_SUCCESS;
		}
#endif
		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc7; /* mov r/m, imm32 */
		return SLJIT_SUCCESS;
	}
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
		code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!code);
		*code = 0x8b; /* mov r, r/m */
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instruction. */
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code = 0x8b;
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
	FAIL_IF(!code);
	*code = 0x89;
	return SLJIT_SUCCESS;
}
537
538 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
539 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
540
/* Emits a zero-operand operation: breakpoint (int3), nop, or one of
   the widening multiply/divide operations that implicitly use the
   eax/edx (rax/rdx) register pair. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
{
	sljit_ub *buf;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int size;
#endif

	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0xcc; /* int3 */
		break;
	case SLJIT_NOP:
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0x90; /* nop */
		break;
	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		/* These instructions clobber the saved flags. */
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* The code below hard-codes which SLJIT registers map to
		   eax/edx; the asserts pin that assignment. */
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] == 2
			&& reg_map[TMP_REGISTER] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] < 7
			&& reg_map[TMP_REGISTER] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_UDIV) {
			/* Unsigned divide: clear the high half of the dividend
			   (after saving REG2 where it aliases edx/rdx). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
			buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
#else
			buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
#endif
			FAIL_IF(!buf);
			*buf = 0x33; /* xor r, r/m */
		}

		if (op == SLJIT_SDIV) {
			/* Signed divide: sign-extend the dividend into edx/rdx. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
#endif

			/* CDQ instruction */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!buf);
			INC_SIZE(1);
			*buf = 0x99; /* cdq */
#else
			if (compiler->mode32) {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!buf);
				INC_SIZE(1);
				*buf = 0x99; /* cdq */
			} else {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!buf);
				INC_SIZE(2);
				*buf++ = REX_W;
				*buf = 0x99; /* cqo */
			}
#endif
		}

		/* Emit the 0xf7 opcode group; the modrm reg field, set in the
		   switch below, selects mul/imul/div/idiv. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!buf);
		INC_SIZE(2);
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!buf);
		INC_SIZE(size);
#ifdef _WIN64
		/* REX.B is needed when the operand is an r8..r15 register. */
		if (!compiler->mode32)
			*buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
		else if (op >= SLJIT_UDIV)
			*buf++ = REX_B;
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
#else
		if (!compiler->mode32)
			*buf++ = REX_W;
		*buf++ = 0xf7;
		*buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
#endif
#endif
		switch (op) {
		case SLJIT_UMUL:
			*buf |= 4 << 3; /* mul */
			break;
		case SLJIT_SMUL:
			*buf |= 5 << 3; /* imul */
			break;
		case SLJIT_UDIV:
			*buf |= 6 << 3; /* div */
			break;
		case SLJIT_SDIV:
			*buf |= 7 << 3; /* idiv */
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		/* Move the high half / remainder back into REG2. */
		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}
676
/* Emits a single prefix/opcode byte as its own record in the
   instruction stream (used below for the one-byte "xchg eax, reg"
   encoding). */
#define ENCODE_PREFIX(prefix) \
	do { \
		code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!code); \
		INC_CSIZE(1); \
		*code = (prefix); \
	} while (0)
684
/* Emits a byte-sized move with zero (sign == 0) or sign (sign != 0)
   extension.  On x86-32 only registers with reg_map < 4 (eax..ebx)
   have byte encodings, so other registers must either be shuffled
   through one that does, or widened in place with shifts/masking. */
static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	int work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); /* mov r32, imm32 */
#else
			return emit_load_imm64(compiler, dst, srcw);
#endif
		}
		code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc6; /* mov r/m8, imm8 */
		return SLJIT_SUCCESS;
	}

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			/* src has no byte encoding; stage it in TMP_REGISTER. */
			SLJIT_ASSERT(dst_r == TMP_REGISTER);
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			/* movsx/movzx dst, dst_low8 */
			code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!code);
			*code++ = 0x0f;
			*code = sign ? 0xbe : 0xb6;
		}
		else {
			/* dst has no byte form either: widen in place. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				*code |= 0x4 << 3;
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				/* shr/sar reg, 24 */
				*code |= 0x7 << 3;
			}
			else {
				/* and dst, 0xff */
				code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
				FAIL_IF(!code);
				*(code + 1) |= 0x4 << 3;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0x0f;
		*code = sign ? 0xbe : 0xb6; /* movsx / movzx */
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REGISTER) {
			/* Find a non-used register, whose reg_map[src] < 4. */
			if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
				if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}
			else {
				if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
					work_r = SLJIT_TEMPORARY_REG1;
				else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}

			/* Swap work_r with TMP_REGISTER, store the byte through
			   work_r, then swap back.  When work_r is REG1 (eax) the
			   single-byte "xchg eax, reg" form is used. */
			if (work_r == SLJIT_TEMPORARY_REG1) {
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]); /* xchg eax, TMP */
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87; /* xchg r, r/m */
			}

			code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88; /* mov r/m8, r8 */

			if (work_r == SLJIT_TEMPORARY_REG1) {
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87;
			}
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88;
		}
#else
		/* On x86-64 every register has a byte form with a REX prefix. */
		code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x88; /* mov r/m8, r8 */
#endif
	}

	return SLJIT_SUCCESS;
}
827
/* Emits a 16 bit move with zero (sign == 0) or sign (sign != 0)
   extension.  The 0x66 operand-size prefix (EX86_PREF_66) selects the
   16 bit forms for immediate and register stores. */
static int emit_mov_half(struct sljit_compiler *compiler, int sign,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); /* mov r32, imm32 */
#else
			return emit_load_imm64(compiler, dst, srcw);
#endif
		}
		code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc7; /* mov r/m16, imm16 */
		return SLJIT_SUCCESS;
	}

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	/* Register source going to memory needs no extension; store it
	   directly in the block below. */
	if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
		dst_r = src;
	else {
		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0x0f;
		*code = sign ? 0xbf : 0xb7; /* movsx / movzx r, r/m16 */
	}

	if (dst & SLJIT_MEM) {
		code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x89; /* mov r/m16, r16 */
	}

	return SLJIT_SUCCESS;
}
875
/* Emits a unary operation from the 0xf7 opcode group; un_index is the
   modrm reg-field value selecting the operation (e.g. 2 = not,
   3 = neg).  Handles discarded, in-place, register and memory
   destinations. */
static int emit_unary(struct sljit_compiler *compiler, int un_index,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded; operate on TMP_REGISTER (for the flags). */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= (un_index) << 3;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= (un_index) << 3;
		return SLJIT_SUCCESS;
	}
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		/* Register destination: copy, then operate in place. */
		EMIT_MOV(compiler, dst, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= (un_index) << 3;
		return SLJIT_SUCCESS;
	}
	/* Memory destination: compute in TMP_REGISTER, then store. */
	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
	code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0xf7;
	*code |= (un_index) << 3;
	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	return SLJIT_SUCCESS;
}
914
/* Emits "not" (0xf7 /2) followed by "or reg, reg" (0x0b): not does
   not update the status flags, so the extra or makes the flags
   reflect the result. */
static int emit_not_with_flags(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded; only the flags are wanted. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= 0x2 << 3; /* not */
		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code = 0x0b; /* or r, r/m */
		return SLJIT_SUCCESS;
	}
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		/* Register destination: copy, then not + or in place. */
		EMIT_MOV(compiler, dst, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= 0x2 << 3;
		code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!code);
		*code = 0x0b;
		return SLJIT_SUCCESS;
	}
	/* Memory destination: compute in TMP_REGISTER, then store. */
	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
	code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0xf7;
	*code |= 0x2 << 3;
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code = 0x0b;
	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	return SLJIT_SUCCESS;
}
954
/* Emits a count-leading-zeros sequence: bsr finds the index of the
   highest set bit, cmovne selects it over a preloaded fallback (used
   when the input is zero and bsr leaves ZF set), and the final xor
   with 31 (or 63) converts the bit index into a leading-zero count. */
static int emit_clz(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;

	SLJIT_UNUSED_ARG(op);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= 0x2 << 3; /* not */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
#else
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
#endif
		FAIL_IF(!code);
		*code |= 0x5 << 3; /* shr */
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		/* bsr takes a register or memory source only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		src = TMP_REGISTER;
		srcw = 0;
	}

	code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0xbd; /* bsr */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
			dst_r = SLJIT_TEMPORARY_REG1;
		else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
			dst_r = SLJIT_TEMPORARY_REG2;
		else
			dst_r = SLJIT_TEMPORARY_REG3;
		/* Park dst_r's old value in the destination slot; the final
		   xchg below both stores the result and restores dst_r. */
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	/* Fallback for zero input: (32 + 31) xor 31 == 32. */
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
	/* Load the fallback in 64 bit mode regardless of op. */
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0x45; /* cmovne */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!code);
	*(code + 1) |= 0x6 << 3; /* xor */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* xchg stores the result and restores dst_r's old value. */
		code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x87; /* xchg r, r/m */
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
1037
/* Emit a single-operand operation: all SLJIT_MOV* / SLJIT_MOVU* variants,
   plus SLJIT_NOT, SLJIT_NEG and SLJIT_CLZ.  Operands use the common sljit
   encoding (register, SLJIT_MEM* addressing or SLJIT_IMM). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int update = 0;	/* Set for the pre-update (SLJIT_MOVU*) forms. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	int dst_is_ereg = 0;
	int src_is_ereg = 0;
#else
	/* On x86-64 there are enough hardware registers; the "extra register"
	   (memory-backed) case never occurs for the source operand. */
	#define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	/* Rewrite memory-backed "extra" registers into stack references. */
	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);

	if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
		op = GET_OPCODE(op);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			/* Map SLJIT_MOVU* back to the matching SLJIT_MOV* and remember
			   that the base address register must be updated. */
			update = 1;
			op -= 7;
		}

		if (src & SLJIT_IMM) {
			/* Narrow the immediate to the exact source width so the stored
			   value matches a real load of that width. */
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (unsigned char)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (signed char)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (unsigned short)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (signed short)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (unsigned int)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (signed int)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			/* Immediate to a memory-backed ereg: a plain word mov suffices. */
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		/* Pre-update form: emit "lea base, [address]" (0x8d) to advance the
		   base register, then address the source through the base alone. */
		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8d;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* A sized or mem-to-mem move cannot target the memory-backed ereg
		   directly; go through TMP_REGISTER and store it afterwards. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
			dst = TMP_REGISTER;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Flush the temporary back to the ereg's home stack slot. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
#endif

		/* Pre-update of the destination base register, again via lea. */
		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x8d;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op)))
		compiler->flags_saved = 0;

	switch (GET_OPCODE(op)) {
	case SLJIT_NOT:
		/* NOT does not set flags; use the OR-based variant when E is needed. */
		if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, 0x2, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, 0x3, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	#undef src_is_ereg
#endif
}
1185
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* Emit "<op> arg, immw".  On x86-64 an immediate wider than 32 bits cannot
   be encoded in the instruction, so it is first loaded into TMP_REG2 and
   the r/m,reg form (_op_mr_) is used instead.  _op_imm_ is the /digit of
   the immediate group, OR-ed into the ModRM byte at code + 1. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!code); \
		*(code + 1) |= (_op_imm_); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!code); \
		*code = (_op_mr_); \
	}

/* Emit the short accumulator form "<op> eax/rax, imm32". */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))

#else

/* x86-32: every immediate fits in the instruction, use it directly. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!code); \
	*(code + 1) |= (_op_imm_);

/* Emit the short accumulator form "<op> eax, imm32". */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))

#endif
1215
/* Emit a commutative two-operand ALU operation (ADD/ADC/AND/OR/XOR):
   dst = src1 <op> src2.  The four opcode bytes select among the x86
   encodings: op_rm (reg <- r/m), op_mr (r/m <- reg), op_imm (the /digit
   of the immediate group, applied through BINARY_IMM), and op_eax_imm
   (the short "op eax, imm32" accumulator form). */
static int emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result is discarded: compute into TMP_REGISTER (for the flags). */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	/* In-place form: dst <op>= src2. */
	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
			/* Use the accumulator encoding only when the immediate does not
			   fit the shorter sign-extended 8-bit immediate form. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
			/* Special exception for sljit_emit_cond_value. */
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* mem <op>= mem: stage src2 in TMP_REGISTER first. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		/* Commutativity lets dst <op>= src1 here. */
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		/* Register destination: compute directly in dst. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1331
/* Emit a non-commutative two-operand ALU operation (SUB/SBB):
   dst = src1 <op> src2.  Same opcode-byte convention as emit_cum_binary,
   but without the dst == src2 shortcut (the operands cannot be swapped)
   and with a dst != src2 guard in the general case. */
static int emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result is discarded: compute into TMP_REGISTER (for the flags). */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	/* In-place form: dst <op>= src2. */
	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
			/* Accumulator encoding only when the 8-bit form does not apply. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
		/* Register destination (and not the subtrahend): compute in place. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1413
1414 static int emit_mul(struct sljit_compiler *compiler,
1415 int dst, sljit_w dstw,
1416 int src1, sljit_w src1w,
1417 int src2, sljit_w src2w)
1418 {
1419 sljit_ub* code;
1420 int dst_r;
1421
1422 dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
1423
1424 /* Register destination. */
1425 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1426 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1427 FAIL_IF(!code);
1428 *code++ = 0x0f;
1429 *code = 0xaf;
1430 }
1431 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1432 code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1433 FAIL_IF(!code);
1434 *code++ = 0x0f;
1435 *code = 0xaf;
1436 }
1437 else if (src1 & SLJIT_IMM) {
1438 if (src2 & SLJIT_IMM) {
1439 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1440 src2 = dst_r;
1441 src2w = 0;
1442 }
1443
1444 if (src1w <= 127 && src1w >= -128) {
1445 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1446 FAIL_IF(!code);
1447 *code = 0x6b;
1448 code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1449 FAIL_IF(!code);
1450 INC_CSIZE(1);
1451 *code = (sljit_b)src1w;
1452 }
1453 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1454 else {
1455 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1456 FAIL_IF(!code);
1457 *code = 0x69;
1458 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1459 FAIL_IF(!code);
1460 INC_CSIZE(4);
1461 *(sljit_w*)code = src1w;
1462 }
1463 #else
1464 else if (IS_HALFWORD(src1w)) {
1465 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1466 FAIL_IF(!code);
1467 *code = 0x69;
1468 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1469 FAIL_IF(!code);
1470 INC_CSIZE(4);
1471 *(sljit_hw*)code = (sljit_hw)src1w;
1472 }
1473 else {
1474 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1475 if (dst_r != src2)
1476 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1477 code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1478 FAIL_IF(!code);
1479 *code++ = 0x0f;
1480 *code = 0xaf;
1481 }
1482 #endif
1483 }
1484 else if (src2 & SLJIT_IMM) {
1485 /* Note: src1 is NOT immediate. */
1486
1487 if (src2w <= 127 && src2w >= -128) {
1488 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1489 FAIL_IF(!code);
1490 *code = 0x6b;
1491 code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1492 FAIL_IF(!code);
1493 INC_CSIZE(1);
1494 *code = (sljit_b)src2w;
1495 }
1496 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1497 else {
1498 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1499 FAIL_IF(!code);
1500 *code = 0x69;
1501 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1502 FAIL_IF(!code);
1503 INC_CSIZE(4);
1504 *(sljit_w*)code = src2w;
1505 }
1506 #else
1507 else if (IS_HALFWORD(src2w)) {
1508 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1509 FAIL_IF(!code);
1510 *code = 0x69;
1511 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1512 FAIL_IF(!code);
1513 INC_CSIZE(4);
1514 *(sljit_hw*)code = (sljit_hw)src2w;
1515 }
1516 else {
1517 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1518 if (dst_r != src1)
1519 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1520 code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1521 FAIL_IF(!code);
1522 *code++ = 0x0f;
1523 *code = 0xaf;
1524 }
1525 #endif
1526 }
1527 else {
1528 /* Neither argument is immediate. */
1529 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1530 dst_r = TMP_REGISTER;
1531 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1532 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1533 FAIL_IF(!code);
1534 *code++ = 0x0f;
1535 *code = 0xaf;
1536 }
1537
1538 if (dst_r == TMP_REGISTER)
1539 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1540
1541 return SLJIT_SUCCESS;
1542 }
1543
/* Try to emit dst = src1 + src2 as a single LEA (0x8d), which performs the
   addition without modifying the status flags.  Returns
   SLJIT_ERR_UNSUPPORTED when no LEA addressing form fits the operands, so
   the caller falls back to a normal ADD. */
static int emit_lea_binary(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;
	int dst_r, done = 0;

	/* These cases better be left to handled by normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			/* It is not possible to be both SLJIT_LOCALS_REG. */
			if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
				/* lea dst_r, [src1 + src2] */
				code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
				FAIL_IF(!code);
				*code = 0x8d;
				done = 1;
			}
		}
		/* lea dst_r, [src1 + imm] — on x86-64 only when the immediate fits
		   a 32-bit displacement. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
#else
		if (src2 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}
	else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
		/* Mirror case: lea dst_r, [src2 + imm]. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
#else
		if (src1 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}

	if (done) {
		/* Memory destination: flush the temporary. */
		if (dst_r == TMP_REGISTER)
			return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
1603
/* Emit a flag-setting compare of src1 and src2 (x86 CMP).
   Encodings: 0x3d (cmp eax, imm32), the /7 immediate group via BINARY_IMM,
   0x39 (cmp r/m, reg) and 0x3b (cmp reg, r/m). */
static int emit_cmp_binary(struct sljit_compiler *compiler,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	/* Short accumulator form when src1 is eax/rax and the immediate does
	   not fit the sign-extended 8-bit encoding. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(0x3d, src2w);
		return SLJIT_SUCCESS;
	}

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
		}
		else {
			/* cmp src1, r/m */
			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!code);
			*code = 0x3b;
		}
		return SLJIT_SUCCESS;
	}

	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
		/* cmp r/m, src2 */
		code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!code);
		*code = 0x39;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		/* Immediate vs immediate: materialize src1 first. */
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			src1 = TMP_REGISTER;
			src1w = 0;
		}
		BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
	}
	else {
		/* mem vs mem: stage src1 in TMP_REGISTER. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
		FAIL_IF(!code);
		*code = 0x3b;
	}
	return SLJIT_SUCCESS;
}
1654
1655 static int emit_test_binary(struct sljit_compiler *compiler,
1656 int src1, sljit_w src1w,
1657 int src2, sljit_w src2w)
1658 {
1659 sljit_ub* code;
1660
1661 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1662 if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1663 #else
1664 if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1665 #endif
1666 BINARY_EAX_IMM(0xa9, src2w);
1667 return SLJIT_SUCCESS;
1668 }
1669
1670 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1671 if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1672 #else
1673 if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1674 #endif
1675 BINARY_EAX_IMM(0xa9, src1w);
1676 return SLJIT_SUCCESS;
1677 }
1678
1679 if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1680 if (src2 & SLJIT_IMM) {
1681 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1682 if (IS_HALFWORD(src2w) || compiler->mode32) {
1683 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1684 FAIL_IF(!code);
1685 *code = 0xf7;
1686 }
1687 else {
1688 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1689 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1690 FAIL_IF(!code);
1691 *code = 0x85;
1692 }
1693 #else
1694 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1695 FAIL_IF(!code);
1696 *code = 0xf7;
1697 #endif
1698 }
1699 else {
1700 code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1701 FAIL_IF(!code);
1702 *code = 0x85;
1703 }
1704 return SLJIT_SUCCESS;
1705 }
1706
1707 if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1708 if (src1 & SLJIT_IMM) {
1709 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1710 if (IS_HALFWORD(src1w) || compiler->mode32) {
1711 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1712 FAIL_IF(!code);
1713 *code = 0xf7;
1714 }
1715 else {
1716 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1717 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1718 FAIL_IF(!code);
1719 *code = 0x85;
1720 }
1721 #else
1722 code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1723 FAIL_IF(!code);
1724 *code = 0xf7;
1725 #endif
1726 }
1727 else {
1728 code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1729 FAIL_IF(!code);
1730 *code = 0x85;
1731 }
1732 return SLJIT_SUCCESS;
1733 }
1734
1735 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1736 if (src2 & SLJIT_IMM) {
1737 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1738 if (IS_HALFWORD(src2w) || compiler->mode32) {
1739 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1740 FAIL_IF(!code);
1741 *code = 0xf7;
1742 }
1743 else {
1744 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1745 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
1746 FAIL_IF(!code);
1747 *code = 0x85;
1748 }
1749 #else
1750 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1751 FAIL_IF(!code);
1752 *code = 0xf7;
1753 #endif
1754 }
1755 else {
1756 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1757 FAIL_IF(!code);
1758 *code = 0x85;
1759 }
1760 return SLJIT_SUCCESS;
1761 }
1762
/* Emit a shift/rotate: dst = src1 <shift> src2.  'mode' is the /digit of
   the 0xd3/0xc1 shift group, OR-ed into the ModRM byte.  x86 hardwires a
   register shift count to cl (SLJIT_PREF_SHIFT_REG), so when src2 is
   neither an immediate nor already cl, cl must be saved, loaded and
   restored around the shift. */
static int emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	/* Easy cases: the count is an immediate or already in cl. */
	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			/* Shift dst in place. */
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			/* Result discarded: shift a copy in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* cl is both count and destination: shift in TMP_REGISTER,
			   then move the result into cl. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			return SLJIT_SUCCESS;
		}
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			/* Register destination: copy then shift in place. */
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}

		/* Memory destination: compute in TMP_REGISTER and store. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}

	/* The count must be moved into cl first. */
	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* Destination is cl itself: work in TMP_REGISTER. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* Register destination not aliased by src2: save cl in
		   TMP_REGISTER, shift dst, restore cl. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may used for
		   addressing, and we must ensure to work even in that case. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1852
1853 static int emit_shift_with_flags(struct sljit_compiler *compiler,
1854 sljit_ub mode, int set_flags,
1855 int dst, sljit_w dstw,
1856 int src1, sljit_w src1w,
1857 int src2, sljit_w src2w)
1858 {
1859 /* The CPU does not set flags if the shift count is 0. */
1860 if (src2 & SLJIT_IMM) {
1861 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1862 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
1863 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1864 #else
1865 if ((src2w & 0x1f) != 0)
1866 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1867 #endif
1868 if (!set_flags)
1869 return emit_mov(compiler, dst, dstw, src1, src1w);
1870 /* OR dst, src, 0 */
1871 return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
1872 dst, dstw, src1, src1w, SLJIT_IMM, 0);
1873 }
1874
1875 if (!set_flags)
1876 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1877
1878 if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS))
1879 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
1880
1881 FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
1882
1883 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
1884 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
1885 return SLJIT_SUCCESS;
1886 }
1887
/* Emit a two-operand operation: dst = src1 <op> src2.  Dispatches to the
   specialized emitters and manages the save/restore of the status flags
   around operations that clobber them (for SLJIT_KEEP_FLAGS and the
   carry-consuming ADDC/SUBC). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	/* Rewrite memory-backed "extra" registers into stack references. */
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	/* The byte quadruples below are (op_rm, op_mr, op_imm /digit,
	   op_eax_imm) for the corresponding x86 ALU instruction. */
	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			/* Flag-free add can use LEA, which preserves the flags. */
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			/* Flag-free subtract of an immediate becomes LEA with -imm. */
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		/* Discarded result: a CMP sets the same flags without a store. */
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		/* Discarded result: TEST sets the same flags without a store. */
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
1979
1980 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
1981 {
1982 check_sljit_get_register_index(reg);
1983 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1984 if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
1985 || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
1986 return -1;
1987 #endif
1988 return reg_map[reg];
1989 }
1990
/* Copy a caller-supplied, pre-encoded machine instruction (1-15 bytes,
   the x86 maximum instruction length) verbatim into the code buffer. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, int size)
{
	sljit_ub *buf;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	SLJIT_ASSERT(size > 0 && size < 16);

	buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!buf);
	INC_SIZE(size);
	SLJIT_MEMMOVE(buf, instruction, size);
	return SLJIT_SUCCESS;
}
2006
2007 /* --------------------------------------------------------------------- */
2008 /* Floating point operators */
2009 /* --------------------------------------------------------------------- */
2010
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
/* Set once by init_compiler() from the cpuid feature bits. */
static int sse2_available = 0;
#endif

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Alignment + 2 * 16 bytes. */
/* Backing storage for two 16-byte-aligned constant blocks filled in by
   init_compiler(): a sign-bit mask (used by FNEG via xorpd) followed by a
   sign-clearing mask (used by FABS via andpd). sse2_buffer points at the
   aligned start inside sse2_data. */
static sljit_i sse2_data[3 + 4 + 4];
static sljit_i *sse2_buffer;
2020
static void init_compiler()
{
	/* One-time setup: builds the FNEG/FABS constant masks and, when
	   SLJIT_SSE2_AUTO is enabled, probes cpuid for SSE2 support. */
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
	int features = 0;
#endif

	/* Round sse2_data up to the next 16 byte boundary. */
	sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* First 16 byte block: 0x8000000000000000 - the sign bit of a
	   double; xorpd with it negates (see SLJIT_FNEG). */
	sse2_buffer[0] = 0;
	sse2_buffer[1] = 0x80000000;
	/* Second 16 byte block: 0x7fffffffffffffff - everything except the
	   sign bit; andpd with it takes the absolute value (see SLJIT_FABS). */
	sse2_buffer[4] = 0xffffffff;
	sse2_buffer[5] = 0x7fffffff;

#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
#ifdef __GNUC__
	/* AT&T syntax. */
	/* cpuid leaf 1; ebx is saved/restored by hand because it is not in
	   the clobber list. The SSE2 flag is read from edx below. */
	asm (
		"pushl %%ebx\n"
		"movl $0x1, %%eax\n"
		"cpuid\n"
		"popl %%ebx\n"
		"movl %%edx, %0\n"
		: "=g" (features)
		:
		: "%eax", "%ecx", "%edx"
	);
#elif defined(_MSC_VER) || defined(__BORLANDC__)
	/* Intel syntax. */
	__asm {
		mov eax, 1
		push ebx
		cpuid
		pop ebx
		mov features, edx
	}
#else
#error "SLJIT_SSE2_AUTO is not implemented for this C compiler"
#endif
	/* CPUID.1:EDX bit 26 is the SSE2 feature flag. */
	sse2_available = (features >> 26) & 0x1;
#endif
}
2061
2062 #endif
2063
2064 SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
2065 {
2066 /* Always available. */
2067 return 1;
2068 }
2069
2070 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2071
2072 static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2073 int xmm1, int xmm2, sljit_w xmm2w)
2074 {
2075 sljit_ub *buf;
2076
2077 buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2078 FAIL_IF(!buf);
2079 *buf++ = 0x0f;
2080 *buf = opcode;
2081 return SLJIT_SUCCESS;
2082 }
2083
2084 static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2085 int xmm1, int xmm2, sljit_w xmm2w)
2086 {
2087 sljit_ub *buf;
2088
2089 buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2090 FAIL_IF(!buf);
2091 *buf++ = 0x0f;
2092 *buf = opcode;
2093 return SLJIT_SUCCESS;
2094 }
2095
2096 static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
2097 int dst, int src, sljit_w srcw)
2098 {
2099 return emit_sse2(compiler, 0x10, dst, src, srcw);
2100 }
2101
2102 static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
2103 int dst, sljit_w dstw, int src)
2104 {
2105 return emit_sse2(compiler, 0x11, src, dst, dstw);
2106 }
2107
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	/* SSE2 implementation of the single-operand FP ops (FCMP, FMOV, FNEG,
	   FABS). Exported directly unless SLJIT_SSE2_AUTO selects between the
	   SSE2 and x87 implementations at runtime. */
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		compiler->flags_saved = 0;
		/* UCOMISD (66 0F 2E) needs its first operand in a register. */
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			dst_r = dst;
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
		}
		return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
	}

	if (op == SLJIT_FMOV) {
		/* Register destination: load directly; register source: store
		   directly; memory-to-memory: bounce through TMP_FREG. */
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			return emit_sse2_load(compiler, dst, src, srcw);
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
			return emit_sse2_store(compiler, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	}

	/* FNEG / FABS: operate in dst if it is a register, else in TMP_FREG. */
	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}

	switch (op) {
	case SLJIT_FNEG:
		/* XORPD with the sign-bit mask prepared by init_compiler(). */
		FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
		break;

	case SLJIT_FABS:
		/* ANDPD with the sign-clearing mask (second 16 byte block). */
		FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2169
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	/* SSE2 implementation of the two-operand FP ops (FADD, FSUB, FMUL,
	   FDIV). SSE2 arithmetic is destructive (destination must hold the
	   first operand), so src1 is arranged into dst_r first. */
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
			/* Swap arguments. */
			/* Commutative op and dst already holds src2: use src1 as the
			   second operand instead of copying. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
		else {
			/* dst == src2 for a non-commutative op: compute in TMP_FREG
			   so src2 is not clobbered before it is read. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
	}

	switch (op) {
	case SLJIT_FADD:
		FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w)); /* ADDSD. */
		break;

	case SLJIT_FSUB:
		FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w)); /* SUBSD. */
		break;

	case SLJIT_FMUL:
		FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w)); /* MULSD. */
		break;

	case SLJIT_FDIV:
		FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w)); /* DIVSD. */
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2231
2232 #endif
2233
2234 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2)
2235
static int emit_fld(struct sljit_compiler *compiler,
	int src, sljit_w srcw)
{
	/* Push 'src' onto the x87 register stack: FLD st(i) (D9 C0+i) for FP
	   registers, FLD m64fp (DD /0) for memory operands. */
	sljit_ub *buf; /* NOTE(review): INC_SIZE appears to expand using the name 'buf' - keep it. */

	if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!buf);
		INC_SIZE(2);
		*buf++ = 0xd9;
		/* FP registers are 1-based; the st(i) encoding is 0-based. */
		*buf = 0xc0 + src - 1;
		return SLJIT_SUCCESS;
	}

	buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
	FAIL_IF(!buf);
	/* DD with reg field 0 in the ModRM: FLD m64fp. */
	*buf = 0xdd;
	return SLJIT_SUCCESS;
}
2255
static int emit_fop(struct sljit_compiler *compiler,
	sljit_ub st_arg, sljit_ub st_arg2,
	sljit_ub m64fp_arg, sljit_ub m64fp_arg2,
	int src, sljit_w srcw)
{
	/* Emit an x87 instruction in either its register form (the two bytes
	   st_arg, st_arg2 + src) or its m64fp form (opcode m64fp_arg with
	   m64fp_arg2 OR-ed into the ModRM reg field), depending on src. */
	sljit_ub *buf; /* NOTE(review): INC_SIZE appears to expand using the name 'buf' - keep it. */

	if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!buf);
		INC_SIZE(2);
		*buf++ = st_arg;
		/* No -1 here (unlike emit_fld): callers address the registers
		   after a push has shifted the x87 stack by one slot. */
		*buf = st_arg2 + src;
		return SLJIT_SUCCESS;
	}

	buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
	FAIL_IF(!buf);
	*buf++ = m64fp_arg;
	*buf |= m64fp_arg2;
	return SLJIT_SUCCESS;
}
2278
static int emit_fop_regs(struct sljit_compiler *compiler,
	sljit_ub st_arg, sljit_ub st_arg2,
	int src)
{
	/* Emit the two-byte register form of an x87 instruction:
	   st_arg, st_arg2 + src. */
	sljit_ub *buf; /* NOTE(review): INC_SIZE appears to expand using the name 'buf' - keep it. */

	buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!buf);
	INC_SIZE(2);
	*buf++ = st_arg;
	*buf = st_arg2 + src;
	return SLJIT_SUCCESS;
}
2292
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	/* x87 implementation of the single-operand FP ops. */
#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_ub *buf; /* NOTE(review): INC_SIZE appears to expand using the name 'buf' - keep it. */
#endif

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		compiler->flags_saved = 0;
#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* 32 bit: fld dst; fcomp src (D8 D8+i / DC /3), then copy the FPU
		   status word into EFLAGS with fnstsw ax / sahf. */
		FAIL_IF(emit_fld(compiler, dst, dstw));
		FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw));

		/* Copy flags. */
		/* EAX is clobbered by fnstsw; preserve it around the sequence. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
		FAIL_IF(!buf);
		INC_SIZE(3);
		*buf++ = 0xdf; /* fnstsw ax (DF E0). */
		*buf++ = 0xe0;
		/* Note: lahf is not supported on all x86-64 architectures. */
		*buf++ = 0x9e; /* sahf. */
		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
#else
		/* 64 bit: fucomip (DF E8+i) sets EFLAGS directly and pops once. */
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
			FAIL_IF(emit_fld(compiler, dst, dstw));
			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
		} else {
			FAIL_IF(emit_fld(compiler, src, srcw));
			/* dst's stack index shifts by one because src was pushed. */
			FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw));
			/* NOTE(review): 'src' is not an FP register in this branch,
			   yet it is used as the st(i) index; the pushed src sits at
			   st(1), so 0xe8 + 1 looks intended - verify. */
			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
			FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0)); /* fstp st(0): pop the leftover value. */
		}
#endif
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_fld(compiler, src, srcw));

	switch (op) {
	case SLJIT_FNEG:
		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0)); /* fchs. */
		break;
	case SLJIT_FABS:
		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0)); /* fabs. */
		break;
	}

	/* fstp: store the result to dst and pop the x87 stack. */
	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));

	return SLJIT_SUCCESS;
}
2357
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	/* x87 implementation of the two-operand FP ops (FADD, FSUB, FMUL,
	   FDIV). Three cases: accumulate into dst==src1, accumulate into
	   dst==src2 (using the reversed DE forms for FSUB/FDIV), or compute
	   in st(0) and fstp the result to dst. */
	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) {
		/* Push src2, then use the DE "op and pop" forms that accumulate
		   directly into st(src1): st(i) <- st(i) op st(0). */
		FAIL_IF(emit_fld(compiler, src2, src2w));

		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1)); /* faddp. */
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1)); /* fsubp: st(i) = st(i) - st(0). */
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1)); /* fmulp. */
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1)); /* fdivp: st(i) = st(i) / st(0). */
			break;
		}
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_fld(compiler, src1, src1w));

	if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) {
		/* st(0) holds src1; accumulate into st(src2). FSUB/FDIV use the
		   reversed forms because the operand order is swapped. */
		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2)); /* faddp. */
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2)); /* fsubrp: st(i) = st(0) - st(i). */
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2)); /* fmulp. */
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2)); /* fdivrp: st(i) = st(0) / st(i). */
			break;
		}
		return SLJIT_SUCCESS;
	}

	/* General case: st(0) = st(0) op src2 (D8 register form or DC m64fp
	   form), then fstp the result to dst. */
	switch (op) {
	case SLJIT_FADD:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w)); /* fadd. */
		break;
	case SLJIT_FSUB:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w)); /* fsub. */
		break;
	case SLJIT_FMUL:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w)); /* fmul. */
		break;
	case SLJIT_FDIV:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w)); /* fdiv. */
		break;
	}

	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw)); /* fstp dst. */

	return SLJIT_SUCCESS;
}
2433 #endif
2434
2435 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2436
2437 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2438 int dst, sljit_w dstw,
2439 int src, sljit_w srcw)
2440 {
2441 if (sse2_available)
2442 return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw);
2443 else
2444 return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw);
2445 }
2446
2447 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2448 int dst, sljit_w dstw,
2449 int src1, sljit_w src1w,
2450 int src2, sljit_w src2w)
2451 {
2452 if (sse2_available)
2453 return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2454 else
2455 return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2456 }
2457
2458 #endif
2459
2460 /* --------------------------------------------------------------------- */
2461 /* Conditional instructions */
2462 /* --------------------------------------------------------------------- */
2463
2464 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2465 {
2466 sljit_ub *buf;
2467 struct sljit_label *label;
2468
2469 CHECK_ERROR_PTR();
2470 check_sljit_emit_label(compiler);
2471
2472 /* We should restore the flags before the label,
2473 since other taken jumps has their own flags as well. */
2474 if (SLJIT_UNLIKELY(compiler->flags_saved))
2475 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2476
2477 if (compiler->last_label && compiler->last_label->size == compiler->size)
2478 return compiler->last_label;
2479
2480 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2481 PTR_FAIL_IF(!label);
2482 set_label(label, compiler);
2483
2484 buf = (sljit_ub*)ensure_buf(compiler, 2);
2485 PTR_FAIL_IF(!buf);
2486
2487 *buf++ = 0;
2488 *buf++ = 0;
2489
2490 return label;
2491 }
2492
2493 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
2494 {
2495 sljit_ub *buf;
2496 struct sljit_jump *jump;
2497
2498 CHECK_ERROR_PTR();
2499 check_sljit_emit_jump(compiler, type);
2500
2501 if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2502 if ((type & 0xff) <= SLJIT_JUMP)
2503 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2504 compiler->flags_saved = 0;
2505 }
2506
2507 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2508 PTR_FAIL_IF_NULL(jump);
2509 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2510 type &= 0xff;
2511
2512 if (type >= SLJIT_CALL1)
2513 PTR_FAIL_IF(call_with_args(compiler, type));
2514
2515 /* Worst case size. */
2516 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2517 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2518 #else
2519 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2520 #endif
2521
2522 buf = (sljit_ub*)ensure_buf(compiler, 2);
2523 PTR_FAIL_IF_NULL(buf);
2524
2525 *buf++ = 0;
2526 *buf++ = type + 4;
2527 return jump;
2528 }
2529
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
{
	/* Emits an indirect jump or call through a register, a memory operand
	   or an immediate target address. */
	sljit_ub *code;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		/* Plain jumps may still need the saved flags; calls do not. */
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
		/* Argument set-up may clobber src or move the stack pointer;
		   copy src aside or adjust stack-relative offsets beforehand. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_TEMPORARY_REG3) {
			/* Fastcall passes an argument in this register. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
			if (src & 0xf0) {
				/* Two-register addressing: resolve the value now. */
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				/* call_with_args pushes one word onto the stack. */
				srcw += sizeof(sljit_w);
		}
#else
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
			if (src & 0xf0) {
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				/* All arguments are pushed; compensate the offset. */
				srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
		}
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_TEMPORARY_REG3) {
			/* Win64 ABI passes an argument in this register. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		/* Immediate target: record a jump resolved at code generation. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		/* Jump marker in the instruction stream. */
		code = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(code);

		*code++ = 0;
		*code++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		/* FF /2 = call r/m, FF /4 = jmp r/m. */
		code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0xff;
		*code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
	}
	return SLJIT_SUCCESS;
}
2611
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
{
	/* Materializes the 0/1 value of condition 'type' into dst: SETcc into
	   a byte register, MOVZX to widen it, and - when op is not SLJIT_MOV -
	   a final sljit_emit_op2 to combine the bit into dst. */
	sljit_ub *buf; /* NOTE(review): INC_SIZE appears to expand using the name 'buf' - keep it. */
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR();
	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, 0));

	/* Pick the second opcode byte of the SETcc instruction (0F 9x). */
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		cond_set = 0x94; /* sete. */
		break;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		cond_set = 0x95; /* setne. */
		break;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		cond_set = 0x92; /* setb (unsigned). */
		break;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		cond_set = 0x93; /* setae (unsigned). */
		break;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		cond_set = 0x97; /* seta (unsigned). */
		break;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		cond_set = 0x96; /* setbe (unsigned). */
		break;

	case SLJIT_C_SIG_LESS:
		cond_set = 0x9c; /* setl (signed). */
		break;

	case SLJIT_C_SIG_GREATER_EQUAL:
		cond_set = 0x9d; /* setge (signed). */
		break;

	case SLJIT_C_SIG_GREATER:
		cond_set = 0x9f; /* setg (signed). */
		break;

	case SLJIT_C_SIG_LESS_EQUAL:
		cond_set = 0x9e; /* setle (signed). */
		break;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		cond_set = 0x90; /* seto. */
		break;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		cond_set = 0x91; /* setno. */
		break;

	case SLJIT_C_FLOAT_NAN:
		cond_set = 0x9a; /* setp. */
		break;

	case SLJIT_C_FLOAT_NOT_NAN:
		cond_set = 0x9b; /* setnp. */
		break;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* 64 bit: every register is byte-addressable with a REX prefix, so
	   setcc + movzx can target dst (or TMP_REGISTER) directly. */
	reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!buf);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
	*buf++ = 0x0f;
	*buf++ = cond_set;
	*buf++ = 0xC0 | reg_lmap[reg];
	/* movzx reg, reg8 widens the 0/1 byte to the full register. */
	*buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*buf++ = 0x0f;
	*buf++ = 0xb6;
	*buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg == TMP_REGISTER) {
		if (op == SLJIT_MOV) {
			compiler->mode32 = 0;
			EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		}
		else {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
			compiler->skip_checks = 1;
#endif
			return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
		}
	}
#else
	/* 32 bit: setcc only targets the byte-addressable registers, so other
	   destinations go through AL (saving/restoring EAX via TMP_REGISTER). */
	if (op == SLJIT_MOV) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];

			/* movzx dst, dst8. */
			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
		}
		else {
			/* Preserve EAX while AL is used as scratch. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS)
				/* movzx dst, al. */
				*buf = 0xC0 | (reg_map[dst] << 3);
			else {
				/* movzx eax, al; then store eax to the memory dst. */
				*buf = 0xC0;
				EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
			}

			EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
		}
	}
	else {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			/* Save dst; setcc overwrites its low byte before op2 runs. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3);

			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3 + 1);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			/* movzx eax, al. */
			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf++ = 0xC0;

			/* xchg eax, TMP_REGISTER (90+r): restores EAX and leaves the
			   0/1 value in TMP_REGISTER for the op2 below. */
			*buf++ = 0x90 + reg_map[TMP_REGISTER];
		}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
		compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
	}
#endif

	return SLJIT_SUCCESS;
}
2799
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
{
	/* Emits a rewritable constant load and returns a record whose address
	   can later be patched with sljit_set_const(). */
	sljit_ub *buf;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* 64 bit: load via mov reg, imm64 so the full word is patchable. */
	compiler->mode32 = 0;
	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	/* 32 bit: a mov with a 32 bit immediate is patchable in place. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REGISTER;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	/* Constant marker in the instruction stream. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	*buf++ = 0;
	*buf++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Copy the loaded value out to a memory destination if needed. */
	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
			return NULL;
#endif

	return const_;
}
2845
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
	/* Patch a rewritable jump: 'addr' points at its immediate field. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* 32 bit: the field is a rel32 displacement, measured from the end
	   of the 4 byte immediate (addr + 4). */
	*(sljit_w*)addr = new_addr - (addr + 4);
#else
	/* 64 bit: the field holds the absolute target address. */
	*(sljit_uw*)addr = new_addr;
#endif
}
2854
2855 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
2856 {
2857 *(sljit_w*)addr = new_constant;
2858 }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12