/[pcre]/code/trunk/sljit/sljitNativeX86_common.c
ViewVC logotype

Contents of /code/trunk/sljit/sljitNativeX86_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 884 - (show annotations) (download)
Tue Jan 17 11:52:43 2012 UTC (2 years, 11 months ago) by zherczeg
File MIME type: text/plain
File size: 79808 byte(s)
JIT test prints cpu info
1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
28 {
29 return "x86" SLJIT_CPUINFO;
30 }
31
32 /*
33 32b register indexes:
34 0 - EAX
35 1 - ECX
36 2 - EDX
37 3 - EBX
38 4 - none
39 5 - EBP
40 6 - ESI
41 7 - EDI
42 */
43
44 /*
45 64b register indexes:
46 0 - RAX
47 1 - RCX
48 2 - RDX
49 3 - RBX
50 4 - none
51 5 - RBP
52 6 - RSI
53 7 - RDI
54 8 - R8 - From now on REX prefix is required
55 9 - R9
56 10 - R10
57 11 - R11
58 12 - R12
59 13 - R13
60 14 - R14
61 15 - R15
62 */
63
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)

/* Maps abstract SLJIT register numbers to x86 machine-register encodings
   (0 = EAX, 1 = ECX, 2 = EDX, 3 = EBX, 4 = ESP, 5 = EBP, 6 = ESI, 7 = EDI).
   Index 0 is unused; zero entries mark slots with no machine register. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
	0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
};

/* On x86-32 the extra ("E") registers are not machine registers: they live
   in the stack frame.  This macro rewrites such an operand pair (p, w) into
   a SLJIT_MEM1(SLJIT_LOCALS_REG) reference with the proper frame offset,
   then executes the statement `do'.  Operands that are not extra registers
   pass through unchanged. */
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
		w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	} \
	else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
		w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_w); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	}
84
#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)
#define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
#define TMP_REG3	(SLJIT_NO_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* System V AMD64 calling convention:
   1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7 (the low 3 bits that go into ModRM;
   the 4th bit is supplied via the REX prefix). */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
};
#else
/* Microsoft x64 calling convention:
   1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 4, 7, 2, 0, 1
};
#endif

/* REX prefix byte and its individual operand-extension bits. */
#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

/* "Halfword" on x86-64 means 32 bits: the width of immediates and
   displacements embedded in most instructions. */
typedef unsigned int sljit_uhw;
typedef int sljit_hw;

/* Tests whether a 64-bit constant fits in a sign-extended 32-bit field. */
#define IS_HALFWORD(x)		((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
#define NOT_HALFWORD(x)		((x) > 0x7fffffffll || (x) < -0x80000000ll)

/* Extra registers are real machine registers on x86-64, so no rewriting
   is needed (compare with the x86-32 definition above). */
#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */
130
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
/* Scratch floating-point register index. */
#define TMP_FREG	(SLJIT_FLOAT_REG4 + 1)
#endif

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define EX86_PREF_F2		0x0800
#define EX86_SSE2		0x1000
#endif

/* Write a fragment length byte into the buffer (via the local `buf' or
   `code' pointer respectively) and account for it in compiler->size. */
#define INC_SIZE(s)		(*buf++ = (s), compiler->size += (s))
#define INC_CSIZE(s)		(*code++ = (s), compiler->size += (s))

/* Single-byte encodings: push reg (0x50+r), pop reg (0x58+r), ret (0xc3),
   ret imm16 (0xc2, little-endian immediate). */
#define PUSH_REG(r)		(*buf++ = (0x50 + (r)))
#define POP_REG(r)		(*buf++ = (0x58 + (r)))
#define RET()			(*buf++ = (0xc3))
#define RETN(n)			(*buf++ = (0xc2), *buf++ = n, *buf++ = 0)
/* r32, r/m32: mov reg, r/m (0x8b) followed by a ModRM byte. */
#define MOV_RM(mod, reg, rm)	(*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
158
/* Maps an SLJIT condition code to the second opcode byte of the two-byte
   near conditional jump (0x0f 0x8x, rel32 form).  The caller derives the
   short (rel8) form by subtracting 0x10 from this value — see
   generate_near_jump_code.  Float NAN checks are routed through the
   parity flag (jp/jnp).  Returns 0 for types without a condition code. */
static sljit_ub get_jump_code(int type)
{
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		return 0x84; /* je */

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		return 0x85; /* jne */

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		return 0x82; /* jb (unsigned) */

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		return 0x83; /* jae (unsigned) */

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		return 0x87; /* ja (unsigned) */

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		return 0x86; /* jbe (unsigned) */

	case SLJIT_C_SIG_LESS:
		return 0x8c; /* jl (signed) */

	case SLJIT_C_SIG_GREATER_EQUAL:
		return 0x8d; /* jge (signed) */

	case SLJIT_C_SIG_GREATER:
		return 0x8f; /* jg (signed) */

	case SLJIT_C_SIG_LESS_EQUAL:
		return 0x8e; /* jle (signed) */

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		return 0x80; /* jo */

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		return 0x81; /* jno */

	case SLJIT_C_FLOAT_NAN:
		return 0x8a; /* jp (parity set = unordered) */

	case SLJIT_C_FLOAT_NOT_NAN:
		return 0x8b; /* jnp */
	}
	return 0;
}
214
215 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);
216
217 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
218 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
219 #endif
220
/* Emits the opcode byte(s) for a jump/call whose target may be reachable
   with a PC-relative displacement, leaving the displacement itself blank.
   The displacement slot is filled in by the patch loop at the end of
   sljit_generate_code; jump->addr is advanced past the opcode so it points
   at that slot, and PATCH_MB/PATCH_MW record its width (rel8 / rel32-or-word). */
static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
{
	int short_jump;
	sljit_uw label_addr;

	/* Resolve the target: either a label inside the generated code
	   (label->size is its offset at this point) or an absolute address. */
	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	/* rel8 reach is measured from the end of a 2-byte instruction. */
	short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* On x86-64 a target beyond rel32 reach needs an absolute (far) jump. */
	if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = 0xeb; /* jmp rel8 */
		else
			*code_ptr++ = 0xe9; /* jmp rel32 */
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0; /* call has no rel8 form */
		*code_ptr++ = 0xe8; /* call rel32 */
		jump->addr++;
	}
	else if (short_jump) {
		/* Jcc rel8: long-form opcode minus 0x10 (0x8x -> 0x7x). */
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		/* Jcc rel32: two-byte opcode 0x0f 0x8x. */
		*code_ptr++ = 0x0f;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	/* Reserve the (still blank) displacement field and tag its width. */
	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_b);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_w);
#else
		code_ptr += sizeof(sljit_hw);
#endif
	}

	return code_ptr;
}
273
/* Second (final) code-generation pass: concatenates the buffered machine
   code fragments into one executable buffer, resolves labels, jumps and
   constants, then patches every recorded displacement.
   Fragment stream format (one record at a time):
     len > 0  - `len' literal machine-code bytes follow;
     len == 0 - a marker byte follows:
                  0      label,  1      const,
                  2 / 3  fixed-address call / jmp (absolute target follows),
                  >= 4   jump record (type = marker - 4).
   Returns the executable buffer, or NULL on allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					/* Jump record: emit opcode now, displacement later. */
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					/* Label: record its final address and offset. */
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					/* Constant: its word was already copied just before. */
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
					buf_ptr++;
					/* Convert absolute target to rel32 in place. */
					*(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
					code_ptr += sizeof(sljit_w);
					buf_ptr += sizeof(sljit_w) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_w);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	/* All three lists must be fully consumed. */
	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	/* Patch pass: fill in the displacement fields reserved above. */
	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			/* rel8 displacement, measured from the end of the field. */
			SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
#else
				SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
#else
				SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			/* Absolute 64-bit address (far jump data word). */
			*(sljit_w*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = compiler->size;
	return (void*)code;
}
389
390 /* --------------------------------------------------------------------- */
391 /* Operators */
392 /* --------------------------------------------------------------------- */
393
394 static int emit_cum_binary(struct sljit_compiler *compiler,
395 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
396 int dst, sljit_w dstw,
397 int src1, sljit_w src1w,
398 int src2, sljit_w src2w);
399
400 static int emit_non_cum_binary(struct sljit_compiler *compiler,
401 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
402 int dst, sljit_w dstw,
403 int src1, sljit_w src1w,
404 int src2, sljit_w src2w);
405
406 static int emit_mov(struct sljit_compiler *compiler,
407 int dst, sljit_w dstw,
408 int src, sljit_w srcw);
409
/* Saves the CPU status flags just BELOW the stack pointer: pushf stores the
   flags word and decrements esp/rsp, then the lea adds sizeof(sljit_w) back,
   so the net stack pointer is unchanged and the saved flags sit at
   [esp - sizeof(sljit_w)].  emit_restore_flags undoes this.  The lea is used
   (instead of add) because it does not clobber the just-saved flags. */
static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
	*buf++ = 0x9c; /* pushfd */
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = 0x9c; /* pushfq */
	*buf++ = 0x48; /* REX.W prefix for the 64-bit lea below */
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
	*buf++ = 0x64;
	*buf++ = 0x24;
	*buf++ = sizeof(sljit_w);
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}
433
/* Restores the flags saved by emit_save_flags: the lea moves esp/rsp back
   down onto the saved flags word (without touching the flags), then popf
   reloads it.  `keep_flags' becomes the new flags_saved state, so a caller
   can peek at the flags and still leave them marked as saved. */
static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = 0x48; /* REX.W prefix for the 64-bit lea below */
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
	*buf++ = 0x64;
	*buf++ = 0x24;
	*buf++ = (sljit_ub)-(int)sizeof(sljit_w);
	*buf++ = 0x9d; /* popfd / popfq */
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
456
#ifdef _WIN32
#include <malloc.h>

/* On Windows the stack is grown page by page via guard pages; a function
   with a large frame must call _chkstk to commit them.  Calling alloca here
   makes the compiler emit that probe for us before the JIT-ed code runs.
   The allocation itself is intentionally unused and vanishes on return. */
static void SLJIT_CALL sljit_touch_stack(sljit_w local_size)
{
	/* Workaround for calling _chkstk. */
	alloca(local_size);
}
#endif
466
467 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
468 #include "sljitNativeX86_32.c"
469 #else
470 #include "sljitNativeX86_64.c"
471 #endif
472
/* Emits a word-sized move dst <- src for any operand combination
   (register, memory, immediate).  Opcode bytes used:
     0x8b mov r, r/m     0x89 mov r/m, r
     0xb8+r mov r, imm   0xc7 mov r/m, imm
   A memory-to-memory move is split into two instructions through
   TMP_REGISTER.  Returns SLJIT_SUCCESS or a compiler error code. */
static int emit_mov(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* No destination, doesn't need to setup flags.  A memory source is
		   still loaded so any side effect of the addressing happens. */
		if (src & SLJIT_MEM) {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8b; /* mov r, r/m */
		}
		return SLJIT_SUCCESS;
	}
	if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
		/* Register source: single store/move. */
		code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x89; /* mov r/m, r */
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); /* mov r, imm32 */
#else
			if (!compiler->mode32) {
				/* 64-bit constant outside the sign-extended imm32 range
				   needs the full movabs form. */
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			/* Wide immediate into memory: stage it in TMP_REG2 first. */
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x89; /* mov r/m, r */
			return SLJIT_SUCCESS;
		}
#endif
		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc7; /* mov r/m, imm */
		return SLJIT_SUCCESS;
	}
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
		code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!code);
		*code = 0x8b; /* mov r, r/m */
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instruction. */
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code = 0x8b; /* load into TMP_REGISTER */
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
	FAIL_IF(!code);
	*code = 0x89; /* store from TMP_REGISTER */
	return SLJIT_SUCCESS;
}
537
/* Convenience wrapper: emit a word-sized move and return on failure.
   NOTE(review): the expansion already ends with a semicolon and is not
   wrapped in `do { } while (0)', so using it in an unbraced if/else is
   unsafe; all visible call sites here use braces and add their own `;'. */
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
540
/* Emits a zero-operand operation: breakpoint (int3), nop, or one of the
   widening multiply / divide ops which implicitly use EAX/RAX and EDX/RDX.
   For mul/div the second operand register is encoded into a group-3 (0xf7)
   instruction whose ModRM /digit selects the operation:
     /4 mul, /5 imul, /6 div, /7 idiv  (see the final switch). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
{
	sljit_ub *buf;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int size;
#endif

	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0xcc; /* int3 */
		break;
	case SLJIT_NOP:
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0x90; /* nop */
		break;
	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* These ops hard-code EAX(=REG1) and EDX; the asserts pin the
		   register mapping this encoding depends on. */
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] == 2
			&& reg_map[TMP_REGISTER] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] < 7
			&& reg_map[TMP_REGISTER] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_UDIV) {
			/* Zero the high part of the dividend (xor reg, reg). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
			buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
#else
			buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
#endif
			FAIL_IF(!buf);
			*buf = 0x33; /* xor r, r/m */
		}

		if (op == SLJIT_SDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
#endif

			/* CDQ instruction: sign-extend EAX into EDX (cdq/cqo). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!buf);
			INC_SIZE(1);
			*buf = 0x99; /* cdq */
#else
			if (compiler->mode32) {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!buf);
				INC_SIZE(1);
				*buf = 0x99; /* cdq */
			} else {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!buf);
				INC_SIZE(2);
				*buf++ = REX_W;
				*buf = 0x99; /* cqo */
			}
#endif
		}

		/* Emit the group-3 opcode (0xf7) with a register ModRM byte
		   (0xc0 | reg); the /digit bits are OR-ed in below. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!buf);
		INC_SIZE(2);
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!buf);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
		else if (op >= SLJIT_UDIV)
			*buf++ = REX_B; /* operand register is r8-r15 */
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
#else
		if (!compiler->mode32)
			*buf++ = REX_W;
		*buf++ = 0xf7;
		*buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
#endif
#endif
		/* Select the operation via the ModRM /digit field. */
		switch (op) {
		case SLJIT_UMUL:
			*buf |= 4 << 3; /* mul */
			break;
		case SLJIT_SMUL:
			*buf |= 5 << 3; /* imul */
			break;
		case SLJIT_UDIV:
			*buf |= 6 << 3; /* div */
			break;
		case SLJIT_SDIV:
			*buf |= 7 << 3; /* idiv */
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		/* Move the high result/remainder back to the REG2 slot. */
		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}
676
/* Emits a single raw byte (prefix or one-byte opcode) as its own
   one-byte code fragment, using the local `code' pointer. */
#define ENCODE_PREFIX(prefix) \
	do { \
		code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!code); \
		INC_CSIZE(1); \
		*code = (prefix); \
	} while (0)
684
/* Emits a byte-sized move dst <- src with zero (sign == 0) or sign
   (sign != 0) extension.  Opcode bytes: 0xc6 mov r/m8,imm8; 0x0f 0xbe/0xb6
   movsx/movzx r,r/m8; 0x88 mov r/m8,r8; 0x87 xchg; 0x90+r xchg eax,r.
   On x86-32 only EAX..EBX (reg_map < 4) have byte sub-registers, which
   forces the shift/mask and register-juggling fallbacks below. */
static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	int work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw); /* mov r, imm */
#else
			return emit_load_imm64(compiler, dst, srcw);
#endif
		}
		code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc6; /* mov r/m8, imm8 */
		return SLJIT_SUCCESS;
	}

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			/* src has no byte form; copy it into TMP_REGISTER first. */
			SLJIT_ASSERT(dst_r == TMP_REGISTER);
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
		if (reg_map[dst] < 4) {
			/* dst has a byte form: copy, then extend dst's low byte. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!code);
			*code++ = 0x0f;
			*code = sign ? 0xbe : 0xb6; /* movsx / movzx */
		}
		else {
			/* Neither register has a byte form: extend with shifts/mask. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				*code |= 0x4 << 3;
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				/* shr/sar reg, 24 */
				*code |= 0x7 << 3;
			}
			else {
				/* and dst, 0xff */
				code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
				FAIL_IF(!code);
				*(code + 1) |= 0x4 << 3;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0x0f;
		*code = sign ? 0xbe : 0xb6; /* movsx / movzx */
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REGISTER) {
			/* TMP_REGISTER has no byte form: borrow a byte-addressable
			   register not used by the dst addressing mode. */
			/* Find a non-used register, whose reg_map[src] < 4. */
			if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
				if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}
			else {
				if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
					work_r = SLJIT_TEMPORARY_REG1;
				else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}

			/* Swap work_r with TMP_REGISTER (0x90+r = xchg eax, r when
			   work_r is REG1, otherwise a full xchg 0x87). */
			if (work_r == SLJIT_TEMPORARY_REG1) {
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87; /* xchg */
			}

			code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88; /* mov r/m8, r8 */

			/* Swap back to restore work_r. */
			if (work_r == SLJIT_TEMPORARY_REG1) {
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87; /* xchg */
			}
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88; /* mov r/m8, r8 */
		}
#else
		code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x88; /* mov r/m8, r8 */
#endif
	}

	return SLJIT_SUCCESS;
}
827
828 static int emit_mov_half(struct sljit_compiler *compiler, int sign,
829 int dst, sljit_w dstw,
830 int src, sljit_w srcw)
831 {
832 sljit_ub* code;
833 int dst_r;
834
835 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
836 compiler->mode32 = 0;
837 #endif
838
839 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
840 return SLJIT_SUCCESS; /* Empty instruction. */
841
842 if (src & SLJIT_IMM) {
843 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
844 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
845 return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
846 #else
847 return emit_load_imm64(compiler, dst, srcw);
848 #endif
849 }
850 code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
851 FAIL_IF(!code);
852 *code = 0xc7;
853 return SLJIT_SUCCESS;
854 }
855
856 dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
857
858 if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
859 dst_r = src;
860 else {
861 code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
862 FAIL_IF(!code);
863 *code++ = 0x0f;
864 *code = sign ? 0xbf : 0xb7;
865 }
866
867 if (dst & SLJIT_MEM) {
868 code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
869 FAIL_IF(!code);
870 *code = 0x89;
871 }
872
873 return SLJIT_SUCCESS;
874 }
875
876 static int emit_unary(struct sljit_compiler *compiler, int un_index,
877 int dst, sljit_w dstw,
878 int src, sljit_w srcw)
879 {
880 sljit_ub* code;
881
882 if (dst == SLJIT_UNUSED) {
883 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
884 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
885 FAIL_IF(!code);
886 *code++ = 0xf7;
887 *code |= (un_index) << 3;
888 return SLJIT_SUCCESS;
889 }
890 if (dst == src && dstw == srcw) {
891 /* Same input and output */
892 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
893 FAIL_IF(!code);
894 *code++ = 0xf7;
895 *code |= (un_index) << 3;
896 return SLJIT_SUCCESS;
897 }
898 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
899 EMIT_MOV(compiler, dst, 0, src, srcw);
900 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
901 FAIL_IF(!code);
902 *code++ = 0xf7;
903 *code |= (un_index) << 3;
904 return SLJIT_SUCCESS;
905 }
906 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
907 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
908 FAIL_IF(!code);
909 *code++ = 0xf7;
910 *code |= (un_index) << 3;
911 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
912 return SLJIT_SUCCESS;
913 }
914
915 static int emit_not_with_flags(struct sljit_compiler *compiler,
916 int dst, sljit_w dstw,
917 int src, sljit_w srcw)
918 {
919 sljit_ub* code;
920
921 if (dst == SLJIT_UNUSED) {
922 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
923 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
924 FAIL_IF(!code);
925 *code++ = 0xf7;
926 *code |= 0x2 << 3;
927 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
928 FAIL_IF(!code);
929 *code = 0x0b;
930 return SLJIT_SUCCESS;
931 }
932 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
933 EMIT_MOV(compiler, dst, 0, src, srcw);
934 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
935 FAIL_IF(!code);
936 *code++ = 0xf7;
937 *code |= 0x2 << 3;
938 code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
939 FAIL_IF(!code);
940 *code = 0x0b;
941 return SLJIT_SUCCESS;
942 }
943 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
944 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
945 FAIL_IF(!code);
946 *code++ = 0xf7;
947 *code |= 0x2 << 3;
948 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
949 FAIL_IF(!code);
950 *code = 0x0b;
951 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
952 return SLJIT_SUCCESS;
953 }
954
/* Emits a count-leading-zeros sequence (no lzcnt; uses bsr):
     bsr TMP, src            ; ZF set if src == 0
     mov dst_r, width*2 - 1  ; e.g. 63 for 32-bit: 32 + 31
     cmovne dst_r, TMP       ; keep the sentinel only for src == 0
     xor dst_r, width - 1    ; bit index b -> (width-1) - b = clz
   For src == 0 the sentinel gives (width*2-1) ^ (width-1) = width. */
static int emit_clz(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;

	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= 0x2 << 3; /* not */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
#else
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
#endif
		FAIL_IF(!code);
		*code |= 0x5 << 3; /* shr: ZF reflects the top bit of ~src */
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		/* bsr needs a register/memory operand; materialize the constant. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		src = TMP_REGISTER;
		srcw = 0;
	}

	code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0xbd; /* bsr */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
			dst_r = SLJIT_TEMPORARY_REG1;
		else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
			dst_r = SLJIT_TEMPORARY_REG2;
		else
			dst_r = SLJIT_TEMPORARY_REG3;
		/* Save the chosen register's old value into dst's memory slot;
		   the xchg at the end swaps the result in and restores it. */
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0x45; /* cmovne */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!code);
	*(code + 1) |= 0x6 << 3; /* group-1 /6: xor */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* Swap result into dst and restore the borrowed register. */
		code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x87; /* xchg */
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
1036
/* Emit a single-operand operation: the MOV/MOVU family, NOT, NEG or CLZ.
   op carries the opcode plus modifier bits (SLJIT_INT_OP, SLJIT_SET_E,
   SLJIT_KEEP_FLAGS, ...); dst/dstw and src/srcw are SLJIT operand pairs.
   Returns SLJIT_SUCCESS or an error code from a failed emission. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int update = 0;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* On x86-32 the "extra" registers live in memory; CHECK_EXTRA_REGS
	   rewrites such operands to stack accesses and sets these flags. */
	int dst_is_ereg = 0;
	int src_is_ereg = 0;
#else
	/* x86-64 has enough registers: no extra-register remapping needed. */
	#define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);

	if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
		op = GET_OPCODE(op);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Moves always operate in full (64 bit) address mode here; the
		   sized emitters below handle the narrowing themselves. */
		compiler->mode32 = 0;
#endif

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			/* MOVU = move with base register update; fold it back to
			   the plain MOV opcode and remember the update. */
			update = 1;
			op -= 7;
		}

		if (src & SLJIT_IMM) {
			/* Pre-truncate the immediate to the operand size. */
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (unsigned char)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (signed char)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (unsigned short)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (signed short)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (unsigned int)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (signed int)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			/* Immediate to a memory-backed extra register is a plain
			   memory store: emit it directly. */
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		/* Pre-update the source base register with LEA (0x8d) when a
		   displacement or index register is present. */
		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8d;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Sized or mem-to-mem moves cannot target a memory-backed extra
		   register directly; go through TMP_REGISTER instead. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
			dst = TMP_REGISTER;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Flush the TMP_REGISTER detour back to the extra register slot. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
#endif

		/* Post-update the destination base register with LEA. */
		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x8d;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op)))
		compiler->flags_saved = 0;

	switch (GET_OPCODE(op)) {
	case SLJIT_NOT:
		/* NOT does not set flags; emit an extra test when E flag is requested. */
		if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, 0x2, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, 0x3, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	#undef src_is_ereg
#endif
}
1184
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* Emit a group-1 ALU operation with an immediate operand.
   _op_imm_ is the /digit (reg field) for the 0x81 immediate form,
   _op_mr_ is the opcode of the r/m <- reg form. On x86-64 an immediate
   must fit in a sign-extended 32-bit field; larger values are first
   loaded into TMP_REG2 and the register form is emitted instead.
   Expands to statements using the local `code` and `compiler`. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!code); \
		*(code + 1) |= (_op_imm_); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!code); \
		*code = (_op_mr_); \
	}

/* Short accumulator encoding: "op eax/rax, imm32" (one opcode byte,
   no ModRM). REX_W widens it to rax when not in 32-bit mode. */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))

#else

/* x86-32: every immediate fits, so the group-1 immediate form always works. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!code); \
	*(code + 1) |= (_op_imm_);

/* Short accumulator encoding: "op eax, imm32". */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))

#endif
1214
/* Emit a commutative binary ALU operation (ADD/ADC/AND/OR/XOR):
   dst = src1 OP src2. The four opcode arguments select the encoding:
   op_rm = reg <- r/m form, op_mr = r/m <- reg form, op_imm = /digit of
   the group-1 immediate form, op_eax_imm = short accumulator form.
   Commutativity is exploited when dst aliases src2. */
static int emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute into TMP_REGISTER only for the flags. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* In-place form: dst OP= src2. */
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				/* imm8 would be shorter than the eax form, so only use
				   the accumulator encoding for wide immediates. */
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
			/* Special exception for sljit_emit_cond_value. */
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Memory-to-memory: stage src2 in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		/* Mirror of the case above with the operands swapped. */
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		/* dst is a register: compute directly into it. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1330
/* Emit a non-commutative binary ALU operation (SUB/SBB):
   dst = src1 OP src2. Opcode arguments are as in emit_cum_binary.
   Unlike the cumulative variant, dst == src2 cannot be exploited, and
   the general path must avoid clobbering src2 before it is read. */
static int emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute into TMP_REGISTER only for the flags. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* In-place form: dst OP= src2. */
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				/* Accumulator short form only pays off for wide immediates. */
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Memory-to-memory: stage src2 in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
		/* dst != src2 required: the MOV below would otherwise destroy src2. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1412
1413 static int emit_mul(struct sljit_compiler *compiler,
1414 int dst, sljit_w dstw,
1415 int src1, sljit_w src1w,
1416 int src2, sljit_w src2w)
1417 {
1418 sljit_ub* code;
1419 int dst_r;
1420
1421 dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
1422
1423 /* Register destination. */
1424 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1425 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1426 FAIL_IF(!code);
1427 *code++ = 0x0f;
1428 *code = 0xaf;
1429 }
1430 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1431 code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1432 FAIL_IF(!code);
1433 *code++ = 0x0f;
1434 *code = 0xaf;
1435 }
1436 else if (src1 & SLJIT_IMM) {
1437 if (src2 & SLJIT_IMM) {
1438 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1439 src2 = dst_r;
1440 src2w = 0;
1441 }
1442
1443 if (src1w <= 127 && src1w >= -128) {
1444 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1445 FAIL_IF(!code);
1446 *code = 0x6b;
1447 code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1448 FAIL_IF(!code);
1449 INC_CSIZE(1);
1450 *code = (sljit_b)src1w;
1451 }
1452 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1453 else {
1454 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1455 FAIL_IF(!code);
1456 *code = 0x69;
1457 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1458 FAIL_IF(!code);
1459 INC_CSIZE(4);
1460 *(sljit_w*)code = src1w;
1461 }
1462 #else
1463 else if (IS_HALFWORD(src1w)) {
1464 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1465 FAIL_IF(!code);
1466 *code = 0x69;
1467 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1468 FAIL_IF(!code);
1469 INC_CSIZE(4);
1470 *(sljit_hw*)code = (sljit_hw)src1w;
1471 }
1472 else {
1473 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1474 if (dst_r != src2)
1475 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1476 code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1477 FAIL_IF(!code);
1478 *code++ = 0x0f;
1479 *code = 0xaf;
1480 }
1481 #endif
1482 }
1483 else if (src2 & SLJIT_IMM) {
1484 /* Note: src1 is NOT immediate. */
1485
1486 if (src2w <= 127 && src2w >= -128) {
1487 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1488 FAIL_IF(!code);
1489 *code = 0x6b;
1490 code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1491 FAIL_IF(!code);
1492 INC_CSIZE(1);
1493 *code = (sljit_b)src2w;
1494 }
1495 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1496 else {
1497 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1498 FAIL_IF(!code);
1499 *code = 0x69;
1500 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1501 FAIL_IF(!code);
1502 INC_CSIZE(4);
1503 *(sljit_w*)code = src2w;
1504 }
1505 #else
1506 else if (IS_HALFWORD(src2w)) {
1507 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1508 FAIL_IF(!code);
1509 *code = 0x69;
1510 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1511 FAIL_IF(!code);
1512 INC_CSIZE(4);
1513 *(sljit_hw*)code = (sljit_hw)src2w;
1514 }
1515 else {
1516 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1517 if (dst_r != src1)
1518 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1519 code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1520 FAIL_IF(!code);
1521 *code++ = 0x0f;
1522 *code = 0xaf;
1523 }
1524 #endif
1525 }
1526 else {
1527 /* Neither argument is immediate. */
1528 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1529 dst_r = TMP_REGISTER;
1530 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1531 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1532 FAIL_IF(!code);
1533 *code++ = 0x0f;
1534 *code = 0xaf;
1535 }
1536
1537 if (dst_r == TMP_REGISTER)
1538 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1539
1540 return SLJIT_SUCCESS;
1541 }
1542
/* Try to emit dst = src1 + src2 as a single flag-preserving LEA (0x8d).
   Only handles register+register and register+immediate forms; returns
   SLJIT_ERR_UNSUPPORTED when the operand combination cannot be expressed
   so the caller can fall back to a normal ADD. */
static int emit_lea_binary(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;
	int dst_r, done = 0;

	/* These cases better be left to handled by normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			/* It is not possible to be both SLJIT_LOCALS_REG. */
			if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
				/* LEA dst_r, [src1 + src2]. */
				code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
				FAIL_IF(!code);
				*code = 0x8d;
				done = 1;
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Immediate must fit a 32-bit displacement on x86-64. */
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
#else
		if (src2 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			/* LEA dst_r, [src1 + src2w]. */
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}
	else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
#else
		if (src1 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			/* LEA dst_r, [src2 + src1w]. */
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}

	if (done) {
		/* Memory destination: move the result out of TMP_REGISTER. */
		if (dst_r == TMP_REGISTER)
			return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
1602
/* Emit a CMP of src1 against src2 (sets flags only, no destination).
   Opcodes: 0x3b = CMP r, r/m; 0x39 = CMP r/m, r; /7 = group-1 immediate
   CMP; 0x3d = CMP eax/rax, imm32 short form. */
static int emit_cmp_binary(struct sljit_compiler *compiler,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		/* CMP eax, imm32 -- shortest encoding for wide immediates. */
		BINARY_EAX_IMM(0x3d, src2w);
		return SLJIT_SUCCESS;
	}

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 & SLJIT_IMM) {
			/* CMP r/m, imm (group 1, /7). */
			BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!code);
			*code = 0x3b;
		}
		return SLJIT_SUCCESS;
	}

	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
		/* CMP r/m, reg keeps the operand order (src1 - src2). */
		code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!code);
		*code = 0x39;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			/* Both immediate: stage src1 in a register first. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			src1 = TMP_REGISTER;
			src1w = 0;
		}
		BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
	}
	else {
		/* Memory-to-memory: stage src1 in TMP_REGISTER. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
		FAIL_IF(!code);
		*code = 0x3b;
	}
	return SLJIT_SUCCESS;
}
1653
1654 static int emit_test_binary(struct sljit_compiler *compiler,
1655 int src1, sljit_w src1w,
1656 int src2, sljit_w src2w)
1657 {
1658 sljit_ub* code;
1659
1660 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1661 if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1662 #else
1663 if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1664 #endif
1665 BINARY_EAX_IMM(0xa9, src2w);
1666 return SLJIT_SUCCESS;
1667 }
1668
1669 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1670 if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1671 #else
1672 if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1673 #endif
1674 BINARY_EAX_IMM(0xa9, src1w);
1675 return SLJIT_SUCCESS;
1676 }
1677
1678 if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1679 if (src2 & SLJIT_IMM) {
1680 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1681 if (IS_HALFWORD(src2w) || compiler->mode32) {
1682 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1683 FAIL_IF(!code);
1684 *code = 0xf7;
1685 }
1686 else {
1687 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1688 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1689 FAIL_IF(!code);
1690 *code = 0x85;
1691 }
1692 #else
1693 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1694 FAIL_IF(!code);
1695 *code = 0xf7;
1696 #endif
1697 }
1698 else {
1699 code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1700 FAIL_IF(!code);
1701 *code = 0x85;
1702 }
1703 return SLJIT_SUCCESS;
1704 }
1705
1706 if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1707 if (src1 & SLJIT_IMM) {
1708 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1709 if (IS_HALFWORD(src1w) || compiler->mode32) {
1710 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1711 FAIL_IF(!code);
1712 *code = 0xf7;
1713 }
1714 else {
1715 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1716 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1717 FAIL_IF(!code);
1718 *code = 0x85;
1719 }
1720 #else
1721 code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1722 FAIL_IF(!code);
1723 *code = 0xf7;
1724 #endif
1725 }
1726 else {
1727 code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1728 FAIL_IF(!code);
1729 *code = 0x85;
1730 }
1731 return SLJIT_SUCCESS;
1732 }
1733
1734 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1735 if (src2 & SLJIT_IMM) {
1736 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1737 if (IS_HALFWORD(src2w) || compiler->mode32) {
1738 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1739 FAIL_IF(!code);
1740 *code = 0xf7;
1741 }
1742 else {
1743 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1744 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
1745 FAIL_IF(!code);
1746 *code = 0x85;
1747 }
1748 #else
1749 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1750 FAIL_IF(!code);
1751 *code = 0xf7;
1752 #endif
1753 }
1754 else {
1755 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1756 FAIL_IF(!code);
1757 *code = 0x85;
1758 }
1759 return SLJIT_SUCCESS;
1760 }
1761
/* Emit a shift/rotate: dst = src1 SHIFT src2.
   mode is the group-2 /digit already shifted into the reg field
   (e.g. 0x4<<3 = SHL, 0x5<<3 = SHR, 0x7<<3 = SAR), OR-ed into the
   ModRM byte via "*code |= mode". A variable shift count must be in
   CL (SLJIT_PREF_SHIFT_REG), so non-immediate counts require careful
   juggling to preserve the caller's ECX value. */
static int emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	/* Easy cases: the count is an immediate or already in CL. */
	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			/* In-place shift. */
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			/* Flags only: shift a copy in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* dst and count are both ECX: shift in TMP_REGISTER, then
			   move the result into ECX. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			return SLJIT_SUCCESS;
		}
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			/* Register destination: copy then shift in place. */
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}

		/* Memory destination: compute in TMP_REGISTER, then store. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* dst is ECX itself: shift TMP_REGISTER by CL, result into ECX. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* Safe to compute in dst; save and restore ECX around the shift. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may used for
		   addressing, and we must ensure to work even in that case. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1851
/* Emit a shift that must also produce valid condition flags.
   The x86 shift instructions leave flags unchanged when the (masked)
   count is zero, so a zero count needs special handling, and a variable
   count needs an explicit compare to materialize the flags. */
static int emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_ub mode, int set_flags,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Counts are masked to 6 bits (64-bit) or 5 bits (32-bit mode). */
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		/* Effective count is zero: a plain move suffices when no flags
		   are needed... */
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		/* ...otherwise OR with 0 copies the value AND sets the flags. */
		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	/* Variable count: flags may be stale after the shift, so compare the
	   result (or the source, when dst is not a register) against zero. */
	if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));

	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}
1886
/* Emit a two-operand operation: dst = src1 OP src2.
   op carries the opcode plus modifier bits (SLJIT_INT_OP, flag requests,
   SLJIT_KEEP_FLAGS). Dispatches to the binary emitters with the raw x86
   opcode bytes: for each ALU op the arguments are (reg<-r/m opcode,
   r/m<-reg opcode, group-1 /digit, accumulator-imm opcode). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		/* MUL and the shifts clobber flags unpredictably: either drop the
		   cached flags or save them first when the caller wants them kept. */
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			/* Flag-free ADD can often be a single LEA. */
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		/* ADD: 0x03 / 0x01 / group-1 /0 / 0x05. */
		return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		/* ADC: 0x13 / 0x11 / group-1 /2 / 0x15. */
		return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			/* Flag-free SUB of an immediate can be LEA with -src2w. */
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		/* SUB with unused destination is just a CMP. */
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		/* SUB: 0x2b / 0x29 / group-1 /5 / 0x2d. */
		return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		/* SBB: 0x1b / 0x19 / group-1 /3 / 0x1d. */
		return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		/* AND with unused destination is just a TEST. */
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		/* AND: 0x23 / 0x21 / group-1 /4 / 0x25. */
		return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		/* OR: 0x0b / 0x09 / group-1 /1 / 0x0d. */
		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		/* XOR: 0x33 / 0x31 / group-1 /6 / 0x35. */
		return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		/* Group-2 /4 = SHL. */
		return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		/* Group-2 /5 = SHR. */
		return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		/* Group-2 /7 = SAR. */
		return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
1978
1979 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
1980 {
1981 check_sljit_get_register_index(reg);
1982 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1983 if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
1984 || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
1985 return -1;
1986 #endif
1987 return reg_map[reg];
1988 }
1989
/* Copies 'size' bytes of raw, pre-encoded machine code ('instruction')
   verbatim into the instruction stream.  No re-encoding or validation
   is done beyond the size sanity check below. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, int size)
{
	sljit_ub *buf;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	/* An x86 instruction is at most 15 bytes long. */
	SLJIT_ASSERT(size > 0 && size < 16);

	buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!buf);
	INC_SIZE(size);
	SLJIT_MEMMOVE(buf, instruction, size);
	return SLJIT_SUCCESS;
}
2005
2006 /* --------------------------------------------------------------------- */
2007 /* Floating point operators */
2008 /* --------------------------------------------------------------------- */
2009
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
/* Set once by init_compiler() from the CPUID feature bits; selects
   between the SSE2 and x87 code paths at runtime. */
static int sse2_available = 0;
#endif

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Alignment + 2 * 16 bytes.  sse2_buffer points into sse2_data,
   rounded up to a 16 byte boundary; init_compiler() fills the first
   16 byte block with the FNEG sign mask and the second with the FABS
   clear-sign mask. */
static sljit_i sse2_data[3 + 4 + 4];
static sljit_i *sse2_buffer;
2019
2020 static void init_compiler()
2021 {
2022 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2023 int features = 0;
2024 #endif
2025
2026 sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
2027 sse2_buffer[0] = 0;
2028 sse2_buffer[1] = 0x80000000;
2029 sse2_buffer[4] = 0xffffffff;
2030 sse2_buffer[5] = 0x7fffffff;
2031
2032 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2033 #ifdef __GNUC__
2034 /* AT&T syntax. */
2035 asm (
2036 "pushl %%ebx\n"
2037 "movl $0x1, %%eax\n"
2038 "cpuid\n"
2039 "popl %%ebx\n"
2040 "movl %%edx, %0\n"
2041 : "=g" (features)
2042 :
2043 : "%eax", "%ecx", "%edx"
2044 );
2045 #elif defined(_MSC_VER) || defined(__BORLANDC__)
2046 /* Intel syntax. */
2047 __asm {
2048 mov eax, 1
2049 push ebx
2050 cpuid
2051 pop ebx
2052 mov features, edx
2053 }
2054 #else
2055 #error "SLJIT_SSE2_AUTO is not implemented for this C compiler"
2056 #endif
2057 sse2_available = (features >> 26) & 0x1;
2058 #endif
2059 }
2060
2061 #endif
2062
2063 SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
2064 {
2065 /* Always available. */
2066 return 1;
2067 }
2068
2069 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2070
2071 static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2072 int xmm1, int xmm2, sljit_w xmm2w)
2073 {
2074 sljit_ub *buf;
2075
2076 buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2077 FAIL_IF(!buf);
2078 *buf++ = 0x0f;
2079 *buf = opcode;
2080 return SLJIT_SUCCESS;
2081 }
2082
2083 static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2084 int xmm1, int xmm2, sljit_w xmm2w)
2085 {
2086 sljit_ub *buf;
2087
2088 buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2089 FAIL_IF(!buf);
2090 *buf++ = 0x0f;
2091 *buf = opcode;
2092 return SLJIT_SUCCESS;
2093 }
2094
2095 static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
2096 int dst, int src, sljit_w srcw)
2097 {
2098 return emit_sse2(compiler, 0x10, dst, src, srcw);
2099 }
2100
2101 static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
2102 int dst, sljit_w dstw, int src)
2103 {
2104 return emit_sse2(compiler, 0x11, src, dst, dstw);
2105 }
2106
/* Single operand floating point operation, SSE2 code path: FCMP,
   FMOV, FNEG, FABS.  Compiled as the public sljit_emit_fop1 when SSE2
   is unconditional, or as a static helper dispatched at runtime when
   SLJIT_SSE2_AUTO is enabled. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		compiler->flags_saved = 0;
		/* ucomisd (0x2e) needs its first operand in a register;
		   stage dst through TMP_FREG when it lives in memory. */
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			dst_r = dst;
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
		}
		return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
	}

	if (op == SLJIT_FMOV) {
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			return emit_sse2_load(compiler, dst, src, srcw);
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
			return emit_sse2_store(compiler, dst, dstw, src);
		/* Memory to memory move: stage through TMP_FREG. */
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	}

	/* FNEG / FABS: compute in dst when it is a register, otherwise
	   in TMP_FREG and store back at the end. */
	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}

	switch (op) {
	case SLJIT_FNEG:
		/* xorpd (0x57) with the sign-bit mask prepared by
		   init_compiler(). */
		FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
		break;

	case SLJIT_FABS:
		/* andpd (0x54) with the clear-sign mask (second 16 byte
		   block of the buffer). */
		FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2168
/* Two operand floating point arithmetic, SSE2 code path: FADD, FSUB,
   FMUL, FDIV.  Compiled as the public sljit_emit_fop2 when SSE2 is
   unconditional, or as a static helper dispatched at runtime when
   SLJIT_SSE2_AUTO is enabled. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	/* SSE2 arithmetic is destructive (dst_r op= src2), so arrange
	   for src1's value to be in the result register first. */
	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
			/* Swap arguments (the operation is commutative). */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
		else {
			/* dst == src2 and the operation is not commutative:
			   compute in TMP_FREG so src2 stays intact. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
	}

	switch (op) {
	case SLJIT_FADD:
		/* addsd. */
		FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w));
		break;

	case SLJIT_FSUB:
		/* subsd. */
		FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w));
		break;

	case SLJIT_FMUL:
		/* mulsd. */
		FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w));
		break;

	case SLJIT_FDIV:
		/* divsd. */
		FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2230
2231 #endif
2232
2233 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2)
2234
2235 static int emit_fld(struct sljit_compiler *compiler,
2236 int src, sljit_w srcw)
2237 {
2238 sljit_ub *buf;
2239
2240 if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2241 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2242 FAIL_IF(!buf);
2243 INC_SIZE(2);
2244 *buf++ = 0xd9;
2245 *buf = 0xc0 + src - 1;
2246 return SLJIT_SUCCESS;
2247 }
2248
2249 buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2250 FAIL_IF(!buf);
2251 *buf = 0xdd;
2252 return SLJIT_SUCCESS;
2253 }
2254
2255 static int emit_fop(struct sljit_compiler *compiler,
2256 sljit_ub st_arg, sljit_ub st_arg2,
2257 sljit_ub m64fp_arg, sljit_ub m64fp_arg2,
2258 int src, sljit_w srcw)
2259 {
2260 sljit_ub *buf;
2261
2262 if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2263 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2264 FAIL_IF(!buf);
2265 INC_SIZE(2);
2266 *buf++ = st_arg;
2267 *buf = st_arg2 + src;
2268 return SLJIT_SUCCESS;
2269 }
2270
2271 buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2272 FAIL_IF(!buf);
2273 *buf++ = m64fp_arg;
2274 *buf |= m64fp_arg2;
2275 return SLJIT_SUCCESS;
2276 }
2277
2278 static int emit_fop_regs(struct sljit_compiler *compiler,
2279 sljit_ub st_arg, sljit_ub st_arg2,
2280 int src)
2281 {
2282 sljit_ub *buf;
2283
2284 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2285 FAIL_IF(!buf);
2286 INC_SIZE(2);
2287 *buf++ = st_arg;
2288 *buf = st_arg2 + src;
2289 return SLJIT_SUCCESS;
2290 }
2291
/* Single operand floating point operation, legacy x87 code path.
   Compiled as the public sljit_emit_fop1 when SSE2 is disabled, or as
   a static helper dispatched at runtime when SLJIT_SSE2_AUTO is
   enabled. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_ub *buf;
#endif

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		compiler->flags_saved = 0;
#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* x86-32: compare via fcom, then transfer the FPU status
		   word to EFLAGS with fnstsw ax (0xdf 0xe0) + sahf (0x9e).
		   EAX is preserved in TMP_REGISTER around the sequence. */
		FAIL_IF(emit_fld(compiler, dst, dstw));
		FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw));

		/* Copy flags. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
		FAIL_IF(!buf);
		INC_SIZE(3);
		*buf++ = 0xdf;
		*buf++ = 0xe0;
		/* Note: lahf is not supported on all x86-64 architectures. */
		*buf++ = 0x9e;
		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
#else
		/* x86-64: fucomip (0xdf 0xe8+i) writes EFLAGS directly and
		   pops the stack. */
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
			FAIL_IF(emit_fld(compiler, dst, dstw));
			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
		} else {
			/* Both operands pushed; dst's register index shifts by
			   one after the first fld when dst is an FPU register.
			   NOTE(review): passing 'src' as the st(i) index here
			   looks suspicious when src is a memory operand (the
			   pushed value sits at st(1)) -- verify. */
			FAIL_IF(emit_fld(compiler, src, srcw));
			FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw));
			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
			FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0));
		}
#endif
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_fld(compiler, src, srcw));

	switch (op) {
	case SLJIT_FNEG:
		/* fchs (0xd9 0xe0). */
		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0));
		break;
	case SLJIT_FABS:
		/* fabs (0xd9 0xe1). */
		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0));
		break;
	}

	/* fstp: pop the result from st(0) into dst (st(i) or m64fp). */
	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));

	return SLJIT_SUCCESS;
}
2356
/* Two operand floating point arithmetic, legacy x87 code path.
   Compiled as the public sljit_emit_fop2 when SSE2 is disabled, or as
   a static helper dispatched at runtime when SLJIT_SSE2_AUTO is
   enabled. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	/* In-place form: dst == src1 is an FPU register.  Push src2 and
	   use the 0xde pop-forms (faddp/fsubp/fmulp/fdivp st(i), st)
	   which store into src1 and pop the pushed value. */
	if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) {
		FAIL_IF(emit_fld(compiler, src2, src2w));

		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1));
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1));
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1));
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1));
			break;
		}
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_fld(compiler, src1, src1w));

	/* dst == src2 is an FPU register: src1 is at st(0), so use the
	   reversed pop-forms (0xe0 fsubrp / 0xf0 fdivrp) to get
	   src1 OP src2 stored into src2. */
	if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) {
		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2));
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2));
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2));
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2));
			break;
		}
		return SLJIT_SUCCESS;
	}

	/* General case: st(0) = src1, combine with src2 (register form
	   0xd8 or m64fp form 0xdc), then pop the result into dst. */
	switch (op) {
	case SLJIT_FADD:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w));
		break;
	case SLJIT_FSUB:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w));
		break;
	case SLJIT_FMUL:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w));
		break;
	case SLJIT_FDIV:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w));
		break;
	}

	/* fstp dst: pop st(0) into the destination. */
	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));

	return SLJIT_SUCCESS;
}
2432 #endif
2433
2434 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2435
2436 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2437 int dst, sljit_w dstw,
2438 int src, sljit_w srcw)
2439 {
2440 if (sse2_available)
2441 return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw);
2442 else
2443 return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw);
2444 }
2445
2446 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2447 int dst, sljit_w dstw,
2448 int src1, sljit_w src1w,
2449 int src2, sljit_w src2w)
2450 {
2451 if (sse2_available)
2452 return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2453 else
2454 return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2455 }
2456
2457 #endif
2458
2459 /* --------------------------------------------------------------------- */
2460 /* Conditional instructions */
2461 /* --------------------------------------------------------------------- */
2462
/* Creates a label at the current position.  Repeated requests at the
   same offset return the same label record.  A zero-length marker
   record (type byte 0) is placed in the code buffer so the generator
   can resolve the label address later. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *buf;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	/* We should restore the flags before the label,
	   since other taken jumps have their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	/* Marker record: length 0, record type 0 (label). */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	*buf++ = 0;
	*buf++ = 0;

	return label;
}
2491
/* Emits a (possibly conditional) jump or call whose target is set
   later.  Only a marker record is stored now; the real instruction
   bytes are produced during code generation, so only the worst case
   size is accounted for here. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
{
	sljit_ub *buf;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		/* Restore the saved flags before plain/conditional jumps
		   (type <= SLJIT_JUMP); calls do not need them. */
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	/* Marker record: length 0, record type = jump type + 4. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(buf);

	*buf++ = 0;
	*buf++ = type + 4;
	return jump;
}
2528
/* Emits an indirect jump or call through a register, memory operand
   or immediate address.  Immediates become pending jump records;
   register/memory operands are encoded directly as FF /4 (jmp) or
   FF /2 (call). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
{
	sljit_ub *code;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
		/* call_with_args() below rearranges registers and/or the
		   stack, so a target operand it would disturb is rescued
		   into TMP_REGISTER (or its stack offset is compensated)
		   first. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
			if (src & 0xf0) {
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				/* The stack pointer moves by one word for the
				   third argument; adjust the offset instead. */
				srcw += sizeof(sljit_w);
		}
#else
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
			if (src & 0xf0) {
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				/* All arguments are pushed on the stack here. */
				srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
		}
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		/* Marker record: length 0, record type = jump type + 4. */
		code = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(code);

		*code++ = 0;
		*code++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0xff;
		/* ModRM reg field: /2 = call, /4 = jmp. */
		*code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
	}
	return SLJIT_SUCCESS;
}
2610
/* Materializes the boolean value of condition 'type' into dst:
   op == SLJIT_MOV stores 0/1, any other op combines the flag value
   with the current dst via sljit_emit_op2.  Encoded as SETcc
   (0x0f 0x9X) followed by MOVZX (0x0f 0xb6).  On x86-32 SETcc can
   only target byte-addressable registers (eax..ebx), hence the
   shuffling through EAX/TMP_REGISTER in the fallback paths. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
{
	sljit_ub *buf;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR();
	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, 0));

	/* Map the SLJIT condition to the SETcc opcode byte. */
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		cond_set = 0x94; /* sete */
		break;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		cond_set = 0x95; /* setne */
		break;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		cond_set = 0x92; /* setb */
		break;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		cond_set = 0x93; /* setae */
		break;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		cond_set = 0x97; /* seta */
		break;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		cond_set = 0x96; /* setbe */
		break;

	case SLJIT_C_SIG_LESS:
		cond_set = 0x9c; /* setl */
		break;

	case SLJIT_C_SIG_GREATER_EQUAL:
		cond_set = 0x9d; /* setge */
		break;

	case SLJIT_C_SIG_GREATER:
		cond_set = 0x9f; /* setg */
		break;

	case SLJIT_C_SIG_LESS_EQUAL:
		cond_set = 0x9e; /* setle */
		break;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		cond_set = 0x90; /* seto */
		break;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		cond_set = 0x91; /* setno */
		break;

	case SLJIT_C_FLOAT_NAN:
		cond_set = 0x9a; /* setp */
		break;

	case SLJIT_C_FLOAT_NOT_NAN:
		cond_set = 0x9b; /* setnp */
		break;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Every register is byte-addressable with a REX prefix, so the
	   flag can be computed either directly in dst or in TMP_REGISTER. */
	reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!buf);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
	*buf++ = 0x0f;
	*buf++ = cond_set;
	*buf++ = 0xC0 | reg_lmap[reg];
	/* movzx reg, reg8: zero extend the flag byte. */
	*buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*buf++ = 0x0f;
	*buf++ = 0xb6;
	*buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg == TMP_REGISTER) {
		if (op == SLJIT_MOV) {
			compiler->mode32 = 0;
			EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		}
		else {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
			compiler->skip_checks = 1;
#endif
			/* Combine the flag value with dst using the given op. */
			return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
		}
	}
#else
	if (op == SLJIT_MOV) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			/* dst itself is byte-addressable: setcc + movzx in place. */
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
		}
		else {
			/* Compute the flag in AL while the original EAX is
			   parked in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			if (dst >= SLJIT_SAVED_REG1 && dst <= SLJIT_NO_REGISTERS)
				/* movzx dst, al. */
				*buf = 0xC0 | (reg_map[dst] << 3);
			else {
				/* movzx eax, al; then store EAX to memory dst. */
				*buf = 0xC0;
				EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
			}

			/* Restore the original EAX. */
			EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
		}
	}
	else {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			/* Save dst, set its low byte to the flag, then combine
			   with the saved value via op2 below. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3);

			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3 + 1);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			/* movzx eax, al. */
			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf++ = 0xC0;

			/* xchg eax, TMP_REGISTER (0x90 + r): restores the saved
			   EAX while moving the flag value into TMP_REGISTER. */
			*buf++ = 0x90 + reg_map[TMP_REGISTER];
		}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
		compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
	}
#endif

	return SLJIT_SUCCESS;
}
2798
/* Emits an instruction that loads a later-patchable constant into dst
   and returns the record used by sljit_set_const().  On x86-64 the
   constant is a 64 bit immediate register load; on x86-32 a mov with
   a 32 bit immediate. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
{
	sljit_ub *buf;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	/* The immediate must sit in a register load; stage through
	   TMP_REGISTER when dst is not a register. */
	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REGISTER;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	/* Marker record: length 0, record type 1 (constant) so the code
	   generator can record the address of the embedded immediate. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	*buf++ = 0;
	*buf++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
			return NULL;
#endif

	return const_;
}
2844
/* Patches the target of a rewritable jump whose operand lives at code
   address 'addr'.  On x86-32 the operand is a 32 bit displacement
   relative to the end of the 4 byte field; on x86-64 it is an
   absolute address. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	*(sljit_w*)addr = new_addr - (addr + 4);
#else
	*(sljit_uw*)addr = new_addr;
#endif
}
2853
/* Rewrites the immediate of a constant created by sljit_emit_const;
   'addr' is the address of the embedded immediate field. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
{
	*(sljit_w*)addr = new_constant;
}

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12