/[pcre]/code/trunk/sljit/sljitNativeARM_Thumb2.c
ViewVC logotype

Contents of /code/trunk/sljit/sljitNativeARM_Thumb2.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 836 - (hide annotations) (download)
Wed Dec 28 17:16:11 2011 UTC (2 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 55926 byte(s)
Merging all the changes from the pcre16 branch into the trunk.

1 ph10 662 /*
2     * Stack-less Just-In-Time compiler
3     *
4 ph10 836 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 ph10 662 *
6     * Redistribution and use in source and binary forms, with or without modification, are
7     * permitted provided that the following conditions are met:
8     *
9     * 1. Redistributions of source code must retain the above copyright notice, this list of
10     * conditions and the following disclaimer.
11     *
12     * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13     * of conditions and the following disclaimer in the documentation and/or other materials
14     * provided with the distribution.
15     *
16     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17     * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19     * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21     * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22     * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23     * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24     * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25     */
26    
27 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
28 ph10 662 {
29     return "arm-thumb2";
30     }
31    
32     /* Last register + 1. */
33     #define TMP_REG1 (SLJIT_NO_REGISTERS + 1)
34     #define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
35     #define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
36     #define TMP_PC (SLJIT_NO_REGISTERS + 4)
37    
38     #define TMP_FREG1 (SLJIT_FLOAT_REG4 + 1)
39     #define TMP_FREG2 (SLJIT_FLOAT_REG4 + 2)
40    
41     /* See sljit_emit_enter if you want to change them. */
42     static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 5] = {
43     0, 0, 1, 2, 12, 5, 6, 7, 8, 10, 11, 13, 3, 4, 14, 15
44     };
45    
46     #define COPY_BITS(src, from, to, bits) \
47     ((from >= to ? (src >> (from - to)) : (src << (to - from))) & (((1 << bits) - 1) << to))
48    
49     /* Thumb16 encodings. */
50     #define RD3(rd) (reg_map[rd])
51     #define RN3(rn) (reg_map[rn] << 3)
52     #define RM3(rm) (reg_map[rm] << 6)
53     #define RDN3(rdn) (reg_map[rdn] << 8)
54     #define IMM3(imm) (imm << 6)
55     #define IMM8(imm) (imm)
56    
57     /* Thumb16 helpers. */
58     #define SET_REGS44(rd, rn) \
59     ((reg_map[rn] << 3) | (reg_map[rd] & 0x7) | ((reg_map[rd] & 0x8) << 4))
60     #define IS_2_LO_REGS(reg1, reg2) \
61     (reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
62     #define IS_3_LO_REGS(reg1, reg2, reg3) \
63     (reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)
64    
65     /* Thumb32 encodings. */
66     #define RD4(rd) (reg_map[rd] << 8)
67     #define RN4(rn) (reg_map[rn] << 16)
68     #define RM4(rm) (reg_map[rm])
69     #define RT4(rt) (reg_map[rt] << 12)
70     #define DD4(dd) ((dd) << 12)
71     #define DN4(dn) ((dn) << 16)
72     #define DM4(dm) (dm)
73     #define IMM5(imm) \
74     (COPY_BITS(imm, 2, 12, 3) | ((imm & 0x3) << 6))
75     #define IMM12(imm) \
76     (COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff))
77    
78     typedef sljit_ui sljit_ins;
79    
80     /* --------------------------------------------------------------------- */
81     /* Instrucion forms */
82     /* --------------------------------------------------------------------- */
83    
84     /* dot '.' changed to _
85     I immediate form (possibly followed by number of immediate bits). */
86     #define ADCI 0xf1400000
87     #define ADCS 0x4140
88     #define ADC_W 0xeb400000
89     #define ADD 0x4400
90     #define ADDS 0x1800
91     #define ADDSI3 0x1c00
92     #define ADDSI8 0x3000
93     #define ADD_W 0xeb000000
94     #define ADDWI 0xf2000000
95     #define ADD_SP 0xb000
96     #define ADD_W 0xeb000000
97     #define ADD_WI 0xf1000000
98     #define ANDI 0xf0000000
99     #define ANDS 0x4000
100     #define AND_W 0xea000000
101     #define ASRS 0x4100
102     #define ASRSI 0x1000
103     #define ASR_W 0xfa40f000
104     #define ASR_WI 0xea4f0020
105     #define BICI 0xf0200000
106     #define BKPT 0xbe00
107     #define BLX 0x4780
108     #define BX 0x4700
109     #define CLZ 0xfab0f080
110     #define CMPI 0x2800
111     #define CMP_W 0xebb00f00
112     #define EORI 0xf0800000
113     #define EORS 0x4040
114     #define EOR_W 0xea800000
115     #define IT 0xbf00
116     #define LSLS 0x4080
117     #define LSLSI 0x0000
118     #define LSL_W 0xfa00f000
119     #define LSL_WI 0xea4f0000
120     #define LSRS 0x40c0
121     #define LSRSI 0x0800
122     #define LSR_W 0xfa20f000
123     #define LSR_WI 0xea4f0010
124     #define MOV 0x4600
125     #define MOVSI 0x2000
126     #define MOVT 0xf2c00000
127     #define MOVW 0xf2400000
128     #define MOV_WI 0xf04f0000
129     #define MUL 0xfb00f000
130     #define MVNS 0x43c0
131     #define MVN_W 0xea6f0000
132     #define MVN_WI 0xf06f0000
133     #define NOP 0xbf00
134     #define ORNI 0xf0600000
135     #define ORRI 0xf0400000
136     #define ORRS 0x4300
137     #define ORR_W 0xea400000
138     #define POP 0xbd00
139     #define POP_W 0xe8bd0000
140     #define PUSH 0xb500
141     #define PUSH_W 0xe92d0000
142     #define RSB_WI 0xf1c00000
143     #define RSBSI 0x4240
144     #define SBCI 0xf1600000
145     #define SBCS 0x4180
146     #define SBC_W 0xeb600000
147     #define SMULL 0xfb800000
148     #define STR_SP 0x9000
149     #define SUBS 0x1a00
150     #define SUBSI3 0x1e00
151     #define SUBSI8 0x3800
152     #define SUB_W 0xeba00000
153     #define SUBWI 0xf2a00000
154     #define SUB_SP 0xb080
155     #define SUB_WI 0xf1a00000
156     #define SXTB 0xb240
157     #define SXTB_W 0xfa4ff080
158     #define SXTH 0xb200
159     #define SXTH_W 0xfa0ff080
160     #define TST 0x4200
161     #define UXTB 0xb2c0
162     #define UXTB_W 0xfa5ff080
163     #define UXTH 0xb280
164     #define UXTH_W 0xfa1ff080
165     #define VABS_F64 0xeeb00bc0
166     #define VADD_F64 0xee300b00
167     #define VCMP_F64 0xeeb40b40
168     #define VDIV_F64 0xee800b00
169     #define VMOV_F64 0xeeb00b40
170     #define VMRS 0xeef1fa10
171     #define VMUL_F64 0xee200b00
172     #define VNEG_F64 0xeeb10b40
173     #define VSTR 0xed000b00
174     #define VSUB_F64 0xee300b40
175    
176     static int push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
177     {
178     sljit_uh *ptr;
179     SLJIT_ASSERT(!(inst & 0xffff0000));
180    
181     ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_uh));
182     FAIL_IF(!ptr);
183     *ptr = inst;
184     compiler->size++;
185     return SLJIT_SUCCESS;
186     }
187    
188     static int push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
189     {
190     sljit_uh *ptr = (sljit_uh*)ensure_buf(compiler, sizeof(sljit_ins));
191     FAIL_IF(!ptr);
192     *ptr++ = inst >> 16;
193     *ptr = inst;
194     compiler->size += 2;
195     return SLJIT_SUCCESS;
196     }
197    
198     static SLJIT_INLINE int emit_imm32_const(struct sljit_compiler *compiler, int dst, sljit_uw imm)
199     {
200     FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) |
201     COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
202     return push_inst32(compiler, MOVT | RD4(dst) |
203     COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
204     }
205    
206     static SLJIT_INLINE void modify_imm32_const(sljit_uh* inst, sljit_uw new_imm)
207     {
208     int dst = inst[1] & 0x0f00;
209     SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
210     inst[0] = (MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1);
211     inst[1] = dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff);
212     inst[2] = (MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1);
213     inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16);
214     }
215    
216     static SLJIT_INLINE int detect_jump_type(struct sljit_jump *jump, sljit_uh *code_ptr, sljit_uh *code)
217     {
218     sljit_w diff;
219    
220     if (jump->flags & SLJIT_REWRITABLE_JUMP)
221     return 0;
222    
223     if (jump->flags & JUMP_ADDR) {
224     /* Branch to ARM code is not optimized yet. */
225     if (!(jump->u.target & 0x1))
226     return 0;
227     diff = ((sljit_w)jump->u.target - (sljit_w)(code_ptr + 2)) >> 1;
228     }
229     else {
230     SLJIT_ASSERT(jump->flags & JUMP_LABEL);
231     diff = ((sljit_w)(code + jump->u.label->size) - (sljit_w)(code_ptr + 2)) >> 1;
232     }
233    
234     if (jump->flags & IS_CONDITIONAL) {
235     SLJIT_ASSERT(!(jump->flags & IS_BL));
236     if (diff <= 127 && diff >= -128) {
237     jump->flags |= B_TYPE1;
238     return 5;
239     }
240     if (diff <= 524287 && diff >= -524288) {
241     jump->flags |= B_TYPE2;
242     return 4;
243     }
244     /* +1 comes from the prefix IT instruction. */
245     diff--;
246     if (diff <= 8388607 && diff >= -8388608) {
247     jump->flags |= B_TYPE3;
248     return 3;
249     }
250     }
251     else if (jump->flags & IS_BL) {
252     if (diff <= 8388607 && diff >= -8388608) {
253     jump->flags |= BL_TYPE6;
254     return 3;
255     }
256     }
257     else {
258     if (diff <= 1023 && diff >= -1024) {
259     jump->flags |= B_TYPE4;
260     return 4;
261     }
262     if (diff <= 8388607 && diff >= -8388608) {
263     jump->flags |= B_TYPE5;
264     return 3;
265     }
266     }
267    
268     return 0;
269     }
270    
271     static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, int flush)
272     {
273     sljit_uh* inst = (sljit_uh*)addr;
274     modify_imm32_const(inst, new_addr);
275     if (flush) {
276     SLJIT_CACHE_FLUSH(inst, inst + 3);
277     }
278     }
279    
280     static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump)
281     {
282     int type = (jump->flags >> 4) & 0xf;
283     sljit_w diff;
284     sljit_uh *jump_inst;
285     int s, j1, j2;
286    
287     if (SLJIT_UNLIKELY(type == 0)) {
288     inline_set_jump_addr(jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
289     return;
290     }
291    
292     if (jump->flags & JUMP_ADDR) {
293     SLJIT_ASSERT(jump->u.target & 0x1);
294     diff = ((sljit_w)jump->u.target - (sljit_w)(jump->addr + 4)) >> 1;
295     }
296     else
297     diff = ((sljit_w)(jump->u.label->addr) - (sljit_w)(jump->addr + 4)) >> 1;
298     jump_inst = (sljit_uh*)jump->addr;
299    
300     switch (type) {
301     case 1:
302     /* Encoding T1 of 'B' instruction */
303     SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_CONDITIONAL));
304     jump_inst[0] = 0xd000 | (jump->flags & 0xf00) | (diff & 0xff);
305     return;
306     case 2:
307     /* Encoding T3 of 'B' instruction */
308     SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_CONDITIONAL));
309     jump_inst[0] = 0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1);
310     jump_inst[1] = 0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | (diff & 0x7ff);
311     return;
312     case 3:
313     SLJIT_ASSERT(jump->flags & IS_CONDITIONAL);
314     *jump_inst++ = IT | ((jump->flags >> 4) & 0xf0) | 0x8;
315     diff--;
316     type = 5;
317     break;
318     case 4:
319     /* Encoding T2 of 'B' instruction */
320     SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_CONDITIONAL));
321     jump_inst[0] = 0xe000 | (diff & 0x7ff);
322     return;
323     }
324    
325     SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608);
326    
327     /* Really complex instruction form for branches. */
328     s = (diff >> 23) & 0x1;
329     j1 = (~(diff >> 21) ^ s) & 0x1;
330     j2 = (~(diff >> 22) ^ s) & 0x1;
331     jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
332     jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);
333    
334     /* The others have a common form. */
335     if (type == 5) /* Encoding T4 of 'B' instruction */
336     jump_inst[1] |= 0x9000;
337     else if (type == 6) /* Encoding T1 of 'BL' instruction */
338     jump_inst[1] |= 0xd000;
339     else
340     SLJIT_ASSERT_STOP();
341     }
342    
343 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
344 ph10 662 {
345     struct sljit_memory_fragment *buf;
346     sljit_uh *code;
347     sljit_uh *code_ptr;
348     sljit_uh *buf_ptr;
349     sljit_uh *buf_end;
350     sljit_uw half_count;
351    
352     struct sljit_label *label;
353     struct sljit_jump *jump;
354     struct sljit_const *const_;
355    
356     CHECK_ERROR_PTR();
357     check_sljit_generate_code(compiler);
358     reverse_buf(compiler);
359    
360     code = (sljit_uh*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_uh));
361     PTR_FAIL_WITH_EXEC_IF(code);
362     buf = compiler->buf;
363    
364     code_ptr = code;
365     half_count = 0;
366     label = compiler->labels;
367     jump = compiler->jumps;
368     const_ = compiler->consts;
369    
370     do {
371     buf_ptr = (sljit_uh*)buf->memory;
372     buf_end = buf_ptr + (buf->used_size >> 1);
373     do {
374     *code_ptr = *buf_ptr++;
375     /* These structures are ordered by their address. */
376     SLJIT_ASSERT(!label || label->size >= half_count);
377     SLJIT_ASSERT(!jump || jump->addr >= half_count);
378     SLJIT_ASSERT(!const_ || const_->addr >= half_count);
379     if (label && label->size == half_count) {
380     label->addr = ((sljit_uw)code_ptr) | 0x1;
381     label->size = code_ptr - code;
382     label = label->next;
383     }
384     if (jump && jump->addr == half_count) {
385     jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_CONDITIONAL) ? 10 : 8);
386     code_ptr -= detect_jump_type(jump, code_ptr, code);
387     jump = jump->next;
388     }
389     if (const_ && const_->addr == half_count) {
390     const_->addr = (sljit_uw)code_ptr;
391     const_ = const_->next;
392     }
393     code_ptr ++;
394     half_count ++;
395     } while (buf_ptr < buf_end);
396    
397     buf = buf->next;
398     } while (buf);
399    
400     if (label && label->size == half_count) {
401     label->addr = ((sljit_uw)code_ptr) | 0x1;
402     label->size = code_ptr - code;
403     label = label->next;
404     }
405    
406     SLJIT_ASSERT(!label);
407     SLJIT_ASSERT(!jump);
408     SLJIT_ASSERT(!const_);
409     SLJIT_ASSERT(code_ptr - code <= (int)compiler->size);
410    
411     jump = compiler->jumps;
412     while (jump) {
413     set_jump_instruction(jump);
414     jump = jump->next;
415     }
416    
417     SLJIT_CACHE_FLUSH(code, code_ptr);
418     compiler->error = SLJIT_ERR_COMPILED;
419 ph10 836 compiler->executable_size = compiler->size * sizeof(sljit_uh);
420 ph10 662 /* Set thumb mode flag. */
421     return (void*)((sljit_uw)code | 0x1);
422     }
423    
424     #define INVALID_IMM 0x80000000
425     static sljit_uw get_imm(sljit_uw imm)
426     {
427     /* Thumb immediate form. */
428     int counter;
429    
430     if (imm <= 0xff)
431     return imm;
432    
433     if ((imm & 0xffff) == (imm >> 16)) {
434     /* Some special cases. */
435     if (!(imm & 0xff00))
436     return (1 << 12) | (imm & 0xff);
437     if (!(imm & 0xff))
438     return (2 << 12) | ((imm >> 8) & 0xff);
439     if ((imm & 0xff00) == ((imm & 0xff) << 8))
440     return (3 << 12) | (imm & 0xff);
441     }
442    
443     /* Assembly optimization: count leading zeroes? */
444     counter = 8;
445     if (!(imm & 0xffff0000)) {
446     counter += 16;
447     imm <<= 16;
448     }
449     if (!(imm & 0xff000000)) {
450     counter += 8;
451     imm <<= 8;
452     }
453     if (!(imm & 0xf0000000)) {
454     counter += 4;
455     imm <<= 4;
456     }
457     if (!(imm & 0xc0000000)) {
458     counter += 2;
459     imm <<= 2;
460     }
461     if (!(imm & 0x80000000)) {
462     counter += 1;
463     imm <<= 1;
464     }
465     /* Since imm >= 128, this must be true. */
466     SLJIT_ASSERT(counter <= 31);
467    
468     if (imm & 0x00ffffff)
469     return INVALID_IMM; /* Cannot be encoded. */
470    
471     return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
472     }
473    
474     static int load_immediate(struct sljit_compiler *compiler, int dst, sljit_uw imm)
475     {
476     sljit_uw tmp;
477    
478     if (imm >= 0x10000) {
479     tmp = get_imm(imm);
480     if (tmp != INVALID_IMM)
481     return push_inst32(compiler, MOV_WI | RD4(dst) | tmp);
482     tmp = get_imm(~imm);
483     if (tmp != INVALID_IMM)
484     return push_inst32(compiler, MVN_WI | RD4(dst) | tmp);
485     }
486    
487     /* set low 16 bits, set hi 16 bits to 0. */
488     FAIL_IF(push_inst32(compiler, MOVW | RD4(dst) |
489     COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
490    
491     /* set hi 16 bit if needed. */
492     if (imm >= 0x10000)
493     return push_inst32(compiler, MOVT | RD4(dst) |
494     COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
495     return SLJIT_SUCCESS;
496     }
497    
498     #define ARG1_IMM 0x0010000
499     #define ARG2_IMM 0x0020000
500     #define KEEP_FLAGS 0x0040000
501     #define SET_MULOV 0x0080000
502     /* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
503     #define SET_FLAGS 0x0100000
504     #define UNUSED_RETURN 0x0200000
505     #define SLOW_DEST 0x0400000
506     #define SLOW_SRC1 0x0800000
507     #define SLOW_SRC2 0x1000000
508    
509     static int emit_op_imm(struct sljit_compiler *compiler, int flags, int dst, sljit_uw arg1, sljit_uw arg2)
510     {
511     /* dst must be register, TMP_REG1
512     arg1 must be register, TMP_REG1, imm
513     arg2 must be register, TMP_REG2, imm */
514     int reg;
515     sljit_uw imm;
516    
517     if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
518     /* Both are immediates. */
519     flags &= ~ARG1_IMM;
520     FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
521     arg1 = TMP_REG1;
522     }
523    
524     if (flags & (ARG1_IMM | ARG2_IMM)) {
525     reg = (flags & ARG2_IMM) ? arg1 : arg2;
526     imm = (flags & ARG2_IMM) ? arg2 : arg1;
527    
528     switch (flags & 0xffff) {
529     case SLJIT_MOV:
530     SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
531     return load_immediate(compiler, dst, imm);
532     case SLJIT_NOT:
533     if (!(flags & SET_FLAGS))
534     return load_immediate(compiler, dst, ~imm);
535     /* Since the flags should be set, we just fallback to the register mode.
536     Although I could do some clever things here, "NOT IMM" does not worth the efforts. */
537     break;
538     case SLJIT_CLZ:
539     /* No form with immediate operand. */
540     break;
541     case SLJIT_ADD:
542     if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
543     if (imm <= 0x7)
544     return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
545     if (reg == dst && imm <= 0xff)
546     return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
547     }
548     if (imm <= 0xfff && !(flags & SET_FLAGS))
549     return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
550     imm = get_imm(imm);
551     if (imm != INVALID_IMM)
552     return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
553     break;
554     case SLJIT_ADDC:
555     imm = get_imm(imm);
556     if (imm != INVALID_IMM)
557     return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
558     break;
559     case SLJIT_SUB:
560     if (flags & ARG2_IMM) {
561     if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) {
562     if (imm <= 0x7)
563     return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
564     if (imm <= 0xff) {
565     if (reg == dst)
566     return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
567     if (flags & UNUSED_RETURN)
568     return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
569     }
570     }
571     if (imm <= 0xfff && !(flags & SET_FLAGS))
572     return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
573     imm = get_imm(imm);
574     if (imm != INVALID_IMM)
575     return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
576     }
577     else {
578     if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst))
579     return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
580     imm = get_imm(imm);
581     if (imm != INVALID_IMM)
582     return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
583     }
584     break;
585     case SLJIT_SUBC:
586     if (flags & ARG2_IMM) {
587     imm = get_imm(imm);
588     if (imm != INVALID_IMM)
589     return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
590     }
591     break;
592     case SLJIT_MUL:
593     /* No form with immediate operand. */
594     break;
595     case SLJIT_AND:
596     imm = get_imm(imm);
597     if (imm != INVALID_IMM)
598     return push_inst32(compiler, ANDI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
599     imm = get_imm(~((flags & ARG2_IMM) ? arg2 : arg1));
600     if (imm != INVALID_IMM)
601     return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
602     break;
603     case SLJIT_OR:
604     imm = get_imm(imm);
605     if (imm != INVALID_IMM)
606     return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
607     imm = get_imm(~((flags & ARG2_IMM) ? arg2 : arg1));
608     if (imm != INVALID_IMM)
609     return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
610     break;
611     case SLJIT_XOR:
612     imm = get_imm(imm);
613     if (imm != INVALID_IMM)
614     return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
615     break;
616     case SLJIT_SHL:
617     if (flags & ARG2_IMM) {
618     imm &= 0x1f;
619     if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
620     return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
621     return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
622     }
623     break;
624     case SLJIT_LSHR:
625     if (flags & ARG2_IMM) {
626     imm &= 0x1f;
627     if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
628     return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
629     return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
630     }
631     break;
632     case SLJIT_ASHR:
633     if (flags & ARG2_IMM) {
634     imm &= 0x1f;
635     if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg))
636     return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
637     return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
638     }
639     break;
640     default:
641     SLJIT_ASSERT_STOP();
642     break;
643     }
644    
645     if (flags & ARG2_IMM) {
646     FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
647     arg2 = TMP_REG2;
648     }
649     else {
650     FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
651     arg1 = TMP_REG1;
652     }
653     }
654    
655     /* Both arguments are registers. */
656     switch (flags & 0xffff) {
657     case SLJIT_MOV:
658     case SLJIT_MOV_UI:
659     case SLJIT_MOV_SI:
660     case SLJIT_MOVU:
661     case SLJIT_MOVU_UI:
662     case SLJIT_MOVU_SI:
663     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
664     return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
665     case SLJIT_MOV_UB:
666     case SLJIT_MOVU_UB:
667     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
668     if (IS_2_LO_REGS(dst, arg2))
669     return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
670     return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
671     case SLJIT_MOV_SB:
672     case SLJIT_MOVU_SB:
673     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
674     if (IS_2_LO_REGS(dst, arg2))
675     return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
676     return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
677     case SLJIT_MOV_UH:
678     case SLJIT_MOVU_UH:
679     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
680     if (IS_2_LO_REGS(dst, arg2))
681     return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
682     return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
683     case SLJIT_MOV_SH:
684     case SLJIT_MOVU_SH:
685     SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
686     if (IS_2_LO_REGS(dst, arg2))
687     return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
688     return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
689     case SLJIT_NOT:
690     SLJIT_ASSERT(arg1 == TMP_REG1);
691     if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
692     return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2));
693     return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2));
694     case SLJIT_CLZ:
695     SLJIT_ASSERT(arg1 == TMP_REG1);
696     FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2)));
697     if (flags & SET_FLAGS) {
698     if (reg_map[dst] <= 7)
699     return push_inst16(compiler, CMPI | RDN3(dst));
700     return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst));
701     }
702     return SLJIT_SUCCESS;
703     case SLJIT_ADD:
704     if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
705     return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
706     if (dst == arg1 && !(flags & SET_FLAGS))
707     return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
708     return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
709     case SLJIT_ADDC:
710     if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
711     return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
712     return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
713     case SLJIT_SUB:
714     if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2))
715     return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
716     return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
717     case SLJIT_SUBC:
718     if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
719     return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
720     return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
721     case SLJIT_MUL:
722     if (!(flags & SET_FLAGS))
723     return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
724     SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2);
725     FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2)));
726     /* cmp TMP_REG2, dst asr #31. */
727     return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst));
728     case SLJIT_AND:
729     if (!(flags & KEEP_FLAGS)) {
730     if (dst == arg1 && IS_2_LO_REGS(dst, arg2))
731     return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
732     if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
733     return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
734     }
735     return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
736     case SLJIT_OR:
737     if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
738     return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
739     return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
740     case SLJIT_XOR:
741     if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
742     return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
743     return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
744     case SLJIT_SHL:
745     if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
746     return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
747     return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
748     case SLJIT_LSHR:
749     if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
750     return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
751     return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
752     case SLJIT_ASHR:
753     if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2))
754     return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
755     return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
756     }
757    
758     SLJIT_ASSERT_STOP();
759     return SLJIT_SUCCESS;
760     }
761    
762     #define STORE 0x01
763     #define SIGNED 0x02
764    
765     #define WORD_SIZE 0x00
766     #define BYTE_SIZE 0x04
767     #define HALF_SIZE 0x08
768    
769     #define UPDATE 0x10
770     #define ARG_TEST 0x20
771    
772     #define IS_WORD_SIZE(flags) (!(flags & (BYTE_SIZE | HALF_SIZE)))
773     #define OFFSET_CHECK(imm, shift) (!(argw & ~(imm << shift)))
774    
775     /*
776     1st letter:
777     w = word
778     b = byte
779     h = half
780    
781     2nd letter:
782     s = signed
783     u = unsigned
784    
785     3rd letter:
786     l = load
787     s = store
788     */
789    
790     static SLJIT_CONST sljit_uw sljit_mem16[12] = {
791     /* w u l */ 0x5800 /* ldr */,
792     /* w u s */ 0x5000 /* str */,
793     /* w s l */ 0x5800 /* ldr */,
794     /* w s s */ 0x5000 /* str */,
795    
796     /* b u l */ 0x5c00 /* ldrb */,
797     /* b u s */ 0x5400 /* strb */,
798     /* b s l */ 0x5600 /* ldrsb */,
799     /* b s s */ 0x5400 /* strb */,
800    
801     /* h u l */ 0x5a00 /* ldrh */,
802     /* h u s */ 0x5200 /* strh */,
803     /* h s l */ 0x5e00 /* ldrsh */,
804     /* h s s */ 0x5200 /* strh */,
805     };
806    
807     static SLJIT_CONST sljit_uw sljit_mem16_imm5[12] = {
808     /* w u l */ 0x6800 /* ldr imm5 */,
809     /* w u s */ 0x6000 /* str imm5 */,
810     /* w s l */ 0x6800 /* ldr imm5 */,
811     /* w s s */ 0x6000 /* str imm5 */,
812    
813     /* b u l */ 0x7800 /* ldrb imm5 */,
814     /* b u s */ 0x7000 /* strb imm5 */,
815     /* b s l */ 0x0000 /* not allowed */,
816     /* b s s */ 0x7000 /* strb imm5 */,
817    
818     /* h u l */ 0x8800 /* ldrh imm5 */,
819     /* h u s */ 0x8000 /* strh imm5 */,
820     /* h s l */ 0x0000 /* not allowed */,
821     /* h s s */ 0x8000 /* strh imm5 */,
822     };
823    
824     #define MEM_IMM8 0xc00
825     #define MEM_IMM12 0x800000
826     static SLJIT_CONST sljit_uw sljit_mem32[12] = {
827     /* w u l */ 0xf8500000 /* ldr.w */,
828     /* w u s */ 0xf8400000 /* str.w */,
829     /* w s l */ 0xf8500000 /* ldr.w */,
830     /* w s s */ 0xf8400000 /* str.w */,
831    
832     /* b u l */ 0xf8100000 /* ldrb.w */,
833     /* b u s */ 0xf8000000 /* strb.w */,
834     /* b s l */ 0xf9100000 /* ldrsb.w */,
835     /* b s s */ 0xf8000000 /* strb.w */,
836    
837     /* h u l */ 0xf8300000 /* ldrh.w */,
838     /* h u s */ 0xf8200000 /* strsh.w */,
839     /* h s l */ 0xf9300000 /* ldrsh.w */,
840     /* h s s */ 0xf8200000 /* strsh.w */,
841     };
842    
843     /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
844     static int emit_set_delta(struct sljit_compiler *compiler, int dst, int reg, sljit_w value)
845     {
846     if (value >= 0) {
847     if (value <= 0xfff)
848     return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value));
849     value = get_imm(value);
850     if (value != INVALID_IMM)
851     return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | value);
852     }
853     else {
854     value = -value;
855     if (value <= 0xfff)
856     return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value));
857     value = get_imm(value);
858     if (value != INVALID_IMM)
859     return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | value);
860     }
861     return SLJIT_ERR_UNSUPPORTED;
862     }
863    
864     /* Can perform an operation using at most 1 instruction. */
865     static int getput_arg_fast(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw)
866     {
867     int tmp;
868    
869     SLJIT_ASSERT(arg & SLJIT_MEM);
870    
871     if (SLJIT_UNLIKELY(flags & UPDATE)) {
872     if ((arg & 0xf) && !(arg & 0xf0) && argw <= 0xff && argw >= -0xff) {
873     flags &= ~UPDATE;
874     arg &= 0xf;
875     if (SLJIT_UNLIKELY(flags & ARG_TEST))
876     return 1;
877    
878     if (argw >= 0)
879     argw |= 0x200;
880     else {
881     argw = -argw;
882     }
883     SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff);
884     FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw));
885     return -1;
886     }
887     return (flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
888     }
889    
890     if (SLJIT_UNLIKELY(arg & 0xf0)) {
891     argw &= 0x3;
892     tmp = (arg >> 4) & 0xf;
893     arg &= 0xf;
894     if (SLJIT_UNLIKELY(flags & ARG_TEST))
895     return 1;
896    
897     if (!argw && IS_3_LO_REGS(reg, arg, tmp))
898     FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp)));
899     else
900     FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp) | (argw << 4)));
901     return -1;
902     }
903    
904     if (!(arg & 0xf) || argw > 0xfff || argw < -0xff)
905     return (flags & ARG_TEST) ? SLJIT_SUCCESS : 0;
906    
907     if (SLJIT_UNLIKELY(flags & ARG_TEST))
908     return 1;
909    
910     arg &= 0xf;
911     if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
912     tmp = 3;
913     if (IS_WORD_SIZE(flags)) {
914     if (OFFSET_CHECK(0x1f, 2))
915     tmp = 2;
916     }
917     else if (flags & BYTE_SIZE)
918     {
919     if (OFFSET_CHECK(0x1f, 0))
920     tmp = 0;
921     }
922     else {
923     SLJIT_ASSERT(flags & HALF_SIZE);
924     if (OFFSET_CHECK(0x1f, 1))
925     tmp = 1;
926     }
927    
928     if (tmp != 3) {
929     FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - tmp))));
930     return -1;
931     }
932     }
933    
934     /* SP based immediate. */
935     if (SLJIT_UNLIKELY(arg == SLJIT_LOCALS_REG) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) {
936     FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)));
937     return -1;
938     }
939    
940     if (argw >= 0)
941     FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
942     else
943     FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw));
944     return -1;
945     }
946    
947     /* see getput_arg below.
948     Note: can_cache is called only for binary operators. Those
949     operators always uses word arguments without write back. */
950     static int can_cache(int arg, sljit_w argw, int next_arg, sljit_w next_argw)
951     {
952     /* Simple operation except for updates. */
953     if ((arg & 0xf0) || !(next_arg & SLJIT_MEM))
954     return 0;
955    
956     if (!(arg & 0xf)) {
957     if ((sljit_uw)(argw - next_argw) <= 0xfff || (sljit_uw)(next_argw - argw) <= 0xfff)
958     return 1;
959     return 0;
960     }
961    
962     if (argw == next_argw)
963     return 1;
964    
965     if (arg == next_arg && ((sljit_uw)(argw - next_argw) <= 0xfff || (sljit_uw)(next_argw - argw) <= 0xfff))
966     return 1;
967    
968     return 0;
969     }
970    
971     /* Emit the necessary instructions. See can_cache above. */
972     static int getput_arg(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw, int next_arg, sljit_w next_argw)
973     {
974     int tmp_r;
975     sljit_w tmp;
976    
977     SLJIT_ASSERT(arg & SLJIT_MEM);
978     if (!(next_arg & SLJIT_MEM)) {
979     next_arg = 0;
980     next_argw = 0;
981     }
982    
983     tmp_r = (flags & STORE) ? TMP_REG3 : reg;
984    
985     if (SLJIT_UNLIKELY(flags & UPDATE)) {
986     flags &= ~UPDATE;
987     /* Update only applies if a base register exists. */
988     if (arg & 0xf) {
989     /* There is no caching here. */
990     tmp = (arg & 0xf0) >> 4;
991     arg &= 0xf;
992    
993     if (!tmp) {
994     if (!(argw & ~0xfff)) {
995     FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw));
996     return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw));
997     }
998    
999     if (compiler->cache_arg == SLJIT_MEM) {
1000     if (argw == compiler->cache_argw) {
1001     tmp = TMP_REG3;
1002     argw = 0;
1003     }
1004     else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
1005     FAIL_IF(compiler->error);
1006     compiler->cache_argw = argw;
1007     tmp = TMP_REG3;
1008     argw = 0;
1009     }
1010     }
1011    
1012     if (argw) {
1013     FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1014     compiler->cache_arg = SLJIT_MEM;
1015     compiler->cache_argw = argw;
1016     tmp = TMP_REG3;
1017     argw = 0;
1018     }
1019     }
1020    
1021     argw &= 0x3;
1022     if (!argw && IS_3_LO_REGS(reg, arg, tmp)) {
1023     FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp)));
1024     return push_inst16(compiler, ADD | SET_REGS44(arg, tmp));
1025     }
1026     FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp) | (argw << 4)));
1027     return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(tmp) | (argw << 6));
1028     }
1029     }
1030    
1031     SLJIT_ASSERT(!(arg & 0xf0));
1032    
1033     if (compiler->cache_arg == arg) {
1034     if (!((argw - compiler->cache_argw) & ~0xfff))
1035     return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | (argw - compiler->cache_argw));
1036     if (!((compiler->cache_argw - argw) & ~0xff))
1037     return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw));
1038     if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) {
1039     FAIL_IF(compiler->error);
1040     return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
1041     }
1042     }
1043    
1044     next_arg = (arg & 0xf) && (arg == next_arg);
1045     arg &= 0xf;
1046     if (arg && compiler->cache_arg == SLJIT_MEM && compiler->cache_argw == argw)
1047     return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
1048    
1049     compiler->cache_argw = argw;
1050     if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) {
1051     FAIL_IF(compiler->error);
1052     compiler->cache_arg = SLJIT_MEM | arg;
1053     arg = 0;
1054     }
1055     else {
1056     FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1057     compiler->cache_arg = SLJIT_MEM;
1058    
1059     if (next_arg) {
1060     FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg)));
1061     compiler->cache_arg = SLJIT_MEM | arg;
1062     arg = 0;
1063     }
1064     }
1065    
1066     if (arg)
1067     return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3));
1068     return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0);
1069     }
1070    
1071     static SLJIT_INLINE int emit_op_mem(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw)
1072     {
1073     if (getput_arg_fast(compiler, flags, reg, arg, argw))
1074     return compiler->error;
1075     compiler->cache_arg = 0;
1076     compiler->cache_argw = 0;
1077     return getput_arg(compiler, flags, reg, arg, argw, 0, 0);
1078     }
1079    
1080 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size)
1081 ph10 662 {
1082     int size;
1083     sljit_ins push;
1084    
1085     CHECK_ERROR();
1086     check_sljit_emit_enter(compiler, args, temporaries, generals, local_size);
1087    
1088     compiler->temporaries = temporaries;
1089     compiler->generals = generals;
1090    
1091     push = (1 << 4);
1092     if (generals >= 5)
1093     push |= 1 << 11;
1094     if (generals >= 4)
1095     push |= 1 << 10;
1096     if (generals >= 3)
1097     push |= 1 << 8;
1098     if (generals >= 2)
1099     push |= 1 << 7;
1100     if (generals >= 1)
1101     push |= 1 << 6;
1102     if (temporaries >= 5)
1103     push |= 1 << 5;
1104     FAIL_IF(generals >= 3
1105     ? push_inst32(compiler, PUSH_W | (1 << 14) | push)
1106     : push_inst16(compiler, PUSH | push));
1107    
1108     /* Stack must be aligned to 8 bytes: */
1109     size = (3 + generals) * sizeof(sljit_uw);
1110     local_size += size;
1111     local_size = (local_size + 7) & ~7;
1112     local_size -= size;
1113     compiler->local_size = local_size;
1114     if (local_size > 0) {
1115     if (local_size <= (127 << 2))
1116     FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
1117     else
1118     FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, local_size));
1119     }
1120    
1121     if (args >= 1)
1122     FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_GENERAL_REG1, SLJIT_TEMPORARY_REG1)));
1123     if (args >= 2)
1124     FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_GENERAL_REG2, SLJIT_TEMPORARY_REG2)));
1125     if (args >= 3)
1126     FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_GENERAL_REG3, SLJIT_TEMPORARY_REG3)));
1127    
1128     return SLJIT_SUCCESS;
1129     }
1130    
1131 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE void sljit_fake_enter(struct sljit_compiler *compiler, int args, int temporaries, int generals, int local_size)
1132 ph10 662 {
1133     int size;
1134    
1135     CHECK_ERROR_VOID();
1136     check_sljit_fake_enter(compiler, args, temporaries, generals, local_size);
1137    
1138     compiler->temporaries = temporaries;
1139     compiler->generals = generals;
1140    
1141     size = (3 + generals) * sizeof(sljit_uw);
1142     local_size += size;
1143     local_size = (local_size + 7) & ~7;
1144     local_size -= size;
1145     compiler->local_size = local_size;
1146     }
1147    
1148 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_return(struct sljit_compiler *compiler, int src, sljit_w srcw)
1149 ph10 662 {
1150     sljit_ins pop;
1151    
1152     CHECK_ERROR();
1153     check_sljit_emit_return(compiler, src, srcw);
1154    
1155     if (src != SLJIT_UNUSED && src != SLJIT_RETURN_REG) {
1156     if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REG3)
1157     FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_RETURN_REG, src)));
1158     else
1159     FAIL_IF(emit_op_mem(compiler, WORD_SIZE, SLJIT_RETURN_REG, src, srcw));
1160     }
1161    
1162     if (compiler->local_size > 0) {
1163     if (compiler->local_size <= (127 << 2))
1164     FAIL_IF(push_inst16(compiler, ADD_SP | (compiler->local_size >> 2)));
1165     else
1166     FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_LOCALS_REG, SLJIT_LOCALS_REG, compiler->local_size));
1167     }
1168    
1169     pop = (1 << 4);
1170     if (compiler->generals >= 5)
1171     pop |= 1 << 11;
1172     if (compiler->generals >= 4)
1173     pop |= 1 << 10;
1174     if (compiler->generals >= 3)
1175     pop |= 1 << 8;
1176     if (compiler->generals >= 2)
1177     pop |= 1 << 7;
1178     if (compiler->generals >= 1)
1179     pop |= 1 << 6;
1180     if (compiler->temporaries >= 5)
1181     pop |= 1 << 5;
1182     return compiler->generals >= 3
1183     ? push_inst32(compiler, POP_W | (1 << 15) | pop)
1184     : push_inst16(compiler, POP | pop);
1185     }
1186    
1187     /* --------------------------------------------------------------------- */
1188     /* Operators */
1189     /* --------------------------------------------------------------------- */
1190    
1191 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
1192 ph10 662 {
1193     CHECK_ERROR();
1194     check_sljit_emit_op0(compiler, op);
1195    
1196     op = GET_OPCODE(op);
1197     switch (op) {
1198     case SLJIT_BREAKPOINT:
1199     push_inst16(compiler, BKPT);
1200     break;
1201     case SLJIT_NOP:
1202     push_inst16(compiler, NOP);
1203     break;
1204     }
1205    
1206     return SLJIT_SUCCESS;
1207     }
1208    
1209 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
1210 ph10 662 int dst, sljit_w dstw,
1211     int src, sljit_w srcw)
1212     {
1213     int op_type, dst_r, flags;
1214    
1215     CHECK_ERROR();
1216     check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
1217    
1218     compiler->cache_arg = 0;
1219     compiler->cache_argw = 0;
1220    
1221     op_type = GET_OPCODE(op);
1222     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG1;
1223    
1224     if (op_type >= SLJIT_MOV && op_type <= SLJIT_MOVU_SI) {
1225     switch (op_type) {
1226     case SLJIT_MOV:
1227     case SLJIT_MOV_UI:
1228     case SLJIT_MOV_SI:
1229     flags = WORD_SIZE;
1230     break;
1231     case SLJIT_MOV_UB:
1232     flags = BYTE_SIZE;
1233     if (src & SLJIT_IMM)
1234     srcw = (unsigned char)srcw;
1235     break;
1236     case SLJIT_MOV_SB:
1237     flags = BYTE_SIZE | SIGNED;
1238     if (src & SLJIT_IMM)
1239     srcw = (signed char)srcw;
1240     break;
1241     case SLJIT_MOV_UH:
1242     flags = HALF_SIZE;
1243     if (src & SLJIT_IMM)
1244     srcw = (unsigned short)srcw;
1245     break;
1246     case SLJIT_MOV_SH:
1247     flags = HALF_SIZE | SIGNED;
1248     if (src & SLJIT_IMM)
1249     srcw = (signed short)srcw;
1250     break;
1251     case SLJIT_MOVU:
1252     case SLJIT_MOVU_UI:
1253     case SLJIT_MOVU_SI:
1254     flags = WORD_SIZE | UPDATE;
1255     break;
1256     case SLJIT_MOVU_UB:
1257     flags = BYTE_SIZE | UPDATE;
1258     if (src & SLJIT_IMM)
1259     srcw = (unsigned char)srcw;
1260     break;
1261     case SLJIT_MOVU_SB:
1262     flags = BYTE_SIZE | SIGNED | UPDATE;
1263     if (src & SLJIT_IMM)
1264     srcw = (signed char)srcw;
1265     break;
1266     case SLJIT_MOVU_UH:
1267     flags = HALF_SIZE | UPDATE;
1268     if (src & SLJIT_IMM)
1269     srcw = (unsigned short)srcw;
1270     break;
1271     case SLJIT_MOVU_SH:
1272     flags = HALF_SIZE | SIGNED | UPDATE;
1273     if (src & SLJIT_IMM)
1274     srcw = (signed short)srcw;
1275     break;
1276     default:
1277     SLJIT_ASSERT_STOP();
1278     flags = 0;
1279     break;
1280     }
1281    
1282     if (src & SLJIT_IMM)
1283     FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
1284     else if (src & SLJIT_MEM) {
1285     if (getput_arg_fast(compiler, flags, dst_r, src, srcw))
1286     FAIL_IF(compiler->error);
1287     else
1288     FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw));
1289     } else {
1290     if (dst_r != TMP_REG1)
1291     return emit_op_imm(compiler, op_type, dst_r, TMP_REG1, src);
1292     dst_r = src;
1293     }
1294    
1295     if (dst & SLJIT_MEM) {
1296     if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
1297     return compiler->error;
1298     else
1299     return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
1300     }
1301     return SLJIT_SUCCESS;
1302     }
1303    
1304     if (op_type == SLJIT_NEG) {
1305     #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
1306     compiler->skip_checks = 1;
1307     #endif
1308     return sljit_emit_op2(compiler, GET_FLAGS(op) | SLJIT_SUB, dst, dstw, SLJIT_IMM, 0, src, srcw);
1309     }
1310    
1311     flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
1312     if (src & SLJIT_MEM) {
1313     if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw))
1314     FAIL_IF(compiler->error);
1315     else
1316     FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw));
1317     src = TMP_REG2;
1318     }
1319    
1320     if (src & SLJIT_IMM)
1321     flags |= ARG2_IMM;
1322     else
1323     srcw = src;
1324    
1325     emit_op_imm(compiler, flags | op_type, dst_r, TMP_REG1, srcw);
1326    
1327     if (dst & SLJIT_MEM) {
1328     if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw))
1329     return compiler->error;
1330     else
1331     return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0);
1332     }
1333     return SLJIT_SUCCESS;
1334     }
1335    
1336 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
1337 ph10 662 int dst, sljit_w dstw,
1338     int src1, sljit_w src1w,
1339     int src2, sljit_w src2w)
1340     {
1341     int dst_r, flags;
1342    
1343     CHECK_ERROR();
1344     check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1345    
1346     compiler->cache_arg = 0;
1347     compiler->cache_argw = 0;
1348    
1349     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG1;
1350     flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0);
1351    
1352     if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw))
1353     flags |= SLOW_DEST;
1354    
1355     if (src1 & SLJIT_MEM) {
1356     if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w))
1357     FAIL_IF(compiler->error);
1358     else
1359     flags |= SLOW_SRC1;
1360     }
1361     if (src2 & SLJIT_MEM) {
1362     if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w))
1363     FAIL_IF(compiler->error);
1364     else
1365     flags |= SLOW_SRC2;
1366     }
1367    
1368     if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
1369     if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1370     FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w));
1371     FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
1372     }
1373     else {
1374     FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w));
1375     FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
1376     }
1377     }
1378     else if (flags & SLOW_SRC1)
1379     FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw));
1380     else if (flags & SLOW_SRC2)
1381     FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw));
1382    
1383     if (src1 & SLJIT_MEM)
1384     src1 = TMP_REG1;
1385     if (src2 & SLJIT_MEM)
1386     src2 = TMP_REG2;
1387    
1388     if (src1 & SLJIT_IMM)
1389     flags |= ARG1_IMM;
1390     else
1391     src1w = src1;
1392     if (src2 & SLJIT_IMM)
1393     flags |= ARG2_IMM;
1394     else
1395     src2w = src2;
1396    
1397     if (dst == SLJIT_UNUSED)
1398     flags |= UNUSED_RETURN;
1399    
1400     if (GET_OPCODE(op) == SLJIT_MUL && (op & SLJIT_SET_O))
1401     flags |= SET_MULOV;
1402    
1403     emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
1404    
1405     if (dst & SLJIT_MEM) {
1406     if (!(flags & SLOW_DEST)) {
1407     getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw);
1408     return compiler->error;
1409     }
1410     return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0);
1411     }
1412     return SLJIT_SUCCESS;
1413     }
1414    
1415     /* --------------------------------------------------------------------- */
1416     /* Floating point operators */
1417     /* --------------------------------------------------------------------- */
1418    
1419 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
1420 ph10 662 {
1421     return 1;
1422     }
1423    
1424     static int emit_fop_mem(struct sljit_compiler *compiler, int flags, int reg, int arg, sljit_w argw)
1425     {
1426     sljit_w tmp;
1427     sljit_w inst = VSTR | ((flags & STORE) ? 0 : 0x00100000);
1428    
1429     SLJIT_ASSERT(arg & SLJIT_MEM);
1430    
1431     /* Fast loads and stores. */
1432     if (SLJIT_UNLIKELY(arg & 0xf0)) {
1433     FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & 0xf) | RM4((arg & 0xf0) >> 4) | ((argw & 0x3) << 6)));
1434     arg = SLJIT_MEM | TMP_REG2;
1435     argw = 0;
1436     }
1437    
1438     if (arg & 0xf) {
1439     if (!(argw & ~0x3fc))
1440     return push_inst32(compiler, inst | 0x800000 | RN4(arg & 0xf) | DD4(reg) | (argw >> 2));
1441     if (!(-argw & ~0x3fc))
1442     return push_inst32(compiler, inst | RN4(arg & 0xf) | DD4(reg) | (-argw >> 2));
1443     }
1444    
1445     SLJIT_ASSERT(!(arg & 0xf0));
1446     if (compiler->cache_arg == arg) {
1447     tmp = argw - compiler->cache_argw;
1448     if (!(tmp & ~0x3fc))
1449     return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2));
1450     if (!(-tmp & ~0x3fc))
1451     return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2));
1452     if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) {
1453     FAIL_IF(compiler->error);
1454     compiler->cache_argw = argw;
1455     return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
1456     }
1457     }
1458    
1459     compiler->cache_arg = arg;
1460     compiler->cache_argw = argw;
1461    
1462     if (SLJIT_UNLIKELY(!(arg & 0xf)))
1463     FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1464     else if (emit_set_delta(compiler, TMP_REG3, arg & 0xf, argw) != SLJIT_ERR_UNSUPPORTED)
1465     FAIL_IF(compiler->error);
1466     else {
1467     FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1468     if (arg & 0xf)
1469     FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & 0xf))));
1470     }
1471     return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg));
1472     }
1473    
1474 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
1475 ph10 662 int dst, sljit_w dstw,
1476     int src, sljit_w srcw)
1477     {
1478     int dst_r;
1479    
1480     CHECK_ERROR();
1481     check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
1482    
1483     compiler->cache_arg = 0;
1484     compiler->cache_argw = 0;
1485    
1486     if (GET_OPCODE(op) == SLJIT_FCMP) {
1487     if (dst & SLJIT_MEM) {
1488     emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
1489     dst = TMP_FREG1;
1490     }
1491     if (src & SLJIT_MEM) {
1492     emit_fop_mem(compiler, 0, TMP_FREG2, src, srcw);
1493     src = TMP_FREG2;
1494     }
1495     FAIL_IF(push_inst32(compiler, VCMP_F64 | DD4(dst) | DM4(src)));
1496     return push_inst32(compiler, VMRS);
1497     }
1498    
1499     dst_r = (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? dst : TMP_FREG1;
1500     if (src & SLJIT_MEM) {
1501     emit_fop_mem(compiler, 0, dst_r, src, srcw);
1502     src = dst_r;
1503     }
1504    
1505     switch (GET_OPCODE(op)) {
1506     case SLJIT_FMOV:
1507     if (src != dst_r)
1508     FAIL_IF(push_inst32(compiler, VMOV_F64 | DD4(dst_r) | DM4(src)));
1509     break;
1510     case SLJIT_FNEG:
1511     FAIL_IF(push_inst32(compiler, VNEG_F64 | DD4(dst_r) | DM4(src)));
1512     break;
1513     case SLJIT_FABS:
1514     FAIL_IF(push_inst32(compiler, VABS_F64 | DD4(dst_r) | DM4(src)));
1515     break;
1516     }
1517    
1518     if (dst & SLJIT_MEM)
1519     return emit_fop_mem(compiler, STORE, TMP_FREG1, dst, dstw);
1520     return SLJIT_SUCCESS;
1521     }
1522    
1523 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
1524 ph10 662 int dst, sljit_w dstw,
1525     int src1, sljit_w src1w,
1526     int src2, sljit_w src2w)
1527     {
1528     int dst_r;
1529    
1530     CHECK_ERROR();
1531     check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
1532    
1533     compiler->cache_arg = 0;
1534     compiler->cache_argw = 0;
1535    
1536     dst_r = (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? dst : TMP_FREG1;
1537     if (src1 & SLJIT_MEM) {
1538     emit_fop_mem(compiler, 0, TMP_FREG1, src1, src1w);
1539     src1 = TMP_FREG1;
1540     }
1541     if (src2 & SLJIT_MEM) {
1542     emit_fop_mem(compiler, 0, TMP_FREG2, src2, src2w);
1543     src2 = TMP_FREG2;
1544     }
1545    
1546     switch (GET_OPCODE(op)) {
1547     case SLJIT_FADD:
1548     FAIL_IF(push_inst32(compiler, VADD_F64 | DD4(dst_r) | DN4(src1) | DM4(src2)));
1549     break;
1550     case SLJIT_FSUB:
1551     FAIL_IF(push_inst32(compiler, VSUB_F64 | DD4(dst_r) | DN4(src1) | DM4(src2)));
1552     break;
1553     case SLJIT_FMUL:
1554     FAIL_IF(push_inst32(compiler, VMUL_F64 | DD4(dst_r) | DN4(src1) | DM4(src2)));
1555     break;
1556     case SLJIT_FDIV:
1557     FAIL_IF(push_inst32(compiler, VDIV_F64 | DD4(dst_r) | DN4(src1) | DM4(src2)));
1558     break;
1559     }
1560    
1561     if (dst & SLJIT_MEM)
1562     return emit_fop_mem(compiler, STORE, TMP_FREG1, dst, dstw);
1563     return SLJIT_SUCCESS;
1564     }
1565    
1566     /* --------------------------------------------------------------------- */
1567     /* Other instructions */
1568     /* --------------------------------------------------------------------- */
1569    
1570 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_enter(struct sljit_compiler *compiler, int dst, sljit_w dstw, int args, int temporaries, int generals, int local_size)
1571 ph10 662 {
1572     int size;
1573    
1574     CHECK_ERROR();
1575     check_sljit_emit_fast_enter(compiler, dst, dstw, args, temporaries, generals, local_size);
1576    
1577     compiler->temporaries = temporaries;
1578     compiler->generals = generals;
1579    
1580     size = (3 + generals) * sizeof(sljit_uw);
1581     local_size += size;
1582     local_size = (local_size + 7) & ~7;
1583     local_size -= size;
1584     compiler->local_size = local_size;
1585    
1586     if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
1587     return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3));
1588     else if (dst & SLJIT_MEM) {
1589     if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw))
1590     return compiler->error;
1591     FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3)));
1592     compiler->cache_arg = 0;
1593     compiler->cache_argw = 0;
1594     return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0);
1595     }
1596    
1597     return SLJIT_SUCCESS;
1598     }
1599    
1600 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fast_return(struct sljit_compiler *compiler, int src, sljit_w srcw)
1601 ph10 662 {
1602     CHECK_ERROR();
1603     check_sljit_emit_fast_return(compiler, src, srcw);
1604    
1605     if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
1606     FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src)));
1607     else if (src & SLJIT_MEM) {
1608     if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw))
1609     FAIL_IF(compiler->error);
1610     else {
1611     compiler->cache_arg = 0;
1612     compiler->cache_argw = 0;
1613     FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0));
1614     FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2)));
1615     }
1616     }
1617     else if (src & SLJIT_IMM)
1618     FAIL_IF(load_immediate(compiler, TMP_REG3, srcw));
1619     return push_inst16(compiler, BLX | RN3(TMP_REG3));
1620     }
1621    
1622     /* --------------------------------------------------------------------- */
1623     /* Conditional instructions */
1624     /* --------------------------------------------------------------------- */
1625    
1626     static sljit_uw get_cc(int type)
1627     {
1628     switch (type) {
1629     case SLJIT_C_EQUAL:
1630     case SLJIT_C_MUL_NOT_OVERFLOW:
1631     case SLJIT_C_FLOAT_EQUAL:
1632     return 0x0;
1633    
1634     case SLJIT_C_NOT_EQUAL:
1635     case SLJIT_C_MUL_OVERFLOW:
1636     case SLJIT_C_FLOAT_NOT_EQUAL:
1637     return 0x1;
1638    
1639     case SLJIT_C_LESS:
1640     case SLJIT_C_FLOAT_LESS:
1641     return 0x3;
1642    
1643     case SLJIT_C_GREATER_EQUAL:
1644     case SLJIT_C_FLOAT_GREATER_EQUAL:
1645     return 0x2;
1646    
1647     case SLJIT_C_GREATER:
1648     case SLJIT_C_FLOAT_GREATER:
1649     return 0x8;
1650    
1651     case SLJIT_C_LESS_EQUAL:
1652     case SLJIT_C_FLOAT_LESS_EQUAL:
1653     return 0x9;
1654    
1655     case SLJIT_C_SIG_LESS:
1656     return 0xb;
1657    
1658     case SLJIT_C_SIG_GREATER_EQUAL:
1659     return 0xa;
1660    
1661     case SLJIT_C_SIG_GREATER:
1662     return 0xc;
1663    
1664     case SLJIT_C_SIG_LESS_EQUAL:
1665     return 0xd;
1666    
1667     case SLJIT_C_OVERFLOW:
1668     case SLJIT_C_FLOAT_NAN:
1669     return 0x6;
1670    
1671     case SLJIT_C_NOT_OVERFLOW:
1672     case SLJIT_C_FLOAT_NOT_NAN:
1673     return 0x7;
1674    
1675     default: /* SLJIT_JUMP */
1676     return 0xe;
1677     }
1678     }
1679    
1680 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
1681 ph10 662 {
1682     struct sljit_label *label;
1683    
1684     CHECK_ERROR_PTR();
1685     check_sljit_emit_label(compiler);
1686    
1687     if (compiler->last_label && compiler->last_label->size == compiler->size)
1688     return compiler->last_label;
1689    
1690     label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
1691     PTR_FAIL_IF(!label);
1692     set_label(label, compiler);
1693     return label;
1694     }
1695    
1696 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
1697 ph10 662 {
1698     struct sljit_jump *jump;
1699     int cc;
1700    
1701     CHECK_ERROR_PTR();
1702     check_sljit_emit_jump(compiler, type);
1703    
1704     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1705     PTR_FAIL_IF(!jump);
1706     set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
1707     type &= 0xff;
1708    
1709     /* In ARM, we don't need to touch the arguments. */
1710     PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
1711     if (type < SLJIT_JUMP) {
1712     jump->flags |= IS_CONDITIONAL;
1713     cc = get_cc(type);
1714     jump->flags |= cc << 8;
1715     PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
1716     }
1717    
1718     jump->addr = compiler->size;
1719     if (type <= SLJIT_JUMP)
1720     PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
1721     else {
1722     jump->flags |= IS_BL;
1723     PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
1724     }
1725    
1726     return jump;
1727     }
1728    
1729 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
1730 ph10 662 {
1731     struct sljit_jump *jump;
1732    
1733     CHECK_ERROR();
1734     check_sljit_emit_ijump(compiler, type, src, srcw);
1735    
1736     /* In ARM, we don't need to touch the arguments. */
1737     if (src & SLJIT_IMM) {
1738     jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
1739     FAIL_IF(!jump);
1740 zherczeg 722 set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
1741 ph10 662 jump->u.target = srcw;
1742    
1743     FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
1744     jump->addr = compiler->size;
1745 zherczeg 722 FAIL_IF(push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1)));
1746 ph10 662 }
1747     else {
1748 zherczeg 722 if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS)
1749     return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
1750 ph10 662
1751     FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw));
1752 zherczeg 722 if (type >= SLJIT_FAST_CALL)
1753 ph10 662 return push_inst16(compiler, BLX | RN3(TMP_REG1));
1754     }
1755     return SLJIT_SUCCESS;
1756     }
1757    
1758 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
1759 ph10 662 {
1760     int dst_r;
1761     sljit_uw cc;
1762    
1763     CHECK_ERROR();
1764     check_sljit_emit_cond_value(compiler, op, dst, dstw, type);
1765    
1766     if (dst == SLJIT_UNUSED)
1767     return SLJIT_SUCCESS;
1768    
1769     cc = get_cc(type);
1770     if (GET_OPCODE(op) == SLJIT_OR && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
1771     FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
1772     FAIL_IF(push_inst32(compiler, ORRI | RN4(dst) | RD4(dst) | 0x1));
1773     if (op & SLJIT_SET_E) {
1774     if (reg_map[dst] <= 7)
1775     return push_inst16(compiler, ORRS | RD3(dst) | RN3(dst));
1776     return push_inst32(compiler, ORR_W | SET_FLAGS | RD4(TMP_REG1) | RN4(dst) | RM4(dst));
1777     }
1778     return SLJIT_SUCCESS;
1779     }
1780    
1781     dst_r = TMP_REG2;
1782     if (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && reg_map[dst] <= 7)
1783     dst_r = dst;
1784    
1785     FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
1786     FAIL_IF(push_inst16(compiler, MOVSI | 0x1 | RDN3(dst_r)));
1787     FAIL_IF(push_inst16(compiler, MOVSI | 0x0 | RDN3(dst_r)));
1788    
1789     if (dst_r == TMP_REG2) {
1790     if (GET_OPCODE(op) == SLJIT_OR) {
1791     #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
1792     compiler->skip_checks = 1;
1793     #endif
1794     return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG2, 0);
1795     }
1796     if (dst & SLJIT_MEM)
1797     return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw);
1798     else
1799     return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
1800     }
1801    
1802     return SLJIT_SUCCESS;
1803     }
1804    
1805 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
1806 ph10 662 {
1807     struct sljit_const *const_;
1808     int dst_r;
1809    
1810     CHECK_ERROR_PTR();
1811     check_sljit_emit_const(compiler, dst, dstw, init_value);
1812    
1813     const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
1814     PTR_FAIL_IF(!const_);
1815     set_const(const_, compiler);
1816    
1817     dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REG1;
1818     PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value));
1819    
1820     if (dst & SLJIT_MEM)
1821     PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw));
1822     return const_;
1823     }
1824    
1825 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
1826 ph10 662 {
1827     inline_set_jump_addr(addr, new_addr, 1);
1828     }
1829    
1830 zherczeg 740 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
1831 ph10 662 {
1832     sljit_uh* inst = (sljit_uh*)addr;
1833     modify_imm32_const(inst, new_constant);
1834     SLJIT_CACHE_FLUSH(inst, inst + 3);
1835     }

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12