/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 955 - (hide annotations) (download)
Tue Apr 3 15:32:36 2012 UTC (2 years, 6 months ago) by zherczeg
File MIME type: text/plain
File size: 231288 byte(s)
JIT compiler update
1 ph10 664 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 836 Copyright (c) 1997-2012 University of Cambridge
10 ph10 664
11     The machine code generator part (this module) was written by Zoltan Herczeg
12 ph10 836 Copyright (c) 2010-2012
13 ph10 664
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43     #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47     #include "pcre_internal.h"
48    
49     #ifdef SUPPORT_JIT
50    
51     /* All-in-one: Since we use the JIT compiler only from here,
52     we just include it. This way we don't need to touch the build
53     system files. */
54    
55 ph10 836 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56     #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 ph10 664 #define SLJIT_CONFIG_AUTO 1
58 zherczeg 741 #define SLJIT_CONFIG_STATIC 1
59 ph10 664 #define SLJIT_VERBOSE 0
60     #define SLJIT_DEBUG 0
61    
62     #include "sljit/sljitLir.c"
63    
64     #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 ph10 836 #error Unsupported architecture
66 ph10 664 #endif
67    
68     /* Allocate memory on the stack. Fast, but limited size. */
69     #define LOCAL_SPACE_SIZE 32768
70    
71     #define STACK_GROWTH_RATE 8192
72    
73     /* Enable to check that the allocation could destroy temporaries. */
74     #if defined SLJIT_DEBUG && SLJIT_DEBUG
75     #define DESTROY_REGISTERS 1
76     #endif
77    
/*
Short summary of the backtracking mechanism employed by the JIT code generator:

The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct fallback_common for more details).

  'ab' - 'a' and 'b' regexps are concatenated
  'a+' - 'a' is the sub-expression of the '+' operator

The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the hot path (expected path), and the other is called
the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the hot path.

 Greedy star operator (*) :
   Hot path: match happens.
   Fallback path: match failed.
 Non-greedy star operator (*?) :
   Hot path: no need to perform a match.
   Fallback path: match is required.

The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:

   A(B|C)D

The generated code will be the following:

 A hot path
 '(' hot path (pushing arguments to the stack)
 B hot path
 ')' hot path (pushing arguments to the stack)
 D hot path
 return with successful match

 D fallback path
 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
 B fallback path
 C expected path
 jump to D hot path
 C fallback path
 A fallback path

 Notice that the order of the fallback code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current fallback code path. The fallback code path must check
 whether there is a next alternative. If so, it needs to jump back to
 the hot path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the fallback code paths.
*/
133    
/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of local variables
when the fallback mechanism is performed. Because of OP_RECURSE, the locals
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.

The stack frames are stored in a chain list, and have the following format:
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]

Thus we can restore the locals to a particular point in the stack.
*/
147    
148     typedef struct jit_arguments {
149     /* Pointers first. */
150     struct sljit_stack *stack;
151 ph10 836 const pcre_uchar *str;
152     const pcre_uchar *begin;
153     const pcre_uchar *end;
154 ph10 664 int *offsets;
155 zherczeg 929 pcre_uchar *uchar_ptr;
156     pcre_uchar *mark_ptr;
157 ph10 664 /* Everything else after. */
158     int offsetcount;
159 ph10 677 int calllimit;
160 ph10 836 pcre_uint8 notbol;
161     pcre_uint8 noteol;
162     pcre_uint8 notempty;
163     pcre_uint8 notempty_atstart;
164 ph10 664 } jit_arguments;
165    
166 zherczeg 914 typedef struct executable_functions {
167 zherczeg 915 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
168 zherczeg 852 PUBL(jit_callback) callback;
169 ph10 664 void *userdata;
170 zherczeg 915 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
171 zherczeg 914 } executable_functions;
172 ph10 664
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
177    
178 zherczeg 696 enum stub_types { stack_alloc };
179 ph10 664
180     typedef struct stub_list {
181     enum stub_types type;
182     int data;
183     struct sljit_jump *start;
184     struct sljit_label *leave;
185     struct stub_list *next;
186     } stub_list;
187    
188     typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
189    
190     /* The following structure is the key data type for the recursive
191     code generator. It is allocated by compile_hotpath, and contains
192     the aguments for compile_fallbackpath. Must be the first member
193     of its descendants. */
194     typedef struct fallback_common {
195     /* Concatenation stack. */
196     struct fallback_common *prev;
197     jump_list *nextfallbacks;
198     /* Internal stack (for component operators). */
199     struct fallback_common *top;
200     jump_list *topfallbacks;
201     /* Opcode pointer. */
202 ph10 836 pcre_uchar *cc;
203 ph10 664 } fallback_common;
204    
205     typedef struct assert_fallback {
206     fallback_common common;
207     jump_list *condfailed;
208     /* Less than 0 (-1) if a frame is not needed. */
209     int framesize;
210     /* Points to our private memory word on the stack. */
211     int localptr;
212     /* For iterators. */
213     struct sljit_label *hotpath;
214     } assert_fallback;
215    
216     typedef struct bracket_fallback {
217     fallback_common common;
218     /* Where to coninue if an alternative is successfully matched. */
219     struct sljit_label *althotpath;
220     /* For rmin and rmax iterators. */
221     struct sljit_label *recursivehotpath;
222     /* For greedy ? operator. */
223     struct sljit_label *zerohotpath;
224     /* Contains the branches of a failed condition. */
225     union {
226     /* Both for OP_COND, OP_SCOND. */
227     jump_list *condfailed;
228     assert_fallback *assert;
229     /* For OP_ONCE. -1 if not needed. */
230     int framesize;
231     } u;
232     /* Points to our private memory word on the stack. */
233     int localptr;
234     } bracket_fallback;
235    
236     typedef struct bracketpos_fallback {
237     fallback_common common;
238     /* Points to our private memory word on the stack. */
239     int localptr;
240     /* Reverting stack is needed. */
241     int framesize;
242     /* Allocated stack size. */
243     int stacksize;
244     } bracketpos_fallback;
245    
246     typedef struct braminzero_fallback {
247     fallback_common common;
248     struct sljit_label *hotpath;
249     } braminzero_fallback;
250    
251     typedef struct iterator_fallback {
252     fallback_common common;
253     /* Next iteration. */
254     struct sljit_label *hotpath;
255     } iterator_fallback;
256    
257     typedef struct recurse_entry {
258     struct recurse_entry *next;
259     /* Contains the function entry. */
260     struct sljit_label *entry;
261     /* Collects the calls until the function is not created. */
262     jump_list *calls;
263     /* Points to the starting opcode. */
264     int start;
265     } recurse_entry;
266    
267     typedef struct recurse_fallback {
268     fallback_common common;
269     } recurse_fallback;
270    
271     typedef struct compiler_common {
272     struct sljit_compiler *compiler;
273 ph10 836 pcre_uchar *start;
274 zherczeg 920
275 zherczeg 955 /* Opcode local area direct map. */
276 ph10 664 int *localptrs;
277 zherczeg 920 int cbraptr;
278     /* OVector starting point. Must be divisible by 2. */
279     int ovector_start;
280     /* Last known position of the requested byte. */
281     int req_char_ptr;
282     /* Head of the last recursion. */
283     int recursive_head;
284     /* First inspected character for partial matching. */
285     int start_used_ptr;
286     /* Starting pointer for partial soft matches. */
287     int hit_start;
288     /* End pointer of the first line. */
289     int first_line_end;
290 zherczeg 929 /* Points to the marked string. */
291     int mark_ptr;
292 zherczeg 920
293     /* Other */
294 ph10 836 const pcre_uint8 *fcc;
295 ph10 664 sljit_w lcc;
296 zherczeg 914 int mode;
297 ph10 664 int nltype;
298     int newline;
299     int bsr_nltype;
300     int endonly;
301 zherczeg 929 BOOL has_set_som;
302 ph10 664 sljit_w ctypes;
303 zherczeg 741 sljit_uw name_table;
304     sljit_w name_count;
305     sljit_w name_entry_size;
306 zherczeg 920
307     /* Labels and jump lists. */
308 zherczeg 914 struct sljit_label *partialmatchlabel;
309 zherczeg 941 struct sljit_label *leavelabel;
310 ph10 664 struct sljit_label *acceptlabel;
311     stub_list *stubs;
312     recurse_entry *entries;
313     recurse_entry *currententry;
314 zherczeg 914 jump_list *partialmatch;
315 zherczeg 941 jump_list *leave;
316 ph10 664 jump_list *accept;
317 ph10 677 jump_list *calllimit;
318 ph10 664 jump_list *stackalloc;
319     jump_list *revertframes;
320     jump_list *wordboundary;
321     jump_list *anynewline;
322     jump_list *hspace;
323     jump_list *vspace;
324     jump_list *casefulcmp;
325     jump_list *caselesscmp;
326     BOOL jscript_compat;
327 ph10 836 #ifdef SUPPORT_UTF
328     BOOL utf;
329 ph10 664 #ifdef SUPPORT_UCP
330 ph10 836 BOOL use_ucp;
331 ph10 664 #endif
332 ph10 836 jump_list *utfreadchar;
333     #ifdef COMPILE_PCRE8
334     jump_list *utfreadtype8;
335 ph10 664 #endif
336 ph10 836 #endif /* SUPPORT_UTF */
337 ph10 664 #ifdef SUPPORT_UCP
338     jump_list *getucd;
339     #endif
340     } compiler_common;
341    
/* Context for byte_sequence_compare. The two unions (c and oc) exist only
when SLJIT_UNALIGNED is enabled; each overlays an int, an unsigned short and
a small array of code units whose element type depends on the PCRE width. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } oc;
#endif
} compare_context;
375    
/* Tag values written into saved stack frames by init_frame: frame_end
terminates a frame; the negative tags mark a saved string-begin value and a
saved mark value respectively. */
enum {
  frame_end = 0,
  frame_setstrbegin = -1,
  frame_setmark = -2
};
381    
/* Undefine sljit macros (CMP is redefined further down in this file). */
#undef CMP

/* Byte displacement of stack element i: element 0 is at -sizeof(sljit_w),
element 1 at -2 * sizeof(sljit_w), and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))

/* Register assignment. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define TMP2          SLJIT_TEMPORARY_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1

/* Locals layout. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_w))
#define LOCALS1          (1 * sizeof(sljit_w))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_w))
#define POSSESSIVE1      (3 * sizeof(sljit_w))
/* Max limit of recursions. */
#define CALL_LIMIT       (4 * sizeof(sljit_w))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. All three OVECTOR macros and PRIV_DATA expect a variable named
'common' to be in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))
#define PRIV_DATA(cc)    (common->localptrs[(cc) - common->start])
416 ph10 664
417 ph10 836 #ifdef COMPILE_PCRE8
418     #define MOV_UCHAR SLJIT_MOV_UB
419     #define MOVU_UCHAR SLJIT_MOVU_UB
420     #else
421     #ifdef COMPILE_PCRE16
422     #define MOV_UCHAR SLJIT_MOV_UH
423     #define MOVU_UCHAR SLJIT_MOVU_UH
424     #else
425     #error Unsupported compiling mode
426     #endif
427     #endif
428    
/* Shortcuts for the sljit emitter calls. DEFINE_COMPILER declares the local
variable 'compiler' (taken from 'common') that every other macro below
passes as the first argument. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump and bind it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an already emitted jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emit a compare-and-jump and bind it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define COND_VALUE(op, dst, dstw, type) \
  sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
452 ph10 664
453 ph10 836 static pcre_uchar* bracketend(pcre_uchar* cc)
454 ph10 664 {
455     SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
456     do cc += GET(cc, 1); while (*cc == OP_ALT);
457     SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
458     cc += 1 + LINK_SIZE;
459     return cc;
460     }
461    
/* Functions which might need modification for every newly supported opcode:
 next_opcode
 get_localspace
 set_localptrs
 get_framesize
 init_frame
 get_localsize
 copy_locals
 compile_hotpath
 compile_fallbackpath
*/
473    
474 ph10 836 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
475 ph10 664 {
476     SLJIT_UNUSED_ARG(common);
477     switch(*cc)
478     {
479     case OP_SOD:
480     case OP_SOM:
481     case OP_SET_SOM:
482     case OP_NOT_WORD_BOUNDARY:
483     case OP_WORD_BOUNDARY:
484     case OP_NOT_DIGIT:
485     case OP_DIGIT:
486     case OP_NOT_WHITESPACE:
487     case OP_WHITESPACE:
488     case OP_NOT_WORDCHAR:
489     case OP_WORDCHAR:
490     case OP_ANY:
491     case OP_ALLANY:
492     case OP_ANYNL:
493     case OP_NOT_HSPACE:
494     case OP_HSPACE:
495     case OP_NOT_VSPACE:
496     case OP_VSPACE:
497     case OP_EXTUNI:
498     case OP_EODN:
499     case OP_EOD:
500     case OP_CIRC:
501     case OP_CIRCM:
502     case OP_DOLL:
503     case OP_DOLLM:
504     case OP_TYPESTAR:
505     case OP_TYPEMINSTAR:
506     case OP_TYPEPLUS:
507     case OP_TYPEMINPLUS:
508     case OP_TYPEQUERY:
509     case OP_TYPEMINQUERY:
510     case OP_TYPEPOSSTAR:
511     case OP_TYPEPOSPLUS:
512     case OP_TYPEPOSQUERY:
513     case OP_CRSTAR:
514     case OP_CRMINSTAR:
515     case OP_CRPLUS:
516     case OP_CRMINPLUS:
517     case OP_CRQUERY:
518     case OP_CRMINQUERY:
519     case OP_DEF:
520     case OP_BRAZERO:
521     case OP_BRAMINZERO:
522     case OP_BRAPOSZERO:
523 zherczeg 941 case OP_COMMIT:
524 ph10 664 case OP_FAIL:
525     case OP_ACCEPT:
526     case OP_ASSERT_ACCEPT:
527     case OP_SKIPZERO:
528     return cc + 1;
529    
530 zherczeg 736 case OP_ANYBYTE:
531 ph10 836 #ifdef SUPPORT_UTF
532     if (common->utf) return NULL;
533 zherczeg 736 #endif
534     return cc + 1;
535    
536 ph10 664 case OP_CHAR:
537     case OP_CHARI:
538     case OP_NOT:
539     case OP_NOTI:
540     case OP_STAR:
541     case OP_MINSTAR:
542     case OP_PLUS:
543     case OP_MINPLUS:
544     case OP_QUERY:
545     case OP_MINQUERY:
546     case OP_POSSTAR:
547     case OP_POSPLUS:
548     case OP_POSQUERY:
549     case OP_STARI:
550     case OP_MINSTARI:
551     case OP_PLUSI:
552     case OP_MINPLUSI:
553     case OP_QUERYI:
554     case OP_MINQUERYI:
555     case OP_POSSTARI:
556     case OP_POSPLUSI:
557     case OP_POSQUERYI:
558     case OP_NOTSTAR:
559     case OP_NOTMINSTAR:
560     case OP_NOTPLUS:
561     case OP_NOTMINPLUS:
562     case OP_NOTQUERY:
563     case OP_NOTMINQUERY:
564     case OP_NOTPOSSTAR:
565     case OP_NOTPOSPLUS:
566     case OP_NOTPOSQUERY:
567     case OP_NOTSTARI:
568     case OP_NOTMINSTARI:
569     case OP_NOTPLUSI:
570     case OP_NOTMINPLUSI:
571     case OP_NOTQUERYI:
572     case OP_NOTMINQUERYI:
573     case OP_NOTPOSSTARI:
574     case OP_NOTPOSPLUSI:
575     case OP_NOTPOSQUERYI:
576     cc += 2;
577 ph10 836 #ifdef SUPPORT_UTF
578     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
579 ph10 664 #endif
580     return cc;
581    
582     case OP_UPTO:
583     case OP_MINUPTO:
584     case OP_EXACT:
585     case OP_POSUPTO:
586     case OP_UPTOI:
587     case OP_MINUPTOI:
588     case OP_EXACTI:
589     case OP_POSUPTOI:
590     case OP_NOTUPTO:
591     case OP_NOTMINUPTO:
592     case OP_NOTEXACT:
593     case OP_NOTPOSUPTO:
594     case OP_NOTUPTOI:
595     case OP_NOTMINUPTOI:
596     case OP_NOTEXACTI:
597     case OP_NOTPOSUPTOI:
598 ph10 836 cc += 2 + IMM2_SIZE;
599     #ifdef SUPPORT_UTF
600     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
601 ph10 664 #endif
602     return cc;
603    
604     case OP_NOTPROP:
605     case OP_PROP:
606 ph10 836 return cc + 1 + 2;
607    
608 ph10 664 case OP_TYPEUPTO:
609     case OP_TYPEMINUPTO:
610     case OP_TYPEEXACT:
611     case OP_TYPEPOSUPTO:
612     case OP_REF:
613     case OP_REFI:
614     case OP_CREF:
615 zherczeg 741 case OP_NCREF:
616     case OP_RREF:
617     case OP_NRREF:
618 ph10 664 case OP_CLOSE:
619 ph10 836 cc += 1 + IMM2_SIZE;
620 ph10 664 return cc;
621    
622     case OP_CRRANGE:
623     case OP_CRMINRANGE:
624 ph10 836 return cc + 1 + 2 * IMM2_SIZE;
625 ph10 664
626     case OP_CLASS:
627     case OP_NCLASS:
628 ph10 836 return cc + 1 + 32 / sizeof(pcre_uchar);
629 ph10 664
630 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
631 ph10 664 case OP_XCLASS:
632     return cc + GET(cc, 1);
633     #endif
634    
635     case OP_RECURSE:
636     case OP_ASSERT:
637     case OP_ASSERT_NOT:
638     case OP_ASSERTBACK:
639     case OP_ASSERTBACK_NOT:
640     case OP_REVERSE:
641     case OP_ONCE:
642 zherczeg 726 case OP_ONCE_NC:
643 ph10 664 case OP_BRA:
644     case OP_BRAPOS:
645     case OP_COND:
646     case OP_SBRA:
647     case OP_SBRAPOS:
648     case OP_SCOND:
649     case OP_ALT:
650     case OP_KET:
651     case OP_KETRMAX:
652     case OP_KETRMIN:
653     case OP_KETRPOS:
654     return cc + 1 + LINK_SIZE;
655    
656     case OP_CBRA:
657     case OP_CBRAPOS:
658     case OP_SCBRA:
659     case OP_SCBRAPOS:
660 ph10 836 return cc + 1 + LINK_SIZE + IMM2_SIZE;
661 ph10 664
662 zherczeg 929 case OP_MARK:
663     return cc + 1 + 2 + cc[1];
664    
665 ph10 664 default:
666     return NULL;
667     }
668     }
669    
670 ph10 836 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
671 ph10 664 {
672     int localspace = 0;
673 ph10 836 pcre_uchar *alternative;
674 ph10 664 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
675     while (cc < ccend)
676     {
677     switch(*cc)
678     {
679 zherczeg 929 case OP_SET_SOM:
680     common->has_set_som = TRUE;
681     cc += 1;
682     break;
683    
684 ph10 664 case OP_ASSERT:
685     case OP_ASSERT_NOT:
686     case OP_ASSERTBACK:
687     case OP_ASSERTBACK_NOT:
688     case OP_ONCE:
689 zherczeg 726 case OP_ONCE_NC:
690 ph10 664 case OP_BRAPOS:
691     case OP_SBRA:
692     case OP_SBRAPOS:
693     case OP_SCOND:
694     localspace += sizeof(sljit_w);
695     cc += 1 + LINK_SIZE;
696     break;
697    
698     case OP_CBRAPOS:
699     case OP_SCBRAPOS:
700     localspace += sizeof(sljit_w);
701 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
702 ph10 664 break;
703    
704     case OP_COND:
705     /* Might be a hidden SCOND. */
706     alternative = cc + GET(cc, 1);
707     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
708     localspace += sizeof(sljit_w);
709     cc += 1 + LINK_SIZE;
710     break;
711    
712 zherczeg 920 case OP_RECURSE:
713     /* Set its value only once. */
714     if (common->recursive_head == 0)
715     {
716     common->recursive_head = common->ovector_start;
717     common->ovector_start += sizeof(sljit_w);
718     }
719     cc += 1 + LINK_SIZE;
720     break;
721    
722 zherczeg 929 case OP_MARK:
723     if (common->mark_ptr == 0)
724     {
725     common->mark_ptr = common->ovector_start;
726     common->ovector_start += sizeof(sljit_w);
727     }
728     cc += 1 + 2 + cc[1];
729     break;
730    
731 ph10 664 default:
732     cc = next_opcode(common, cc);
733     if (cc == NULL)
734     return -1;
735     break;
736     }
737     }
738     return localspace;
739     }
740    
741 ph10 836 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
742 ph10 664 {
743 ph10 836 pcre_uchar *cc = common->start;
744     pcre_uchar *alternative;
745 ph10 664 while (cc < ccend)
746     {
747     switch(*cc)
748     {
749     case OP_ASSERT:
750     case OP_ASSERT_NOT:
751     case OP_ASSERTBACK:
752     case OP_ASSERTBACK_NOT:
753     case OP_ONCE:
754 zherczeg 726 case OP_ONCE_NC:
755 ph10 664 case OP_BRAPOS:
756     case OP_SBRA:
757     case OP_SBRAPOS:
758     case OP_SCOND:
759     common->localptrs[cc - common->start] = localptr;
760     localptr += sizeof(sljit_w);
761     cc += 1 + LINK_SIZE;
762     break;
763    
764     case OP_CBRAPOS:
765     case OP_SCBRAPOS:
766     common->localptrs[cc - common->start] = localptr;
767     localptr += sizeof(sljit_w);
768 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
769 ph10 664 break;
770    
771     case OP_COND:
772     /* Might be a hidden SCOND. */
773     alternative = cc + GET(cc, 1);
774     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
775     {
776     common->localptrs[cc - common->start] = localptr;
777     localptr += sizeof(sljit_w);
778     }
779     cc += 1 + LINK_SIZE;
780     break;
781    
782     default:
783     cc = next_opcode(common, cc);
784     SLJIT_ASSERT(cc != NULL);
785     break;
786     }
787     }
788     }
789    
790     /* Returns with -1 if no need for frame. */
791 ph10 836 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
792 ph10 664 {
793 ph10 836 pcre_uchar *ccend = bracketend(cc);
794 ph10 664 int length = 0;
795     BOOL possessive = FALSE;
796 zherczeg 929 BOOL setsom_found = recursive;
797     BOOL setmark_found = recursive;
798 ph10 664
799     if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
800     {
801 zherczeg 696 length = 3;
802 ph10 664 possessive = TRUE;
803     }
804    
805     cc = next_opcode(common, cc);
806     SLJIT_ASSERT(cc != NULL);
807     while (cc < ccend)
808     switch(*cc)
809     {
810     case OP_SET_SOM:
811 zherczeg 929 SLJIT_ASSERT(common->has_set_som);
812 ph10 664 if (!setsom_found)
813     {
814     length += 2;
815     setsom_found = TRUE;
816     }
817 zherczeg 929 cc += 1;
818 ph10 664 break;
819    
820 zherczeg 929 case OP_MARK:
821     SLJIT_ASSERT(common->mark_ptr != 0);
822     if (!setmark_found)
823     {
824     length += 2;
825     setmark_found = TRUE;
826     }
827     cc += 1 + 2 + cc[1];
828     break;
829    
830     case OP_RECURSE:
831     if (common->has_set_som && !setsom_found)
832     {
833     length += 2;
834     setsom_found = TRUE;
835     }
836     if (common->mark_ptr != 0 && !setmark_found)
837     {
838     length += 2;
839     setmark_found = TRUE;
840     }
841     cc += 1 + LINK_SIZE;
842     break;
843    
844 ph10 664 case OP_CBRA:
845     case OP_CBRAPOS:
846     case OP_SCBRA:
847     case OP_SCBRAPOS:
848     length += 3;
849 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
850 ph10 664 break;
851    
852     default:
853     cc = next_opcode(common, cc);
854     SLJIT_ASSERT(cc != NULL);
855     break;
856     }
857    
858     /* Possessive quantifiers can use a special case. */
859 zherczeg 726 if (SLJIT_UNLIKELY(possessive) && length == 3)
860 ph10 664 return -1;
861    
862     if (length > 0)
863 zherczeg 726 return length + 1;
864     return -1;
865 ph10 664 }
866    
867 ph10 836 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
868 ph10 664 {
869     DEFINE_COMPILER;
870 ph10 836 pcre_uchar *ccend = bracketend(cc);
871 zherczeg 929 BOOL setsom_found = recursive;
872     BOOL setmark_found = recursive;
873 ph10 664 int offset;
874    
875 zherczeg 726 /* >= 1 + shortest item size (2) */
876 zherczeg 906 SLJIT_UNUSED_ARG(stacktop);
877 zherczeg 726 SLJIT_ASSERT(stackpos >= stacktop + 2);
878 ph10 664
879     stackpos = STACK(stackpos);
880     if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
881     cc = next_opcode(common, cc);
882     SLJIT_ASSERT(cc != NULL);
883     while (cc < ccend)
884     switch(*cc)
885     {
886     case OP_SET_SOM:
887 zherczeg 929 SLJIT_ASSERT(common->has_set_som);
888 ph10 664 if (!setsom_found)
889     {
890     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
891     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
892     stackpos += (int)sizeof(sljit_w);
893     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
894     stackpos += (int)sizeof(sljit_w);
895     setsom_found = TRUE;
896     }
897 zherczeg 929 cc += 1;
898 ph10 664 break;
899    
900 zherczeg 929 case OP_MARK:
901     SLJIT_ASSERT(common->mark_ptr != 0);
902     if (!setmark_found)
903     {
904     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
905     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
906     stackpos += (int)sizeof(sljit_w);
907     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
908     stackpos += (int)sizeof(sljit_w);
909     setmark_found = TRUE;
910     }
911     cc += 1 + 2 + cc[1];
912     break;
913    
914     case OP_RECURSE:
915     if (common->has_set_som && !setsom_found)
916     {
917     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
918     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
919     stackpos += (int)sizeof(sljit_w);
920     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
921     stackpos += (int)sizeof(sljit_w);
922     setsom_found = TRUE;
923     }
924     if (common->mark_ptr != 0 && !setmark_found)
925     {
926     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
927     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
928     stackpos += (int)sizeof(sljit_w);
929     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
930     stackpos += (int)sizeof(sljit_w);
931     setmark_found = TRUE;
932     }
933     cc += 1 + LINK_SIZE;
934     break;
935    
936 ph10 664 case OP_CBRA:
937     case OP_CBRAPOS:
938     case OP_SCBRA:
939     case OP_SCBRAPOS:
940     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
941     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
942     stackpos += (int)sizeof(sljit_w);
943     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
944     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
945     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
946     stackpos += (int)sizeof(sljit_w);
947     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
948     stackpos += (int)sizeof(sljit_w);
949    
950 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
951 ph10 664 break;
952    
953     default:
954     cc = next_opcode(common, cc);
955     SLJIT_ASSERT(cc != NULL);
956     break;
957     }
958    
959     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
960 zherczeg 726 SLJIT_ASSERT(stackpos == STACK(stacktop));
961 ph10 664 }
962    
963 ph10 836 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
964 ph10 664 {
965     int localsize = 2;
966 ph10 836 pcre_uchar *alternative;
967 ph10 664 /* Calculate the sum of the local variables. */
968     while (cc < ccend)
969     {
970     switch(*cc)
971     {
972     case OP_ASSERT:
973     case OP_ASSERT_NOT:
974     case OP_ASSERTBACK:
975     case OP_ASSERTBACK_NOT:
976     case OP_ONCE:
977 zherczeg 726 case OP_ONCE_NC:
978 ph10 664 case OP_BRAPOS:
979     case OP_SBRA:
980     case OP_SBRAPOS:
981     case OP_SCOND:
982     localsize++;
983     cc += 1 + LINK_SIZE;
984     break;
985    
986     case OP_CBRA:
987     case OP_SCBRA:
988     localsize++;
989 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
990 ph10 664 break;
991    
992     case OP_CBRAPOS:
993     case OP_SCBRAPOS:
994     localsize += 2;
995 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
996 ph10 664 break;
997    
998     case OP_COND:
999     /* Might be a hidden SCOND. */
1000     alternative = cc + GET(cc, 1);
1001     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1002     localsize++;
1003     cc += 1 + LINK_SIZE;
1004     break;
1005    
1006     default:
1007     cc = next_opcode(common, cc);
1008     SLJIT_ASSERT(cc != NULL);
1009     break;
1010     }
1011     }
1012     SLJIT_ASSERT(cc == ccend);
1013     return localsize;
1014     }
1015    
1016 ph10 836 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1017 ph10 664 BOOL save, int stackptr, int stacktop)
1018     {
1019     DEFINE_COMPILER;
1020     int srcw[2];
1021     int count;
1022     BOOL tmp1next = TRUE;
1023     BOOL tmp1empty = TRUE;
1024     BOOL tmp2empty = TRUE;
1025 ph10 836 pcre_uchar *alternative;
1026 ph10 664 enum {
1027     start,
1028     loop,
1029     end
1030     } status;
1031    
1032     status = save ? start : loop;
1033     stackptr = STACK(stackptr - 2);
1034     stacktop = STACK(stacktop - 1);
1035    
1036     if (!save)
1037     {
1038     stackptr += sizeof(sljit_w);
1039     if (stackptr < stacktop)
1040     {
1041     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1042     stackptr += sizeof(sljit_w);
1043     tmp1empty = FALSE;
1044     }
1045     if (stackptr < stacktop)
1046     {
1047     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1048     stackptr += sizeof(sljit_w);
1049     tmp2empty = FALSE;
1050     }
1051     /* The tmp1next must be TRUE in either way. */
1052     }
1053    
1054     while (status != end)
1055     {
1056     count = 0;
1057     switch(status)
1058     {
1059     case start:
1060 zherczeg 920 SLJIT_ASSERT(save && common->recursive_head != 0);
1061 ph10 664 count = 1;
1062 zherczeg 920 srcw[0] = common->recursive_head;
1063 ph10 664 status = loop;
1064     break;
1065    
1066     case loop:
1067     if (cc >= ccend)
1068     {
1069     status = end;
1070     break;
1071     }
1072    
1073     switch(*cc)
1074     {
1075     case OP_ASSERT:
1076     case OP_ASSERT_NOT:
1077     case OP_ASSERTBACK:
1078     case OP_ASSERTBACK_NOT:
1079     case OP_ONCE:
1080 zherczeg 726 case OP_ONCE_NC:
1081 ph10 664 case OP_BRAPOS:
1082     case OP_SBRA:
1083     case OP_SBRAPOS:
1084     case OP_SCOND:
1085     count = 1;
1086 ph10 836 srcw[0] = PRIV_DATA(cc);
1087 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1088     cc += 1 + LINK_SIZE;
1089     break;
1090    
1091     case OP_CBRA:
1092     case OP_SCBRA:
1093     count = 1;
1094     srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1095 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1096 ph10 664 break;
1097    
1098     case OP_CBRAPOS:
1099     case OP_SCBRAPOS:
1100     count = 2;
1101     srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1102 ph10 836 srcw[0] = PRIV_DATA(cc);
1103 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1104 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1105 ph10 664 break;
1106    
1107     case OP_COND:
1108     /* Might be a hidden SCOND. */
1109     alternative = cc + GET(cc, 1);
1110     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1111     {
1112     count = 1;
1113 ph10 836 srcw[0] = PRIV_DATA(cc);
1114 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1115     }
1116     cc += 1 + LINK_SIZE;
1117     break;
1118    
1119     default:
1120     cc = next_opcode(common, cc);
1121     SLJIT_ASSERT(cc != NULL);
1122     break;
1123     }
1124     break;
1125    
1126     case end:
1127     SLJIT_ASSERT_STOP();
1128     break;
1129     }
1130    
1131     while (count > 0)
1132     {
1133     count--;
1134     if (save)
1135     {
1136     if (tmp1next)
1137     {
1138     if (!tmp1empty)
1139     {
1140     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1141     stackptr += sizeof(sljit_w);
1142     }
1143     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1144     tmp1empty = FALSE;
1145     tmp1next = FALSE;
1146     }
1147     else
1148     {
1149     if (!tmp2empty)
1150     {
1151     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1152     stackptr += sizeof(sljit_w);
1153     }
1154     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1155     tmp2empty = FALSE;
1156     tmp1next = TRUE;
1157     }
1158     }
1159     else
1160     {
1161     if (tmp1next)
1162     {
1163     SLJIT_ASSERT(!tmp1empty);
1164     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1165     tmp1empty = stackptr >= stacktop;
1166     if (!tmp1empty)
1167     {
1168     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1169     stackptr += sizeof(sljit_w);
1170     }
1171     tmp1next = FALSE;
1172     }
1173     else
1174     {
1175     SLJIT_ASSERT(!tmp2empty);
1176     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1177     tmp2empty = stackptr >= stacktop;
1178     if (!tmp2empty)
1179     {
1180     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1181     stackptr += sizeof(sljit_w);
1182     }
1183     tmp1next = TRUE;
1184     }
1185     }
1186     }
1187     }
1188    
1189     if (save)
1190     {
1191     if (tmp1next)
1192     {
1193     if (!tmp1empty)
1194     {
1195     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1196     stackptr += sizeof(sljit_w);
1197     }
1198     if (!tmp2empty)
1199     {
1200     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1201     stackptr += sizeof(sljit_w);
1202     }
1203     }
1204     else
1205     {
1206     if (!tmp2empty)
1207     {
1208     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1209     stackptr += sizeof(sljit_w);
1210     }
1211     if (!tmp1empty)
1212     {
1213     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1214     stackptr += sizeof(sljit_w);
1215     }
1216     }
1217     }
1218     SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1219     }
1220    
1221     static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1222     {
1223     return (value & (value - 1)) == 0;
1224     }
1225    
1226     static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1227     {
1228     while (list)
1229     {
1230     /* sljit_set_label is clever enough to do nothing
1231     if either the jump or the label is NULL */
1232     sljit_set_label(list->jump, label);
1233     list = list->next;
1234     }
1235     }
1236    
1237     static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1238     {
1239     jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1240     if (list_item)
1241     {
1242     list_item->next = *list;
1243     list_item->jump = jump;
1244     *list = list_item;
1245     }
1246     }
1247    
1248     static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1249     {
1250     DEFINE_COMPILER;
1251     stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1252    
1253     if (list_item)
1254     {
1255     list_item->type = type;
1256     list_item->data = data;
1257     list_item->start = start;
1258     list_item->leave = LABEL();
1259     list_item->next = common->stubs;
1260     common->stubs = list_item;
1261     }
1262     }
1263    
1264     static void flush_stubs(compiler_common *common)
1265     {
1266     DEFINE_COMPILER;
1267     stub_list* list_item = common->stubs;
1268    
1269     while (list_item)
1270     {
1271     JUMPHERE(list_item->start);
1272     switch(list_item->type)
1273     {
1274     case stack_alloc:
1275     add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1276     break;
1277     }
1278     JUMPTO(SLJIT_JUMP, list_item->leave);
1279     list_item = list_item->next;
1280     }
1281     common->stubs = NULL;
1282     }
1283    
1284 ph10 677 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1285     {
1286     DEFINE_COMPILER;
1287    
1288 zherczeg 695 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1289 ph10 677 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1290     }
1291    
1292 ph10 664 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1293     {
1294     /* May destroy all locals and registers except TMP2. */
1295     DEFINE_COMPILER;
1296    
1297     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1298     #ifdef DESTROY_REGISTERS
1299     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1300     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1301     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1302     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1303     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1304     #endif
1305     add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1306     }
1307    
1308     static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1309     {
1310     DEFINE_COMPILER;
1311     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1312     }
1313    
1314     static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1315     {
1316     DEFINE_COMPILER;
1317     struct sljit_label *loop;
1318     int i;
1319     /* At this point we can freely use all temporary registers. */
1320     /* TMP1 returns with begin - 1. */
1321 zherczeg 880 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1322 ph10 664 if (length < 8)
1323     {
1324     for (i = 0; i < length; i++)
1325     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1326     }
1327     else
1328     {
1329 zherczeg 955 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1330 ph10 664 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1331     loop = LABEL();
1332     OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1333     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1334     JUMPTO(SLJIT_C_NOT_ZERO, loop);
1335     }
1336     }
1337    
1338 zherczeg 696 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1339 ph10 664 {
1340     DEFINE_COMPILER;
1341     struct sljit_label *loop;
1342     struct sljit_jump *earlyexit;
1343    
1344     /* At this point we can freely use all registers. */
1345 zherczeg 880 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1346 zherczeg 696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1347    
1348 ph10 664 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1349 zherczeg 929 if (common->mark_ptr != 0)
1350     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1351 ph10 664 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1352 zherczeg 929 if (common->mark_ptr != 0)
1353     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1354 ph10 664 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1355     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1356 zherczeg 955 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1357 ph10 664 /* Unlikely, but possible */
1358     earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1359     loop = LABEL();
1360 zherczeg 880 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1361     OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1362 ph10 664 /* Copy the integer value to the output buffer */
1363 ph10 836 #ifdef COMPILE_PCRE16
1364 zherczeg 880 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1365 ph10 836 #endif
1366 zherczeg 880 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1367 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1368     JUMPTO(SLJIT_C_NOT_ZERO, loop);
1369     JUMPHERE(earlyexit);
1370 zherczeg 696
1371     /* Calculate the return value, which is the maximum ovector value. */
1372     if (topbracket > 1)
1373     {
1374 zherczeg 955 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1375 zherczeg 696 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1376    
1377 zherczeg 880 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1378 zherczeg 696 loop = LABEL();
1379 zherczeg 715 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1380 zherczeg 696 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1381 zherczeg 880 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1382 zherczeg 696 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1383     }
1384     else
1385     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1386 ph10 664 }
1387    
1388 zherczeg 914 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1389     {
1390     DEFINE_COMPILER;
1391    
1392     SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1393 zherczeg 920 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1394 zherczeg 914
1395     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1396     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1397     OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1398     CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1399    
1400     /* Store match begin and end. */
1401     OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1402     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1403 zherczeg 920 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1404 zherczeg 914 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1405     #ifdef COMPILE_PCRE16
1406     OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1407     #endif
1408     OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1409    
1410     OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1411     #ifdef COMPILE_PCRE16
1412     OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1413     #endif
1414     OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1415    
1416     JUMPTO(SLJIT_JUMP, leave);
1417     }
1418    
1419     static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1420     {
1421     /* May destroy TMP1. */
1422     DEFINE_COMPILER;
1423     struct sljit_jump *jump;
1424    
1425     if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1426     {
1427 zherczeg 920 /* The value of -1 must be kept for start_used_ptr! */
1428     OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1429     /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1430     is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1431 zherczeg 914 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1432 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1433 zherczeg 914 JUMPHERE(jump);
1434     }
1435     else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1436     {
1437 zherczeg 920 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1438     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1439 zherczeg 914 JUMPHERE(jump);
1440     }
1441     }
1442    
1443 ph10 836 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1444 ph10 664 {
1445     /* Detects if the character has an othercase. */
1446     unsigned int c;
1447    
1448 ph10 836 #ifdef SUPPORT_UTF
1449     if (common->utf)
1450 ph10 664 {
1451     GETCHAR(c, cc);
1452     if (c > 127)
1453     {
1454     #ifdef SUPPORT_UCP
1455     return c != UCD_OTHERCASE(c);
1456     #else
1457     return FALSE;
1458     #endif
1459     }
1460 ph10 836 #ifndef COMPILE_PCRE8
1461     return common->fcc[c] != c;
1462     #endif
1463 ph10 664 }
1464     else
1465     #endif
1466     c = *cc;
1467 ph10 836 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1468 ph10 664 }
1469    
1470     static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1471     {
1472     /* Returns with the othercase. */
1473 ph10 836 #ifdef SUPPORT_UTF
1474     if (common->utf && c > 127)
1475 ph10 664 {
1476     #ifdef SUPPORT_UCP
1477     return UCD_OTHERCASE(c);
1478     #else
1479     return c;
1480     #endif
1481     }
1482     #endif
1483 ph10 836 return TABLE_GET(c, common->fcc, c);
1484 ph10 664 }
1485    
1486 ph10 836 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1487 ph10 664 {
1488     /* Detects if the character and its othercase has only 1 bit difference. */
1489     unsigned int c, oc, bit;
1490 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1491 ph10 664 int n;
1492     #endif
1493    
1494 ph10 836 #ifdef SUPPORT_UTF
1495     if (common->utf)
1496 ph10 664 {
1497     GETCHAR(c, cc);
1498     if (c <= 127)
1499     oc = common->fcc[c];
1500     else
1501     {
1502     #ifdef SUPPORT_UCP
1503     oc = UCD_OTHERCASE(c);
1504     #else
1505     oc = c;
1506     #endif
1507     }
1508     }
1509     else
1510     {
1511     c = *cc;
1512 ph10 836 oc = TABLE_GET(c, common->fcc, c);
1513 ph10 664 }
1514     #else
1515     c = *cc;
1516 ph10 836 oc = TABLE_GET(c, common->fcc, c);
1517 ph10 664 #endif
1518    
1519     SLJIT_ASSERT(c != oc);
1520    
1521     bit = c ^ oc;
1522     /* Optimized for English alphabet. */
1523     if (c <= 127 && bit == 0x20)
1524     return (0 << 8) | 0x20;
1525    
1526     /* Since c != oc, they must have at least 1 bit difference. */
1527     if (!ispowerof2(bit))
1528     return 0;
1529    
1530 ph10 836 #ifdef COMPILE_PCRE8
1531    
1532     #ifdef SUPPORT_UTF
1533     if (common->utf && c > 127)
1534 ph10 664 {
1535 ph10 836 n = GET_EXTRALEN(*cc);
1536 ph10 664 while ((bit & 0x3f) == 0)
1537     {
1538     n--;
1539     bit >>= 6;
1540     }
1541     return (n << 8) | bit;
1542     }
1543 ph10 836 #endif /* SUPPORT_UTF */
1544 ph10 664 return (0 << 8) | bit;
1545 ph10 836
1546     #else /* COMPILE_PCRE8 */
1547    
1548     #ifdef COMPILE_PCRE16
1549     #ifdef SUPPORT_UTF
1550     if (common->utf && c > 65535)
1551     {
1552     if (bit >= (1 << 10))
1553     bit >>= 10;
1554     else
1555     return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1556     }
1557     #endif /* SUPPORT_UTF */
1558     return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1559     #endif /* COMPILE_PCRE16 */
1560    
1561     #endif /* COMPILE_PCRE8 */
1562 ph10 664 }
1563    
1564 zherczeg 918 static void check_partial(compiler_common *common, BOOL force)
1565 ph10 664 {
1566 zherczeg 918 /* Checks whether a partial matching is occured. Does not modify registers. */
1567 ph10 664 DEFINE_COMPILER;
1568 zherczeg 918 struct sljit_jump *jump = NULL;
1569 zherczeg 914
1570 zherczeg 918 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1571    
1572 zherczeg 914 if (common->mode == JIT_COMPILE)
1573     return;
1574    
1575 zherczeg 920 if (!force)
1576     jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1577     else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1578     jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
1579 zherczeg 918
1580 zherczeg 914 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1581 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1582 zherczeg 914 else
1583     {
1584     if (common->partialmatchlabel != NULL)
1585     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1586     else
1587     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1588     }
1589 zherczeg 918
1590     if (jump != NULL)
1591     JUMPHERE(jump);
1592 ph10 664 }
1593    
1594 zherczeg 914 static struct sljit_jump *check_str_end(compiler_common *common)
1595     {
1596     /* Does not affect registers. Usually used in a tight spot. */
1597     DEFINE_COMPILER;
1598     struct sljit_jump *jump;
1599     struct sljit_jump *nohit;
1600     struct sljit_jump *return_value;
1601    
1602     if (common->mode == JIT_COMPILE)
1603     return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1604    
1605     jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1606     if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1607     {
1608 zherczeg 920 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1609     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1610 zherczeg 914 JUMPHERE(nohit);
1611     return_value = JUMP(SLJIT_JUMP);
1612     }
1613     else
1614     {
1615 zherczeg 920 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1616 zherczeg 914 if (common->partialmatchlabel != NULL)
1617     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1618     else
1619     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1620     }
1621     JUMPHERE(jump);
1622     return return_value;
1623     }
1624    
1625     static void fallback_at_str_end(compiler_common *common, jump_list **fallbacks)
1626     {
1627     DEFINE_COMPILER;
1628     struct sljit_jump *jump;
1629    
1630     if (common->mode == JIT_COMPILE)
1631     {
1632     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1633     return;
1634     }
1635    
1636     /* Partial matching mode. */
1637     jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1638 zherczeg 920 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
1639 zherczeg 914 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1640     {
1641 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1642 zherczeg 914 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
1643     }
1644     else
1645     {
1646     if (common->partialmatchlabel != NULL)
1647     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1648     else
1649     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1650     }
1651     JUMPHERE(jump);
1652     }
1653    
1654 ph10 664 static void read_char(compiler_common *common)
1655     {
1656     /* Reads the character into TMP1, updates STR_PTR.
1657     Does not check STR_END. TMP2 Destroyed. */
1658     DEFINE_COMPILER;
1659 ph10 836 #ifdef SUPPORT_UTF
1660 ph10 664 struct sljit_jump *jump;
1661     #endif
1662    
1663 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1664     #ifdef SUPPORT_UTF
1665     if (common->utf)
1666 ph10 664 {
1667 ph10 836 #ifdef COMPILE_PCRE8
1668 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1669 ph10 836 #else
1670     #ifdef COMPILE_PCRE16
1671     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1672     #endif
1673     #endif /* COMPILE_PCRE8 */
1674     add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1675 ph10 664 JUMPHERE(jump);
1676     }
1677     #endif
1678 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1679 ph10 664 }
1680    
1681     static void peek_char(compiler_common *common)
1682     {
1683     /* Reads the character into TMP1, keeps STR_PTR.
1684     Does not check STR_END. TMP2 Destroyed. */
1685     DEFINE_COMPILER;
1686 ph10 836 #ifdef SUPPORT_UTF
1687 ph10 664 struct sljit_jump *jump;
1688     #endif
1689    
1690 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1691     #ifdef SUPPORT_UTF
1692     if (common->utf)
1693 ph10 664 {
1694 ph10 836 #ifdef COMPILE_PCRE8
1695 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1696 ph10 836 #else
1697     #ifdef COMPILE_PCRE16
1698     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1699     #endif
1700     #endif /* COMPILE_PCRE8 */
1701     add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1702 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1703     JUMPHERE(jump);
1704     }
1705     #endif
1706     }
1707    
1708     static void read_char8_type(compiler_common *common)
1709     {
1710     /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1711     DEFINE_COMPILER;
1712 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1713 ph10 664 struct sljit_jump *jump;
1714     #endif
1715    
1716 ph10 836 #ifdef SUPPORT_UTF
1717     if (common->utf)
1718 ph10 664 {
1719 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1720     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1721     #ifdef COMPILE_PCRE8
1722 ph10 664 /* This can be an extra read in some situations, but hopefully
1723 ph10 836 it is needed in most cases. */
1724 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1725 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1726 ph10 836 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1727 ph10 664 JUMPHERE(jump);
1728 ph10 836 #else
1729     #ifdef COMPILE_PCRE16
1730     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1731     jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1732     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1733     JUMPHERE(jump);
1734     /* Skip low surrogate if necessary. */
1735     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1736     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1737     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1738     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1739     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1740     #endif
1741     #endif /* COMPILE_PCRE8 */
1742 ph10 664 return;
1743     }
1744     #endif
1745 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1746     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1747     #ifdef COMPILE_PCRE16
1748     /* The ctypes array contains only 256 values. */
1749     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1750     jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1751     #endif
1752     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1753     #ifdef COMPILE_PCRE16
1754     JUMPHERE(jump);
1755     #endif
1756 ph10 664 }
1757    
1758     static void skip_char_back(compiler_common *common)
1759     {
1760 ph10 836 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1761 ph10 664 DEFINE_COMPILER;
1762 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1763 ph10 664 struct sljit_label *label;
1764    
1765 ph10 836 if (common->utf)
1766 ph10 664 {
1767     label = LABEL();
1768 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1769     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1770 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1771     CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1772     return;
1773     }
1774     #endif
1775 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1776     if (common->utf)
1777     {
1778     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1779     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1780     /* Skip low surrogate if necessary. */
1781     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1782     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1783     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1784     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1785     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1786     return;
1787     }
1788     #endif
1789     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1790 ph10 664 }
1791    
1792     static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1793     {
1794     /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1795     DEFINE_COMPILER;
1796    
1797     if (nltype == NLTYPE_ANY)
1798     {
1799     add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1800     add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1801     }
1802     else if (nltype == NLTYPE_ANYCRLF)
1803     {
1804     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1805     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1806     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1807     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1808     add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1809     }
1810     else
1811     {
1812 ph10 836 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1813 ph10 664 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1814     }
1815     }
1816    
1817 ph10 836 #ifdef SUPPORT_UTF
1818    
1819     #ifdef COMPILE_PCRE8
1820     static void do_utfreadchar(compiler_common *common)
1821 ph10 664 {
1822 ph10 836 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1823 zherczeg 736 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
         /* Emitted as a fast-call routine (fast enter / fast return through
         RETURN_ADDR). The lead byte is expected at STR_PTR: the following bytes
         are read at offsets 1..3 and STR_PTR is advanced by length - 1 units.
         The value left in TMP2 is IN_UCHARS(length - 1). Continuation bytes are
         only masked with 0x3f; no validity check is emitted. */
1824 ph10 664 DEFINE_COMPILER;
1825     struct sljit_jump *jump;
1826    
1827 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1828 ph10 664 /* Searching for the first zero. */
         /* Bit 0x20 of the lead byte clear: the sequence has two bytes. */
1829     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1830     jump = JUMP(SLJIT_C_NOT_ZERO);
1831 ph10 836 /* Two byte sequence. */
         /* TMP1 = ((lead & 0x1f) << 6) | (byte1 & 0x3f). */
1832     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1833     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1834 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1835     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1836     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1837     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1838 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1839 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1840     JUMPHERE(jump);
1841    
         /* Bit 0x10 of the lead byte clear: the sequence has three bytes. */
1842     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1843     jump = JUMP(SLJIT_C_NOT_ZERO);
1844 ph10 836 /* Three byte sequence. */
         /* TMP1 = ((lead & 0x0f) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f). */
1845     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1846 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1847     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1848     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1849     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1850     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1851 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1852     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1853 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1854     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1855 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1856 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1857     JUMPHERE(jump);
1858    
1859 ph10 836 /* Four byte sequence. */
         /* Everything else is decoded as four bytes: 3 bits from the lead byte
         followed by 6 bits from each of the three following bytes. */
1860     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1861 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1862     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1863     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1864     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1865     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1866 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1867 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1868     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1869     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1870 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1871     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1872 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1873     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1874 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1875 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1876     }
1877    
1878 ph10 836 static void do_utfreadtype8(compiler_common *common)
1879 ph10 664 {
1880 ph10 836 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1881     of the character (>= 0xc0). Return value in TMP1. */
         /* Only two byte sequences can encode a character below 256, so only
         those are decoded; the type is then loaded from the common->ctypes
         table. For every other character the returned type is 0. In all cases
         STR_PTR is moved past the remaining bytes of the character.
         NOTE(review): the second byte is read at offset 0, so STR_PTR presumably
         already points past the lead byte on entry - confirm against the caller. */
1882 ph10 664 DEFINE_COMPILER;
1883     struct sljit_jump *jump;
1884     struct sljit_jump *compare;
1885    
1886 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1887 ph10 664
         /* Bit 0x20 set in the lead byte: three or more bytes, handled at the end. */
1888     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1889     jump = JUMP(SLJIT_C_NOT_ZERO);
1890 ph10 836 /* Two byte sequence. */
         /* TMP2 = ((lead & 0x1f) << 6) | (next & 0x3f), the decoded character. */
1891     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1892     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1893 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1894     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1895     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1896     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
         /* Characters above 255 have no entry in the ctypes table. */
1897     compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1898     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1899     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1900    
1901     JUMPHERE(compare);
1902     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1903     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1904     JUMPHERE(jump);
1905    
1906     /* We only have types for characters less than 256. */
         /* TMP2 still holds the lead byte here. The value looked up in utf8_table4
         (indexed by lead byte - 0xc0) is added to STR_PTR to skip the rest of the
         character, and type 0 is returned. */
1907 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1908 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1909     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1910     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1911     }
1912    
1913 ph10 836 #else /* COMPILE_PCRE8 */
1914 ph10 664
1915 ph10 836 #ifdef COMPILE_PCRE16
1916     static void do_utfreadchar(compiler_common *common)
1917     {
1918     /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1919     of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
         /* Given TMP1 >= 0xd800, a value below 0xdc00 is a high surrogate and is
         combined with the following unit; any other value is returned as is.
         NOTE(review): on the "do nothing" path TMP2 is not written, although the
         header above says length - 1 is returned in TMP2 - confirm that callers
         do not rely on TMP2 in that case. */
1920     DEFINE_COMPILER;
1921     struct sljit_jump *jump;
1922    
1923 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1924 ph10 836 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1925     /* Do nothing, only return. */
1926     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1927    
1928     JUMPHERE(jump);
1929     /* Combine two 16 bit characters. */
         /* TMP1 = (((first & 0x3ff) << 10) | (second & 0x3ff)) + 0x10000.
         STR_PTR is advanced by one unit and TMP2 is set to IN_UCHARS(1). */
1930     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1931     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1932     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1933     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1934     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1935     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1936     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1937     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1938     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1939     }
1940     #endif /* COMPILE_PCRE16 */
1941    
1942     #endif /* COMPILE_PCRE8 */
1943    
1944     #endif /* SUPPORT_UTF */
1945    
1946 ph10 664 #ifdef SUPPORT_UCP
1947    
1948     /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1949     #define UCD_BLOCK_MASK 127
1950     #define UCD_BLOCK_SHIFT 7
1951    
1952     static void do_getucd(compiler_common *common)
1953     {
1954     /* Search the UCD record for the character comes in TMP1.
1955     Returns chartype in TMP1 and UCD offset in TMP2. */
         /* Two-stage table lookup: ucd_stage1 is indexed by the character's block
         number (char >> UCD_BLOCK_SHIFT); the byte found there selects a block of
         ucd_stage2, which is indexed with the low bits of the character. The
         16 bit value read from ucd_stage2 is the record index left in TMP2. */
1956     DEFINE_COMPILER;
1957    
         /* The shift amounts used below depend on these sizes. */
1958     SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1959    
1960 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
1961 ph10 664 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1962 ph10 836 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
         /* TMP1 = (stage1 value << UCD_BLOCK_SHIFT) + (char & UCD_BLOCK_MASK). */
1963 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1964     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1965     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
         /* The trailing 1 and 3 of the SLJIT_MEM2 operands are presumably index
         shifts: x2 for the 16 bit stage2 entries and x8 for the 8 byte
         ucd_record entries (see the assert above). */
1966 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1967 ph10 664 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
1968 ph10 836 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1969 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1970     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1971     }
1972     #endif
1973    
1974     static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1975     {
         /* Emits the code that moves STR_PTR to the next character, and returns
         the label (mainloop) placed at the start of that code. The unconditional
         jump "start" emitted before it lands after it, so the first pass through
         the emitted code does not advance STR_PTR. When firstline is set, code
         that locates the end of the first line and stores it in
         common->first_line_end is emitted first. */
1976     DEFINE_COMPILER;
1977     struct sljit_label *mainloop;
1978     struct sljit_label *newlinelabel = NULL;
1979     struct sljit_jump *start;
1980     struct sljit_jump *end = NULL;
1981     struct sljit_jump *nl = NULL;
1982 ph10 836 #ifdef SUPPORT_UTF
1983     struct sljit_jump *singlechar;
1984 zherczeg 736 #endif
1985 ph10 664 jump_list *newline = NULL;
1986     BOOL newlinecheck = FALSE;
1987 ph10 836 BOOL readuchar = FALSE;
1988 ph10 664
         /* An extra check for a two-unit newline is emitted only when neither
         hascrorlf nor firstline is set and the newline convention is ANY, ANYCRLF
         or a fixed value above 255. */
1989     if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1990     common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1991     newlinecheck = TRUE;
1992    
1993     if (firstline)
1994     {
1995     /* Search for the end of the first line. */
         /* STR_PTR is saved in LOCALS0 during the scan and restored afterwards;
         first_line_end starts out as STR_END. */
1996 zherczeg 920 SLJIT_ASSERT(common->first_line_end != 0);
1997 ph10 664 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1998 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
1999 ph10 664
2000     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2001     {
         /* Two-unit fixed newline: step one unit at a time and compare the
         previous and current units with the high and low byte of
         common->newline. On a match first_line_end is set to the position
         of the first unit of the newline. */
2002     mainloop = LABEL();
2003 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2004 ph10 664 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2005 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2006     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2007 ph10 664 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2008     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2009 zherczeg 920 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2010 ph10 664 }
2011     else
2012     {
         /* first_line_end is rewritten before each character is read, so when
         check_newlinechar jumps out it holds the position where the newline
         starts; if the subject ends first, the store after the loop sets
         it to the final STR_PTR. */
2013     end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2014     mainloop = LABEL();
2015     /* Continual stores does not cause data dependency. */
2016 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2017 ph10 664 read_char(common);
2018     check_newlinechar(common, common->nltype, &newline, TRUE);
2019     CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2020 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2021 ph10 664 set_jumps(newline, LABEL());
2022     }
2023    
2024     JUMPHERE(end);
2025     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2026     }
2027    
         /* Skip over the advance code on the first entry. */
2028     start = JUMP(SLJIT_JUMP);
2029    
2030     if (newlinecheck)
2031     {
         /* Reached from the main loop when the current unit equals the high byte
         of common->newline: step over it and, unless the subject ends there,
         step over the next unit as well if it equals the low byte. */
2032     newlinelabel = LABEL();
2033 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2034 ph10 664 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2035 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2036 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2037     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2038 ph10 836 #ifdef COMPILE_PCRE16
         /* Convert the 0/1 condition value into a byte offset of one 16 bit unit. */
2039     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2040     #endif
2041 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2042     nl = JUMP(SLJIT_JUMP);
2043     }
2044    
2045     mainloop = LABEL();
2046    
2047     /* Increasing the STR_PTR here requires one less jump in the most common case. */
         /* The current unit only has to be loaded when it is inspected below: for
         the UTF length computation or for the newline check. */
2048 ph10 836 #ifdef SUPPORT_UTF
2049     if (common->utf) readuchar = TRUE;
2050 ph10 664 #endif
2051 ph10 836 if (newlinecheck) readuchar = TRUE;
2052 ph10 664
2053 ph10 836 if (readuchar)
2054     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2055 ph10 664
2056     if (newlinecheck)
2057     CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2058    
2059 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2060     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2061     if (common->utf)
2062 ph10 664 {
         /* UTF-8: for a lead byte >= 0xc0 add the value from utf8_table4 to step
         over the rest of the character. */
2063 ph10 836 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2064     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2065 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2066 ph10 836 JUMPHERE(singlechar);
2067 ph10 664 }
2068     #endif
2069 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2070     if (common->utf)
2071     {
         /* UTF-16: if (unit & 0xfc00) == 0xd800 (a high surrogate) step over one
         more 16 bit unit. */
2072     singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2073     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2074     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2075     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2076     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2077     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2078     JUMPHERE(singlechar);
2079     }
2080     #endif
2081 ph10 664 JUMPHERE(start);
2082    
2083     if (newlinecheck)
2084     {
2085     JUMPHERE(end);
2086     JUMPHERE(nl);
2087     }
2088    
2089     return mainloop;
2090     }
2091    
2092 ph10 836 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2093 ph10 664 {
         /* Emits a loop that advances STR_PTR until the unit at STR_PTR equals
         first_char (or its other case when caseless is set), or until STR_PTR
         reaches STR_END. When firstline is set, STR_END is temporarily replaced
         by common->first_line_end; the real value is kept in POSSESSIVE0 and
         restored at the end. */
2094     DEFINE_COMPILER;
2095     struct sljit_label *start;
2096     struct sljit_jump *leave;
2097     struct sljit_jump *found;
2098 ph10 836 pcre_uchar oc, bit;
2099 ph10 664
2100     if (firstline)
2101     {
2102     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2103 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2104 ph10 664 }
2105    
2106     start = LABEL();
2107     leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2108 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2109 ph10 664
         /* oc is the other case of first_char; it stays equal to first_char when
         matching is caseful or the character has no other case. */
2110 ph10 836 oc = first_char;
2111     if (caseless)
2112     {
2113     oc = TABLE_GET(first_char, common->fcc, first_char);
2114     #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2115     if (first_char > 127 && common->utf)
2116     oc = UCD_OTHERCASE(first_char);
2117     #endif
2118     }
2119     if (first_char == oc)
2120     found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2121 ph10 664 else
2122     {
2123 ph10 836 bit = first_char ^ oc;
2124 ph10 664 if (ispowerof2(bit))
2125     {
         /* The two cases differ in a single bit: force that bit on and do one
         comparison. */
2126     OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2127 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2128 ph10 664 }
2129     else
2130     {
         /* TMP2 = (TMP1 == first_char) | (TMP1 == oc). */
2131 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2132 ph10 664 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2133     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2134     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2135     found = JUMP(SLJIT_C_NOT_ZERO);
2136     }
2137     }
2138    
         /* No match: step over the current character (all of its units in UTF
         mode) and try again. */
2139 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2140     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2141     if (common->utf)
2142 ph10 664 {
2143 zherczeg 736 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2144 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2145 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2146     }
2147     #endif
2148 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2149     if (common->utf)
2150     {
         /* Step over one more unit when (unit & 0xfc00) == 0xd800. */
2151     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2152     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2153     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2154     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2155     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2156     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2157     }
2158     #endif
2159 ph10 664 JUMPTO(SLJIT_JUMP, start);
2160     JUMPHERE(found);
2161     JUMPHERE(leave);
2162    
2163     if (firstline)
2164     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2165     }
2166    
2167     static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2168     {
         /* Emits code that advances STR_PTR until it stands just after a newline
         or reaches STR_END. Nothing is done when STR_PTR is at or before the
         "str" field of the jit_arguments structure. When firstline is set,
         STR_END is temporarily replaced by common->first_line_end; the real
         value is kept in POSSESSIVE0 and restored on every exit path. */
2169     DEFINE_COMPILER;
2170     struct sljit_label *loop;
2171     struct sljit_jump *lastchar;
2172     struct sljit_jump *firstchar;
2173     struct sljit_jump *leave;
2174     struct sljit_jump *foundcr = NULL;
2175     struct sljit_jump *notfoundnl;
2176     jump_list *newline = NULL;
2177    
2178     if (firstline)
2179     {
2180     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2181 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2182 ph10 664 }
2183    
2184     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2185     {
         /* Two-unit fixed newline. TMP2 = str, TMP1 = begin (both read from the
         jit_arguments structure). */
2186     lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2187     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2188     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2189     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2190     firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2191    
         /* Step STR_PTR back by one unit when STR_PTR >= begin + 2 units, so the
         loop below (which looks at the two units before STR_PTR after
         advancing) starts its comparison one unit earlier. */
2192 ph10 836 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2193 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2194     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2195 ph10 836 #ifdef COMPILE_PCRE16
2196     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2197     #endif
2198 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2199    
         /* Advance until the two units just before STR_PTR equal the high and
         low byte of common->newline, or STR_END is reached. */
2200     loop = LABEL();
2201 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2202 ph10 664 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2203 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2204     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2205 ph10 664 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2206     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2207    
2208     JUMPHERE(leave);
2209     JUMPHERE(firstchar);
2210     JUMPHERE(lastchar);
2211    
2212     if (firstline)
2213     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2214     return;
2215     }
2216    
         /* All other newline types: step back one character, then read
         characters until one of them is a newline or STR_END is reached. */
2217     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2218     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2219     firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2220     skip_char_back(common);
2221    
2222     loop = LABEL();
2223     read_char(common);
2224     lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2225     if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2226     foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
         /* jumpiftrue is FALSE here: the jumps collected in "newline" are taken
         when the character is NOT a newline, and they go back to the loop. */
2227     check_newlinechar(common, common->nltype, &newline, FALSE);
2228     set_jumps(newline, loop);
2229    
2230     if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2231     {
         /* After a CR, also step over an immediately following NL (if the
         subject does not end first) so that CR LF counts as one newline. */
2232     leave = JUMP(SLJIT_JUMP);
2233     JUMPHERE(foundcr);
2234     notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2235 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2236 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2237     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2238 ph10 836 #ifdef COMPILE_PCRE16
2239     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2240     #endif
2241 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2242     JUMPHERE(notfoundnl);
2243     JUMPHERE(leave);
2244     }
2245     JUMPHERE(lastchar);
2246     JUMPHERE(firstchar);
2247    
2248     if (firstline)
2249     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2250     }
2251    
2252     static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2253     {
         /* Emits a loop that advances STR_PTR until the unit at STR_PTR has its
         bit set in the bitmap whose address is start_bits, or until STR_PTR
         reaches STR_END. When firstline is set, STR_END is temporarily replaced
         by common->first_line_end; the real value is kept in POSSESSIVE0 and
         restored at the end. */
2254     DEFINE_COMPILER;
2255     struct sljit_label *start;
2256     struct sljit_jump *leave;
2257     struct sljit_jump *found;
2258 ph10 836 #ifndef COMPILE_PCRE8
2259     struct sljit_jump *jump;
2260     #endif
2261 ph10 664
2262     if (firstline)
2263     {
2264     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2265 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2266 ph10 664 }
2267    
2268     start = LABEL();
2269     leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2270 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2271     #ifdef SUPPORT_UTF
         /* Keep the unit in TMP3: TMP1 is overwritten by the bitmap test but is
         needed again below to compute the character length. */
2272     if (common->utf)
2273 zherczeg 736 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2274 ph10 664 #endif
2275 ph10 836 #ifndef COMPILE_PCRE8
         /* Units of 255 and above are all tested as 255. */
2276     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2277     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2278     JUMPHERE(jump);
2279     #endif
         /* Test bit (c & 7) of byte (c >> 3) of the bitmap. */
2280 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2281     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2282     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2283     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2284     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2285     found = JUMP(SLJIT_C_NOT_ZERO);
2286    
2287 ph10 836 #ifdef SUPPORT_UTF
2288     if (common->utf)
2289 zherczeg 736 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2290     #endif
         /* No match: step over the current character (all of its units in UTF
         mode) and try again. */
2291 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2292     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2293     if (common->utf)
2294 zherczeg 736 {
2295     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2296 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2297 zherczeg 736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2298     }
2299 ph10 664 #endif
2300 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2301     if (common->utf)
2302     {
         /* Step over one more unit when (unit & 0xfc00) == 0xd800. */
2303     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2304     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2305     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2306     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2307     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2308     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2309     }
2310     #endif
2311 ph10 664 JUMPTO(SLJIT_JUMP, start);
2312     JUMPHERE(found);
2313     JUMPHERE(leave);
2314    
2315     if (firstline)
2316     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2317     }
2318    
2319 ph10 836 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2320 ph10 664 {
         /* Emits a forward scan for req_char (or its other case when caseless is
         set) that uses TMP1 as the scan pointer and leaves STR_PTR unchanged.
         The position where the character was found is cached in the local slot
         common->req_char_ptr. Returns the jump that is taken when the scan
         reaches STR_END without finding the character; the caller must bind
         it. */
2321     DEFINE_COMPILER;
2322     struct sljit_label *loop;
2323     struct sljit_jump *toolong;
2324     struct sljit_jump *alreadyfound;
2325     struct sljit_jump *found;
2326     struct sljit_jump *foundoc = NULL;
2327     struct sljit_jump *notfound;
2328 ph10 836 pcre_uchar oc, bit;
2329 ph10 664
         /* The scan is skipped in two cases: STR_PTR + REQ_BYTE_MAX is still below
         STR_END (toolong), or STR_PTR is below the cached position
         (alreadyfound). */
2330 zherczeg 920 SLJIT_ASSERT(common->req_char_ptr != 0);
2331     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2332 ph10 664 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2333     toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2334     alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2335    
         /* With a known first character the scan starts one unit after STR_PTR. */
2336 ph10 836 if (has_firstchar)
2337     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2338 ph10 664 else
2339     OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2340    
2341     loop = LABEL();
2342     notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2343    
2344 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
         /* oc is the other case of req_char; it stays equal to req_char when
         matching is caseful or the character has no other case. */
2345     oc = req_char;
2346     if (caseless)
2347     {
2348     oc = TABLE_GET(req_char, common->fcc, req_char);
2349     #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2350     if (req_char > 127 && common->utf)
2351     oc = UCD_OTHERCASE(req_char);
2352     #endif
2353     }
2354     if (req_char == oc)
2355     found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2356 ph10 664 else
2357     {
2358 ph10 836 bit = req_char ^ oc;
2359 ph10 664 if (ispowerof2(bit))
2360     {
         /* The two cases differ in a single bit: force that bit on and do one
         comparison. */
2361     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2362 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2363 ph10 664 }
2364     else
2365     {
2366 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2367 ph10 664 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2368     }
2369     }
2370 ph10 836 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2371 ph10 664 JUMPTO(SLJIT_JUMP, loop);
2372    
2373     JUMPHERE(found);
2374     if (foundoc)
2375     JUMPHERE(foundoc);
         /* Remember where the character was found. */
2376 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2377 ph10 664 JUMPHERE(alreadyfound);
2378     JUMPHERE(toolong);
2379     return notfound;
2380     }
2381    
2382     static void do_revertframes(compiler_common *common)
2383     {
         /* Emits a fast-call routine that walks the saved frame data starting at
         STACK_TOP, using TMP1 as the read pointer (STACK_TOP itself is not
         modified here). The first word of each entry is either a command
         (a value <= frame_end in a signed comparison) or an offset from the
         local base held in TMP3, in which case the next two words are
         written back to that location. The walk stops at frame_end. */
2384     DEFINE_COMPILER;
2385     struct sljit_jump *jump;
2386     struct sljit_label *mainloop;
2387    
2388 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2389 zherczeg 726 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2390 zherczeg 955 GET_LOCAL_BASE(TMP3, 0, 0);
2391 ph10 664
2392     /* Drop frames until we reach STACK_TOP. */
2393     mainloop = LABEL();
2394     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2395     jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
         /* Ordinary entry: restore two words at local base + offset and step
         over the three-word entry. */
2396 zherczeg 955 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
2397 ph10 664 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2398     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2399     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2400     JUMPTO(SLJIT_JUMP, mainloop);
2401    
2402     JUMPHERE(jump);
2403     jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2404     /* End of dropping frames. */
2405     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2406    
2407     JUMPHERE(jump);
2408     jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2409 zherczeg 696 /* Set string begin. */
         /* Two-word entry: the second word is written back to OVECTOR(0). */
2410 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2411     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2412     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2413     JUMPTO(SLJIT_JUMP, mainloop);
2414    
2415     JUMPHERE(jump);
         /* The frame_setmark command is only handled when a mark slot exists
         (common->mark_ptr != 0): the second word is written back to it. */
2416 zherczeg 929 if (common->mark_ptr != 0)
2417     {
2418     jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
2419     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2420     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2421     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
2422     JUMPTO(SLJIT_JUMP, mainloop);
2423    
2424     JUMPHERE(jump);
2425     }
2426    
2427 ph10 664 /* Unknown command. */
         /* Any other command is treated as a two-word entry and skipped. */
2428     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2429     JUMPTO(SLJIT_JUMP, mainloop);
2430     }
2431    
2432     static void check_wordboundary(compiler_common *common)
2433     {
         /* Emits a fast-call routine that tests for a word boundary at STR_PTR.
         A 0/1 "is a word character" value is computed for the character before
         STR_PTR (kept in LOCALS1) and for the character at STR_PTR (in TMP2);
         a missing character on either side counts as 0. The two values are
         XOR-ed with SLJIT_SET_E just before returning, so the result is passed
         back through the flags (presumably tested by the caller - confirm).
         The return address is kept in LOCALS0 rather than in RETURN_ADDR;
         presumably because the nested fast calls made here use RETURN_ADDR
         themselves - confirm. */
2434     DEFINE_COMPILER;
2435 zherczeg 914 struct sljit_jump *skipread;
2436 ph10 836 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2437 ph10 664 struct sljit_jump *jump;
2438 ph10 670 #endif
2439 ph10 664
         /* The shift by 4 used below relies on this value. */
2440 zherczeg 741 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2441 ph10 664
2442 zherczeg 955 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2443 ph10 664 /* Get type of the previous char, and put it to LOCALS1. */
         /* LOCALS1 defaults to 0 and keeps that value when STR_PTR is at (or
         before) the "begin" field of the jit_arguments structure. */
2444     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2445     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2446     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2447 zherczeg 914 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
2448 ph10 664 skip_char_back(common);
2449 zherczeg 914 check_start_used_ptr(common);
2450 ph10 664 read_char(common);
2451    
2452     /* Testing char type. */
2453     #ifdef SUPPORT_UCP
2454 ph10 836 if (common->use_ucp)
2455 ph10 664 {
         /* Word character: underscore, or a chartype (as returned by the getucd
         routine) in the range ucp_Ll..ucp_Lu or ucp_Nd..ucp_No. Each range test
         is done as one unsigned comparison after subtracting the lower bound. */
2456     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2457     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2458     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2459     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2460     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2461     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2462     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2463     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2464     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2465     JUMPHERE(jump);
2466     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2467     }
2468     else
2469     #endif
2470     {
         /* Table lookup: the value is bit 4 (ctype_word) of the ctypes entry.
         Characters above 255 are skipped and keep the default 0. */
2471 ph10 836 #ifndef COMPILE_PCRE8
2472     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2473     #elif defined SUPPORT_UTF
2474 ph10 664 /* Here LOCALS1 has already been zeroed. */
2475     jump = NULL;
2476 ph10 836 if (common->utf)
2477 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2478 ph10 836 #endif /* COMPILE_PCRE8 */
2479 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2480     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2481     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2482     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2483 ph10 836 #ifndef COMPILE_PCRE8
2484     JUMPHERE(jump);
2485     #elif defined SUPPORT_UTF
2486 ph10 664 if (jump != NULL)
2487     JUMPHERE(jump);
2488 ph10 836 #endif /* COMPILE_PCRE8 */
2489 ph10 664 }
2490 zherczeg 914 JUMPHERE(skipread);
2491 ph10 664
         /* Now the character at STR_PTR: TMP2 defaults to 0 and keeps that value
         when check_str_end skips the read. peek_char is used, so STR_PTR is
         presumably left where it is - confirm. */
2492     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2493 zherczeg 914 skipread = check_str_end(common);
2494 ph10 664 peek_char(common);
2495    
2496     /* Testing char type. This is a code duplication. */
2497     #ifdef SUPPORT_UCP
2498 ph10 836 if (common->use_ucp)
2499 ph10 664 {
2500     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2501     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2502     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2503     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2504     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2505     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2506     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2507     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2508     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2509     JUMPHERE(jump);
2510     }
2511     else
2512     #endif
2513     {
2514 ph10 836 #ifndef COMPILE_PCRE8
2515     /* TMP2 may be destroyed by peek_char. */
2516 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2517 ph10 836 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2518     #elif defined SUPPORT_UTF
2519     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2520 ph10 664 jump = NULL;
2521 ph10 836 if (common->utf)
2522 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2523     #endif
2524     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2525     OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2526     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2527 ph10 836 #ifndef COMPILE_PCRE8
2528     JUMPHERE(jump);
2529     #elif defined SUPPORT_UTF
2530 ph10 664 if (jump != NULL)
2531     JUMPHERE(jump);
2532 ph10 836 #endif /* COMPILE_PCRE8 */
2533 ph10 664 }
2534 zherczeg 914 JUMPHERE(skipread);
2535 ph10 664
         /* The result is non-zero exactly when the two values differ. */
2536     OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2537     sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2538     }
2539    
2540     static void check_anynewline(compiler_common *common)
2541     {
2542     /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
         /* The result is accumulated in TMP2; the final OR is emitted with
         SLJIT_SET_E so the zero / not-zero condition reflects it on return.
         Note that TMP1 is modified as well (0x0a is subtracted from it, and bit 0
         may be set). Characters accepted: 0x0a..0x0d and 0x85 always; 0x2028 and
         0x2029 in 16 bit builds, or in 8 bit builds when common->utf is set. */
2543     DEFINE_COMPILER;
2544    
2545 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2546 ph10 664
         /* After subtracting 0x0a, one unsigned comparison covers 0x0a..0x0d. */
2547     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2548     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2549     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2550     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2551 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2552     #ifdef COMPILE_PCRE8
2553     if (common->utf)
2554 ph10 664 {
2555 ph10 836 #endif
         /* Setting bit 0 maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so a single
         comparison accepts both characters. */
2556 ph10 664 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2557     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2558     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2559 ph10 836 #ifdef COMPILE_PCRE8
2560 ph10 664 }
2561     #endif
2562 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
         /* Merges the result of the last comparison emitted above (0x85, or
         0x2028/0x2029 when that code was emitted). */
2563 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2564     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2565     }
2566    
2567     static void check_hspace(compiler_common *common)
2568     {
2569     /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2570     DEFINE_COMPILER;
2571    
2572 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2573 ph10 664
2574     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2575     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2576     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2577     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2578     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2579 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2580     #ifdef COMPILE_PCRE8
2581     if (common->utf)
2582 ph10 664 {
2583 ph10 836 #endif
2584 ph10 664 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2585     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2586     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2587     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2588     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2589     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2590     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2591     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2592     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2593     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2594     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2595     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2596     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2597 ph10 836 #ifdef COMPILE_PCRE8
2598 ph10 664 }
2599     #endif
2600 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
2601 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2602    
2603     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2604     }
2605    
2606     static void check_vspace(compiler_common *common)
2607     {
2608     /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2609     DEFINE_COMPILER;
2610    
2611 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2612 ph10 664
2613     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2614     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2615     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2616     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2617 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2618     #ifdef COMPILE_PCRE8
2619     if (common->utf)
2620 ph10 664 {
2621 ph10 836 #endif
2622 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2623     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2624     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2625 ph10 836 #ifdef COMPILE_PCRE8
2626 ph10 664 }
2627     #endif
2628 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
2629 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2630    
2631     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2632     }
2633    
2634     #define CHAR1 STR_END
2635     #define CHAR2 STACK_TOP
2636    
2637     static void do_casefulcmp(compiler_common *common)
2638     {
2639     DEFINE_COMPILER;
2640     struct sljit_jump *jump;
2641     struct sljit_label *label;
2642    
2643 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2644 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2645     OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2646     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
2647 ph10 836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2648     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2649 ph10 664
2650     label = LABEL();
2651 ph10 836 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2652     OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2653 ph10 664 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2654 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2655 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, label);
2656    
2657     JUMPHERE(jump);
2658 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2659 ph10 664 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2660     OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2661     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2662     }
2663    
2664     #define LCC_TABLE STACK_LIMIT
2665    
2666     static void do_caselesscmp(compiler_common *common)
2667     {
2668     DEFINE_COMPILER;
2669     struct sljit_jump *jump;
2670     struct sljit_label *label;
2671    
2672 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2673 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2674    
2675     OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2676     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2677     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2678     OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
2679 ph10 836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2680     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2681 ph10 664
2682     label = LABEL();
2683 ph10 836 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2684     OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2685     #ifndef COMPILE_PCRE8
2686     jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2687     #endif
2688 ph10 664 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2689 ph10 836 #ifndef COMPILE_PCRE8
2690     JUMPHERE(jump);
2691     jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2692     #endif
2693 ph10 664 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2694 ph10 836 #ifndef COMPILE_PCRE8
2695     JUMPHERE(jump);
2696     #endif
2697 ph10 664 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2698 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2699 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, label);
2700    
2701     JUMPHERE(jump);
2702 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2703 ph10 664 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2704     OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2705     OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2706     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2707     }
2708    
2709     #undef LCC_TABLE
2710     #undef CHAR1
2711     #undef CHAR2
2712    
#if defined SUPPORT_UTF && defined SUPPORT_UCP

static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
/* Caseless comparison of UTF sequences, implemented in C because it would
be ineffective to do at JIT level. The characters in [src1, end1) are
compared with those starting at args->uchar_ptr and limited by args->end.

Returns:
  NULL              a character pair differs (neither equal nor equal to
                    the other case of the second character)
  (pcre_uchar*)1    the second sequence reached args->end before the first
                    one was fully consumed
  otherwise         the position in the second sequence after the match */
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
int c1, c2;

for (;;)
  {
  if (src1 >= end1)
    return src2;
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(c1, src1);
  GETCHARINC(c2, src2);
  if (c1 == c2)
    continue;
  if (c1 != UCD_OTHERCASE(c2))
    return NULL;
  }
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
2734 ph10 664
2735 ph10 836 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2736 ph10 664 compare_context* context, jump_list **fallbacks)
2737     {
2738     DEFINE_COMPILER;
2739     unsigned int othercasebit = 0;
2740 ph10 836 pcre_uchar *othercasechar = NULL;
2741     #ifdef SUPPORT_UTF
2742     int utflength;
2743 ph10 664 #endif
2744    
2745     if (caseless && char_has_othercase(common, cc))
2746     {
2747     othercasebit = char_get_othercase_bit(common, cc);
2748     SLJIT_ASSERT(othercasebit);
2749     /* Extracting bit difference info. */
2750 ph10 836 #ifdef COMPILE_PCRE8
2751     othercasechar = cc + (othercasebit >> 8);
2752 ph10 664 othercasebit &= 0xff;
2753 ph10 836 #else
2754     #ifdef COMPILE_PCRE16
2755     othercasechar = cc + (othercasebit >> 9);
2756     if ((othercasebit & 0x100) != 0)
2757     othercasebit = (othercasebit & 0xff) << 8;
2758     else
2759     othercasebit &= 0xff;
2760     #endif
2761     #endif
2762 ph10 664 }
2763    
2764     if (context->sourcereg == -1)
2765     {
2766 ph10 836 #ifdef COMPILE_PCRE8
2767 ph10 664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2768     if (context->length >= 4)
2769     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2770     else if (context->length >= 2)
2771 zherczeg 847 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2772 ph10 664 else
2773     #endif
2774     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2775 ph10 836 #else
2776     #ifdef COMPILE_PCRE16
2777     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2778     if (context->length >= 4)
2779     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2780     else
2781     #endif
2782 zherczeg 847 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2783 ph10 836 #endif
2784     #endif /* COMPILE_PCRE8 */
2785 ph10 664 context->sourcereg = TMP2;
2786     }
2787    
2788 ph10 836 #ifdef SUPPORT_UTF
2789     utflength = 1;
2790     if (common->utf && HAS_EXTRALEN(*cc))
2791     utflength += GET_EXTRALEN(*cc);
2792 ph10 664
2793     do
2794     {
2795     #endif
2796    
2797 ph10 836 context->length -= IN_UCHARS(1);
2798 ph10 664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2799    
2800     /* Unaligned read is supported. */
2801 ph10 836 if (othercasebit != 0 && othercasechar == cc)
2802 ph10 664 {
2803 ph10 836 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2804     context->oc.asuchars[context->ucharptr] = othercasebit;
2805 ph10 664 }
2806     else
2807     {
2808 ph10 836 context->c.asuchars[context->ucharptr] = *cc;
2809     context->oc.asuchars[context->ucharptr] = 0;
2810 ph10 664 }
2811 ph10 836 context->ucharptr++;
2812 ph10 664
2813 ph10 836 #ifdef COMPILE_PCRE8
2814     if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2815     #else
2816     if (context->ucharptr >= 2 || context->length == 0)
2817     #endif
2818 ph10 664 {
2819     if (context->length >= 4)
2820     OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2821 ph10 836 #ifdef COMPILE_PCRE8
2822 ph10 664 else if (context->length >= 2)
2823 zherczeg 847 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2824 ph10 664 else if (context->length >= 1)
2825     OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2826 ph10 836 #else
2827     else if (context->length >= 2)
2828 zherczeg 847 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2829 ph10 836 #endif
2830 ph10 664 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2831    
2832 ph10 836 switch(context->ucharptr)
2833 ph10 664 {
2834 ph10 836 case 4 / sizeof(pcre_uchar):
2835 ph10 664 if (context->oc.asint != 0)
2836     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2837     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2838     break;
2839    
2840 ph10 836 case 2 / sizeof(pcre_uchar):
2841 zherczeg 847 if (context->oc.asushort != 0)
2842     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2843     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2844 ph10 664 break;
2845    
2846 ph10 836 #ifdef COMPILE_PCRE8
2847 ph10 664 case 1:
2848     if (context->oc.asbyte != 0)
2849     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2850     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2851     break;
2852 ph10 836 #endif
2853 ph10 664
2854     default:
2855     SLJIT_ASSERT_STOP();
2856     break;
2857     }
2858 ph10 836 context->ucharptr = 0;
2859 ph10 664 }
2860 ph10 691
2861 ph10 664 #else
2862    
2863     /* Unaligned read is unsupported. */
2864 ph10 836 #ifdef COMPILE_PCRE8
2865 ph10 664 if (context->length > 0)
2866     OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2867 ph10 836 #else
2868     if (context->length > 0)
2869     OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2870     #endif
2871 ph10 664 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2872    
2873 ph10 836 if (othercasebit != 0 && othercasechar == cc)
2874 ph10 664 {
2875     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2876     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2877     }
2878     else
2879     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2880    
2881     #endif
2882    
2883     cc++;
2884 ph10 836 #ifdef SUPPORT_UTF
2885     utflength--;
2886 ph10 664 }
2887 ph10 836 while (utflength > 0);
2888 ph10 664 #endif
2889    
2890     return cc;
2891     }
2892    
2893 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2894 ph10 664
2895     #define SET_TYPE_OFFSET(value) \
2896     if ((value) != typeoffset) \
2897     { \
2898     if ((value) > typeoffset) \
2899     OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
2900     else \
2901     OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
2902     } \
2903     typeoffset = (value);
2904    
2905     #define SET_CHAR_OFFSET(value) \
2906     if ((value) != charoffset) \
2907     { \
2908     if ((value) > charoffset) \
2909     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
2910     else \
2911     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
2912     } \
2913     charoffset = (value);
2914    
2915 ph10 836 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2916 ph10 664 {
2917     DEFINE_COMPILER;
2918     jump_list *found = NULL;
2919     jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2920     unsigned int c;
2921     int compares;
2922     struct sljit_jump *jump = NULL;
2923 ph10 836 pcre_uchar *ccbegin;
2924 ph10 664 #ifdef SUPPORT_UCP
2925     BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2926     BOOL charsaved = FALSE;
2927 zherczeg 715 int typereg = TMP1, scriptreg = TMP1;
2928     unsigned int typeoffset;
2929 ph10 664 #endif
2930 zherczeg 715 int invertcmp, numberofcmps;
2931     unsigned int charoffset;
2932 ph10 664
2933 ph10 836 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
2934 zherczeg 914 fallback_at_str_end(common, fallbacks);
2935 ph10 664 read_char(common);
2936    
2937     if ((*cc++ & XCL_MAP) != 0)
2938     {
2939     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2940 ph10 836 #ifndef COMPILE_PCRE8
2941     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2942     #elif defined SUPPORT_UTF
2943     if (common->utf)
2944 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2945 ph10 836 #endif
2946 ph10 664
2947     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2948     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2949     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2950     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2951     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2952     add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2953    
2954 ph10 836 #ifndef COMPILE_PCRE8
2955     JUMPHERE(jump);
2956     #elif defined SUPPORT_UTF
2957     if (common->utf)
2958 ph10 664 JUMPHERE(jump);
2959 ph10 836 #endif
2960 ph10 664 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2961     #ifdef SUPPORT_UCP
2962     charsaved = TRUE;
2963     #endif
2964 ph10 836 cc += 32 / sizeof(pcre_uchar);
2965 ph10 664 }
2966    
2967     /* Scanning the necessary info. */
2968     ccbegin = cc;
2969     compares = 0;
2970     while (*cc != XCL_END)
2971     {
2972     compares++;
2973     if (*cc == XCL_SINGLE)
2974     {
2975     cc += 2;
2976 ph10 836 #ifdef SUPPORT_UTF
2977     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2978 ph10 664 #endif
2979     #ifdef SUPPORT_UCP
2980     needschar = TRUE;
2981     #endif
2982     }
2983     else if (*cc == XCL_RANGE)
2984     {
2985     cc += 2;
2986 ph10 836 #ifdef SUPPORT_UTF
2987     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2988 ph10 664 #endif
2989     cc++;
2990 ph10 836 #ifdef SUPPORT_UTF
2991     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2992 ph10 664 #endif
2993     #ifdef SUPPORT_UCP
2994     needschar = TRUE;
2995     #endif
2996     }
2997     #ifdef SUPPORT_UCP
2998     else
2999     {
3000     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3001     cc++;
3002     switch(*cc)
3003     {
3004     case PT_ANY:
3005     break;
3006    
3007     case PT_LAMP:
3008     case PT_GC:
3009     case PT_PC:
3010     case PT_ALNUM:
3011     needstype = TRUE;
3012     break;
3013    
3014     case PT_SC:
3015     needsscript = TRUE;
3016     break;
3017    
3018     case PT_SPACE:
3019     case PT_PXSPACE:
3020     case PT_WORD:
3021     needstype = TRUE;
3022     needschar = TRUE;
3023     break;
3024    
3025     default:
3026     SLJIT_ASSERT_STOP();
3027     break;
3028     }
3029     cc += 2;
3030     }
3031     #endif
3032     }
3033    
3034     #ifdef SUPPORT_UCP
3035     /* Simple register allocation. TMP1 is preferred if possible. */
3036     if (needstype || needsscript)
3037     {
3038     if (needschar && !charsaved)
3039     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3040     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3041     if (needschar)
3042     {
3043     if (needstype)
3044     {
3045     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3046     typereg = RETURN_ADDR;
3047     }
3048    
3049     if (needsscript)
3050     scriptreg = TMP3;
3051     OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3052     }
3053     else if (needstype && needsscript)
3054     scriptreg = TMP3;
3055     /* In all other cases only one of them was specified, and that can goes to TMP1. */
3056    
3057     if (needsscript)
3058     {
3059     if (scriptreg == TMP1)
3060     {
3061 ph10 836 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3062 ph10 664 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3063     }
3064     else
3065     {
3066     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3067 ph10 836 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3068 ph10 664 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3069     }
3070     }
3071     }
3072     #endif
3073    
3074     /* Generating code. */
3075     cc = ccbegin;
3076     charoffset = 0;
3077     numberofcmps = 0;
3078     #ifdef SUPPORT_UCP
3079     typeoffset = 0;
3080     #endif
3081    
3082     while (*cc != XCL_END)
3083     {
3084     compares--;
3085     invertcmp = (compares == 0 && list != fallbacks);
3086     jump = NULL;
3087    
3088     if (*cc == XCL_SINGLE)
3089     {
3090     cc ++;
3091 ph10 836 #ifdef SUPPORT_UTF
3092     if (common->utf)
3093 ph10 664 {
3094     GETCHARINC(c, cc);
3095     }
3096     else
3097     #endif
3098     c = *cc++;
3099    
3100     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3101     {
3102     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3103     COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3104     numberofcmps++;
3105     }
3106     else if (numberofcmps > 0)
3107     {
3108     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3109     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3110     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3111     numberofcmps = 0;
3112     }
3113     else
3114     {
3115     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3116     numberofcmps = 0;
3117     }
3118     }
3119     else if (*cc == XCL_RANGE)
3120     {
3121     cc ++;
3122 ph10 836 #ifdef SUPPORT_UTF
3123     if (common->utf)
3124 ph10 664 {
3125     GETCHARINC(c, cc);
3126     }
3127     else
3128     #endif
3129     c = *cc++;
3130     SET_CHAR_OFFSET(c);
3131 ph10 836 #ifdef SUPPORT_UTF
3132     if (common->utf)
3133 ph10 664 {
3134     GETCHARINC(c, cc);
3135     }
3136     else
3137     #endif
3138     c = *cc++;
3139     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3140     {
3141     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3142     COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3143     numberofcmps++;
3144     }
3145     else if (numberofcmps > 0)
3146     {
3147     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3148     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3149     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3150     numberofcmps = 0;
3151     }
3152     else
3153     {
3154     jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3155     numberofcmps = 0;
3156     }
3157     }
3158     #ifdef SUPPORT_UCP
3159     else
3160     {
3161     if (*cc == XCL_NOTPROP)
3162     invertcmp ^= 0x1;
3163     cc++;
3164     switch(*cc)
3165     {
3166     case PT_ANY:
3167     if (list != fallbacks)
3168     {
3169     if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3170     continue;
3171     }
3172     else if (cc[-1] == XCL_NOTPROP)
3173     continue;
3174     jump = JUMP(SLJIT_JUMP);
3175     break;
3176    
3177     case PT_LAMP:
3178     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3179     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3180     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3181     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3182     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3183     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3184     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3185     break;
3186    
3187     case PT_GC:
3188 ph10 836 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3189 ph10 664 SET_TYPE_OFFSET(c);
3190 ph10 836 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3191 ph10 664 break;
3192    
3193     case PT_PC:
3194     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3195     break;
3196    
3197     case PT_SC:
3198     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3199     break;
3200    
3201     case PT_SPACE:
3202     case PT_PXSPACE:
3203     if (*cc == PT_SPACE)
3204     {
3205     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3206     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3207     }
3208     SET_CHAR_OFFSET(9);
3209     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3210     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3211     if (*cc == PT_SPACE)
3212     JUMPHERE(jump);
3213    
3214     SET_TYPE_OFFSET(ucp_Zl);
3215     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3216     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3217     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3218     break;
3219    
3220     case PT_WORD:
3221     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3222     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3223     /* ... fall through */
3224    
3225     case PT_ALNUM:
3226     SET_TYPE_OFFSET(ucp_Ll);
3227     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3228     COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3229     SET_TYPE_OFFSET(ucp_Nd);
3230     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3231     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3232     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3233     break;
3234     }
3235     cc += 2;
3236     }
3237     #endif
3238    
3239     if (jump != NULL)
3240     add_jump(compiler, compares > 0 ? list : fallbacks, jump);
3241     }
3242    
3243     if (found != NULL)
3244     set_jumps(found, LABEL());
3245     }
3246    
3247     #undef SET_TYPE_OFFSET
3248     #undef SET_CHAR_OFFSET
3249    
3250     #endif
3251    
3252 ph10 836 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
3253 ph10 664 {
3254     DEFINE_COMPILER;
3255     int length;
3256     unsigned int c, oc, bit;
3257     compare_context context;
3258     struct sljit_jump *jump[4];
3259 ph10 836 #ifdef SUPPORT_UTF
3260 ph10 670 struct sljit_label *label;
3261 ph10 664 #ifdef SUPPORT_UCP
3262 ph10 836 pcre_uchar propdata[5];
3263 ph10 664 #endif
3264     #endif
3265    
3266     switch(type)
3267     {
3268     case OP_SOD:
3269     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3270     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3271     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3272     return cc;
3273    
3274     case OP_SOM:
3275     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3276     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3277     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3278     return cc;
3279    
3280     case OP_NOT_WORD_BOUNDARY:
3281     case OP_WORD_BOUNDARY:
3282     add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3283     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3284     return cc;
3285    
3286     case OP_NOT_DIGIT:
3287     case OP_DIGIT:
3288 zherczeg 914 fallback_at_str_end(common, fallbacks);
3289 ph10 664 read_char8_type(common);
3290     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3291     add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3292     return cc;
3293    
3294     case OP_NOT_WHITESPACE:
3295     case OP_WHITESPACE:
3296 zherczeg 914 fallback_at_str_end(common, fallbacks);
3297 ph10 664 read_char8_type(common);
3298     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3299     add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3300     return cc;
3301    
3302     case OP_NOT_WORDCHAR:
3303     case OP_WORDCHAR:
3304 zherczeg 914 fallback_at_str_end(common, fallbacks);
3305 ph10 664 read_char8_type(common);
3306     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3307     add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3308     return cc;
3309    
3310     case OP_ANY:
3311 zherczeg 914 fallback_at_str_end(common, fallbacks);
3312 ph10 664 read_char(common);
3313     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3314     {
3315     jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3316 zherczeg 920 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3317 zherczeg 918 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3318     else
3319 zherczeg 920 jump[1] = check_str_end(common);
3320 zherczeg 918
3321 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3322 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3323 zherczeg 918 if (jump[1] != NULL)
3324     JUMPHERE(jump[1]);
3325 ph10 664 JUMPHERE(jump[0]);
3326     }
3327     else
3328     check_newlinechar(common, common->nltype, fallbacks, TRUE);
3329     return cc;
3330    
3331     case OP_ALLANY:
3332 zherczeg 914 fallback_at_str_end(common, fallbacks);
3333 ph10 836 #ifdef SUPPORT_UTF
3334     if (common->utf)
3335 ph10 664 {
3336 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3337     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3338     #ifdef COMPILE_PCRE8
3339 zherczeg 736 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3340 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3341 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3342 ph10 836 #else /* COMPILE_PCRE8 */
3343     #ifdef COMPILE_PCRE16
3344     jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3345     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3346     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3347     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3348     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3349     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3350     #endif /* COMPILE_PCRE16 */
3351     #endif /* COMPILE_PCRE8 */
3352 zherczeg 736 JUMPHERE(jump[0]);
3353 ph10 664 return cc;
3354     }
3355     #endif
3356 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3357 ph10 664 return cc;
3358    
3359 zherczeg 736 case OP_ANYBYTE:
3360 zherczeg 914 fallback_at_str_end(common, fallbacks);
3361 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3362 zherczeg 736 return cc;
3363    
3364 ph10 836 #ifdef SUPPORT_UTF
3365 ph10 664 #ifdef SUPPORT_UCP
3366     case OP_NOTPROP:
3367     case OP_PROP:
3368     propdata[0] = 0;
3369     propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3370     propdata[2] = cc[0];
3371     propdata[3] = cc[1];
3372     propdata[4] = XCL_END;
3373     compile_xclass_hotpath(common, propdata, fallbacks);
3374     return cc + 2;
3375     #endif
3376     #endif
3377    
3378     case OP_ANYNL:
3379 zherczeg 914 fallback_at_str_end(common, fallbacks);
3380 ph10 664 read_char(common);
3381     jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3382 zherczeg 918 /* We don't need to handle soft partial matching case. */
3383     if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3384     jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3385     else
3386     jump[1] = check_str_end(common);
3387 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3388 ph10 664 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3389 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3390 ph10 664 jump[3] = JUMP(SLJIT_JUMP);
3391     JUMPHERE(jump[0]);
3392     check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
3393     JUMPHERE(jump[1]);
3394     JUMPHERE(jump[2]);
3395     JUMPHERE(jump[3]);
3396     return cc;
3397    
3398     case OP_NOT_HSPACE:
3399     case OP_HSPACE:
3400 zherczeg 914 fallback_at_str_end(common, fallbacks);
3401 ph10 664 read_char(common);
3402     add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3403     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3404     return cc;
3405    
3406     case OP_NOT_VSPACE:
3407     case OP_VSPACE:
3408 zherczeg 914 fallback_at_str_end(common, fallbacks);
3409 ph10 664 read_char(common);
3410     add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3411     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3412     return cc;
3413    
3414     #ifdef SUPPORT_UCP
3415     case OP_EXTUNI:
3416 zherczeg 914 fallback_at_str_end(common, fallbacks);
3417 ph10 664 read_char(common);
3418     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3419     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3420     add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3421    
3422     label = LABEL();
3423     jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3424     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3425     read_char(common);
3426     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3427     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3428     CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3429    
3430     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3431     JUMPHERE(jump[0]);
3432 zherczeg 915 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
3433     {
3434     jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3435 zherczeg 918 /* Since we successfully read a char above, partial matching must occur. */
3436     check_partial(common, TRUE);
3437 zherczeg 915 JUMPHERE(jump[0]);
3438     }
3439 ph10 664 return cc;
3440     #endif
3441    
3442     case OP_EODN:
3443 zherczeg 918 /* Requires rather complex checks. */
3444 ph10 664 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3445     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3446     {
3447 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3448     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3449 zherczeg 918 if (common->mode == JIT_COMPILE)
3450     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3451     else
3452     {
3453     jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
3454     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3455     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
3456     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3457     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
3458     add_jump(compiler, fallbacks, JUMP(SLJIT_C_NOT_EQUAL));
3459     check_partial(common, TRUE);
3460     add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3461     JUMPHERE(jump[1]);
3462     }
3463 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3464 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3465     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3466     }
3467     else if (common->nltype == NLTYPE_FIXED)
3468     {
3469 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3470     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3471 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3472     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3473     }
3474     else
3475     {
3476 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3477 ph10 664 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3478 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3479 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3480     jump[2] = JUMP(SLJIT_C_GREATER);
3481     add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
3482 ph10 836 /* Equal. */
3483     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3484 ph10 664 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3485     add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3486    
3487     JUMPHERE(jump[1]);
3488     if (common->nltype == NLTYPE_ANYCRLF)
3489     {
3490 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3491 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3492     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3493     }
3494     else
3495     {
3496     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3497     read_char(common);
3498     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3499     add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3500     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3501     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3502     }
3503     JUMPHERE(jump[2]);
3504     JUMPHERE(jump[3]);
3505     }
3506     JUMPHERE(jump[0]);
3507 zherczeg 918 check_partial(common, FALSE);
3508 ph10 664 return cc;
3509    
3510     case OP_EOD:
3511 zherczeg 918 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3512     check_partial(common, FALSE);
3513 ph10 664 return cc;
3514    
3515     case OP_CIRC:
3516     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3517     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3518     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3519     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3520     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3521     return cc;
3522    
3523     case OP_CIRCM:
3524     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3525     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3526     jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3527     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3528     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3529     jump[0] = JUMP(SLJIT_JUMP);
3530     JUMPHERE(jump[1]);
3531    
3532 zherczeg 914 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3533 ph10 664 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3534     {
3535 ph10 836 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3536 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3537 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3538     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3539 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3540     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3541     }
3542     else
3543     {
3544     skip_char_back(common);
3545     read_char(common);
3546     check_newlinechar(common, common->nltype, fallbacks, FALSE);
3547     }
3548     JUMPHERE(jump[0]);
3549     return cc;
3550    
3551     case OP_DOLL:
3552     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3553     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3554     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3555    
3556     if (!common->endonly)
3557     compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3558     else
3559 zherczeg 914 {
3560 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3561 zherczeg 918 check_partial(common, FALSE);
3562 zherczeg 914 }
3563 ph10 664 return cc;
3564    
3565     case OP_DOLLM:
3566     jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3567     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3568     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3569     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3570 zherczeg 918 check_partial(common, FALSE);
3571 ph10 664 jump[0] = JUMP(SLJIT_JUMP);
3572     JUMPHERE(jump[1]);
3573    
3574     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3575     {
3576 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3577     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3578 zherczeg 918 if (common->mode == JIT_COMPILE)
3579     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3580     else
3581     {
3582     jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
3583     /* STR_PTR = STR_END - IN_UCHARS(1) */
3584     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3585     check_partial(common, TRUE);
3586     add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3587     JUMPHERE(jump[1]);
3588     }
3589    
3590 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3591 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3592     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3593     }
3594     else
3595     {
3596     peek_char(common);
3597     check_newlinechar(common, common->nltype, fallbacks, FALSE);
3598     }
3599     JUMPHERE(jump[0]);
3600     return cc;
3601    
3602     case OP_CHAR:
3603     case OP_CHARI:
3604     length = 1;
3605 ph10 836 #ifdef SUPPORT_UTF
3606     if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3607 ph10 664 #endif
3608 zherczeg 914 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
3609 ph10 664 {
3610 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3611 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3612    
3613 ph10 836 context.length = IN_UCHARS(length);
3614 ph10 664 context.sourcereg = -1;
3615     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3616 ph10 836 context.ucharptr = 0;
3617 ph10 664 #endif
3618     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3619     }
3620 zherczeg 914 fallback_at_str_end(common, fallbacks);
3621 ph10 664 read_char(common);
3622 ph10 836 #ifdef SUPPORT_UTF
3623     if (common->utf)
3624 ph10 664 {
3625     GETCHAR(c, cc);
3626     }
3627     else
3628     #endif
3629     c = *cc;
3630 zherczeg 914 if (type == OP_CHAR || !char_has_othercase(common, cc))
3631     {
3632     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
3633     return cc + length;
3634     }
3635     oc = char_othercase(common, c);
3636     bit = c ^ oc;
3637     if (ispowerof2(bit))
3638     {
3639     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3640     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3641     return cc + length;
3642     }
3643 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3644     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3645     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3646     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3647     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3648     return cc + length;
3649    
3650     case OP_NOT:
3651     case OP_NOTI:
3652 zherczeg 914 fallback_at_str_end(common, fallbacks);
3653 ph10 664 length = 1;
3654 ph10 836 #ifdef SUPPORT_UTF
3655     if (common->utf)
3656 ph10 664 {
3657 ph10 836 #ifdef COMPILE_PCRE8
3658     c = *cc;
3659     if (c < 128)
3660 ph10 664 {
3661     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3662     if (type == OP_NOT || !char_has_othercase(common, cc))
3663     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3664     else
3665     {
3666     /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3667 zherczeg 736 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3668     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3669 ph10 664 }
3670     /* Skip the variable-length character. */
3671 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3672 zherczeg 736 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3673 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3674 zherczeg 736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3675     JUMPHERE(jump[0]);
3676 ph10 836 return cc + 1;
3677 ph10 664 }
3678     else
3679 ph10 836 #endif /* COMPILE_PCRE8 */
3680     {
3681     GETCHARLEN(c, cc, length);
3682 ph10 664 read_char(common);
3683 ph10 836 }
3684 ph10 664 }
3685     else
3686 ph10 836 #endif /* SUPPORT_UTF */
3687 ph10 664 {
3688 ph10 836 read_char(common);
3689 ph10 664 c = *cc;
3690     }
3691    
3692     if (type == OP_NOT || !char_has_othercase(common, cc))
3693     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3694     else
3695     {
3696     oc = char_othercase(common, c);
3697     bit = c ^ oc;
3698     if (ispowerof2(bit))
3699     {
3700     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3701     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3702     }
3703     else
3704     {
3705     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3706     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3707     }
3708     }
3709 zherczeg 924 return cc + length;
3710 ph10 664
3711     case OP_CLASS:
3712     case OP_NCLASS:
3713 zherczeg 914 fallback_at_str_end(common, fallbacks);
3714 ph10 664 read_char(common);
3715 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3716 ph10 664 jump[0] = NULL;
3717 ph10 836 #ifdef COMPILE_PCRE8
3718     /* This check only affects 8 bit mode. In other modes, we
3719     always need to compare the value with 255. */
3720     if (common->utf)
3721     #endif /* COMPILE_PCRE8 */
3722 ph10 664 {
3723     jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3724     if (type == OP_CLASS)
3725     {
3726     add_jump(compiler, fallbacks, jump[0]);
3727     jump[0] = NULL;
3728     }
3729     }
3730 ph10 836 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3731 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3732     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3733     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3734     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3735     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3736     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3737 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3738 ph10 664 if (jump[0] != NULL)
3739     JUMPHERE(jump[0]);
3740 ph10 836 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3741     return cc + 32 / sizeof(pcre_uchar);
3742 ph10 664
3743 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3744 ph10 664 case OP_XCLASS:
3745     compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3746     return cc + GET(cc, 0) - 1;
3747     #endif
3748    
3749     case OP_REVERSE:
3750     length = GET(cc, 0);
3751 zherczeg 953 if (length == 0)
3752     return cc + LINK_SIZE;
3753 ph10 664 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3754 ph10 836 #ifdef SUPPORT_UTF
3755     if (common->utf)
3756 ph10 664 {
3757 ph10 836 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3758 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3759     label = LABEL();
3760 ph10 836 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3761 ph10 664 skip_char_back(common);
3762     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3763     JUMPTO(SLJIT_C_NOT_ZERO, label);
3764     }
3765 zherczeg 914 else
3766 ph10 664 #endif
3767 zherczeg 914 {
3768     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3769     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3770     add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3771     }
3772     check_start_used_ptr(common);
3773 ph10 664 return cc + LINK_SIZE;
3774     }
3775     SLJIT_ASSERT_STOP();
3776     return cc;
3777     }
3778    
/* Emits the matching code for the opcode at cc, merging a run of consecutive
   OP_CHAR / OP_CHARI opcodes into a single fixed-length comparison when that
   is possible.

   common    - compiler state (utf flag, sljit compiler, ...).
   cc        - points at the first opcode to compile.
   ccend     - scanning for further characters stops at this address.
   fallbacks - jump list which receives the "no match" jumps.

   Returns the code pointer handed back by byte_sequence_compare (fixed-length
   case) or by compile_char1_hotpath (all other cases). */
3779 ph10 836 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3780 ph10 664 {
3781     /* This function consumes at least one input character. */
3782     /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3783     DEFINE_COMPILER;
3784 ph10 836 pcre_uchar *ccbegin = cc;
3785 ph10 664 compare_context context;
3786     int size;
3787    
         /* Pass 1: only measure. Walk the opcodes and sum up (in context.length)
            the size of every character which can join the fixed sequence.
            Nothing is emitted here; cc is rewound to ccbegin afterwards. */
3788     context.length = 0;
3789     do
3790     {
3791     if (cc >= ccend)
3792     break;
3793    
3794     if (*cc == OP_CHAR)
3795     {
         /* One code unit, plus the extra code units of a multi-unit UTF character. */
3796     size = 1;
3797 ph10 836 #ifdef SUPPORT_UTF
3798     if (common->utf && HAS_EXTRALEN(cc[1]))
3799     size += GET_EXTRALEN(cc[1]);
3800 ph10 664 #endif
3801     }
3802     else if (*cc == OP_CHARI)
3803     {
         /* A caseless character joins the sequence unless it has an other case
            and char_get_othercase_bit() returns 0 for it; in that case size is
            set to 0, which terminates the scan. */
3804     size = 1;
3805 ph10 836 #ifdef SUPPORT_UTF
3806     if (common->utf)
3807 ph10 664 {
3808     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3809     size = 0;
3810 ph10 836 else if (HAS_EXTRALEN(cc[1]))
3811     size += GET_EXTRALEN(cc[1]);
3812 ph10 664 }
3813 ph10 691 else
3814 ph10 664 #endif
3815     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3816     size = 0;
3817     }
3818     else
         /* Any other opcode ends the run. */
3819     size = 0;
3820    
3821     cc += 1 + size;
3822 ph10 836 context.length += IN_UCHARS(size);
3823 ph10 664 }
         /* Stop at the first opcode that cannot be merged, or once the
            accumulated length has exceeded 128. */
3824     while (size > 0 && context.length <= 128);
3825    
         /* Pass 2: emit code, starting again from the first opcode. */
3826     cc = ccbegin;
3827     if (context.length > 0)
3828     {
3829     /* We have a fixed-length byte sequence. */
         /* Advance STR_PTR over the whole sequence first, so that a single
            comparison against STR_END covers every character of the run. */
3830     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3831     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3832    
3833     context.sourcereg = -1;
3834     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3835 ph10 836 context.ucharptr = 0;
3836 ph10 664 #endif
         /* One call per opcode; the loop condition implies that
            byte_sequence_compare() reduces context.length as it consumes
            characters (its body is outside this chunk). */
3837     do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3838     return cc;
3839     }
3840    
3841     /* A non-fixed length character will be checked if length == 0. */
3842     return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3843     }
3844    
/* Emits the preliminary checks for the back reference opcode at cc.

   The start value of the referenced group, OVECTOR(offset), is loaded into
   TMP1, where offset is twice the number stored after the opcode.

   Unless common->jscript_compat is set, TMP1 is compared with OVECTOR(1):
     - fallbacks == NULL: the returned jump is taken when TMP1 equals
       OVECTOR(1) or equals the group's end value OVECTOR(offset + 1);
     - fallbacks != NULL: equality with OVECTOR(1) is added to *fallbacks
       and the returned jump covers only TMP1 == OVECTOR(offset + 1).
   With jscript_compat set, only the TMP1 == OVECTOR(offset + 1) jump is
   emitted and returned.

   The caller must bind the returned jump. TMP1 is left holding
   OVECTOR(offset); TMP2 is overwritten in the fallbacks == NULL case. */
3845 ph10 836 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3846 ph10 664 {
3847     DEFINE_COMPILER;
3848     int offset = GET2(cc, 1) << 1;
3849    
3850     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3851     if (!common->jscript_compat)
3852     {
3853     if (fallbacks == NULL)
3854     {
3855 zherczeg 914 /* OVECTOR(1) contains the "string begin - 1" constant. */
         /* TMP2 = (TMP1 == OVECTOR(1)) | (TMP1 == OVECTOR(offset + 1)); the
            final OR also sets the flags used by the returned jump. */
3856 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3857     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3858     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3859     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3860     return JUMP(SLJIT_C_NOT_ZERO);
3861     }
3862     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3863     }
         /* Start equals end: the captured substring has zero length. */
3864     return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3865     }
3866    
3867     /* Forward definitions. */
3868 ph10 836 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3869 ph10 664 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3870    
/* PUSH_FALLBACK(size, ccstart, error)
   Allocates a zero-filled fallback record of `size` bytes from the sljit
   compiler, stores `ccstart` in its cc field and pushes it on the list headed
   by parent->top. If the compiler reports an error after the allocation, the
   ENCLOSING function returns `error`.

   The macro uses the identifiers `compiler`, `fallback` and `parent` from the
   scope where it is expanded. `size` is evaluated twice (allocation and
   memset), so it must not have side effects. */
3871     #define PUSH_FALLBACK(size, ccstart, error) \
3872     do \
3873     { \
3874     fallback = sljit_alloc_memory(compiler, (size)); \
3875     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
3876     return error; \
3877     memset(fallback, 0, size); \
3878     fallback->prev = parent->top; \
3879     fallback->cc = (ccstart); \
3880     parent->top = fallback; \
3881     } \
3882     while (0)
3883    
/* Same as PUSH_FALLBACK, for use in functions returning void: on a compiler
   error the enclosing function simply returns. */
3884     #define PUSH_FALLBACK_NOVALUE(size, ccstart) \
3885     do \
3886     { \
3887     fallback = sljit_alloc_memory(compiler, (size)); \
3888     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
3889     return; \
3890     memset(fallback, 0, size); \
3891     fallback->prev = parent->top; \
3892     fallback->cc = (ccstart); \
3893     parent->top = fallback; \
3894     } \
3895     while (0)
3896    
/* Views the local `fallback` pointer as a pointer to the given record type. */
3897 zherczeg 914 #define FALLBACK_AS(type) ((type *)fallback)
3898 ph10 664
/* Emits the code which matches the back reference at cc (OP_REF, or the
   caseless OP_REFI) against the subject at STR_PTR.

   common     - compiler state.
   cc         - points at the OP_REF / OP_REFI opcode.
   fallbacks  - jump list which receives the "no match" jumps.
   withchecks - when TRUE, also emit the check against OVECTOR(1) (skipped
                if jscript_compat is set) and a test for a zero-length
                captured substring.
   emptyfail  - only meaningful with withchecks: when TRUE a zero-length
                substring is treated as a failure (jump added to fallbacks),
                otherwise the zero-length jump simply skips the comparison.

   On the success path of the generated code STR_PTR has been advanced past
   the matched text. When common->mode is not JIT_COMPILE, running into the
   end of the subject goes through check_partial() before failing.

   Returns the address of the opcode following the reference. */
3899 ph10 836 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3900 ph10 664 {
3901     DEFINE_COMPILER;
3902     int offset = GET2(cc, 1) << 1;
3903     struct sljit_jump *jump = NULL;
3904 zherczeg 915 struct sljit_jump *partial;
3905     struct sljit_jump *nopartial;
3906 ph10 664
         /* TMP1 = start of the captured substring. */
3907     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3908 zherczeg 914 /* OVECTOR(1) contains the "string begin - 1" constant. */
3909 ph10 664 if (withchecks && !common->jscript_compat)
3910     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3911    
3912 ph10 836 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3913     if (common->utf && *cc == OP_REFI)
3914 ph10 664 {
         /* Caseless comparison in UTF mode is delegated to the C helper
            do_utf_caselesscmp(). The assert pins the register assignment the
            call sequence below depends on. */
3915     SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
         /* TMP2 = end of the captured substring; equal to TMP1 when it is empty. */
3916     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3917     if (withchecks)
3918     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3919    
3920     /* Needed to save important temporary registers. */
3921     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
         /* The second temporary register receives ARGUMENTS, and the current
            STR_PTR is handed over through the uchar_ptr field of jit_arguments. */
3922     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3923 zherczeg 929 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
3924 ph10 836 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3925 ph10 664 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
         /* Return value handling: 0 and 1 are failure codes, any other value
            becomes the new STR_PTR. In the partial matching modes the value 1
            additionally triggers check_partial() - presumably it signals that
            the subject ended during the comparison (helper body not visible
            here; TODO confirm). */
3926 zherczeg 915 if (common->mode == JIT_COMPILE)
3927     add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
3928     else
3929     {
3930     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3931     nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3932 zherczeg 918 check_partial(common, FALSE);
3933 zherczeg 915 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3934     JUMPHERE(nopartial);
3935     }
3936 ph10 664 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3937     }
3938     else
3939 ph10 836 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3940 ph10 664 {
         /* TMP2 = end - start of the captured substring; the flags of this
            subtraction feed the zero-length jump below. */
3941     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3942     if (withchecks)
3943     jump = JUMP(SLJIT_C_ZERO);
3944 zherczeg 914
         /* Advance STR_PTR by the full length up front. Overshooting STR_END
            is a plain failure in JIT_COMPILE mode; in the other modes the
            `partial` jump is bound further down. */
3945 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3946 zherczeg 915 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
3947     if (common->mode == JIT_COMPILE)
3948     add_jump(compiler, fallbacks, partial);
3949 ph10 664
         /* Call the shared caseful / caseless compare routine; a non-zero TMP2
            afterwards is treated as a mismatch. */
3950     add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3951     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3952 zherczeg 915
3953     if (common->mode != JIT_COMPILE)
3954     {
3955     nopartial = JUMP(SLJIT_JUMP);
3956     JUMPHERE(partial);
3957     /* TMP2 -= STR_PTR - STR_END, i.e. cut the length down to what is left of the subject. */
3958     OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
3959     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
         /* Nothing left to compare: go straight to the partial match check. */
3960     partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
3961     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
3962     add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3963     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3964     JUMPHERE(partial);
         /* The available part matched: check for a partial match, then fail
            this path unconditionally. */
3965 zherczeg 918 check_partial(common, FALSE);
3966 zherczeg 915 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3967     JUMPHERE(nopartial);
3968     }
3969 ph10 664 }
3970    
         /* Resolve the zero-length jump (only created when withchecks is TRUE). */
3971     if (jump != NULL)
3972     {
3973     if (emptyfail)
3974     add_jump(compiler, fallbacks, jump);
3975     else
3976     JUMPHERE(jump);
3977     }
3978 ph10 836 return cc + 1 + IMM2_SIZE;
3979 ph10 664 }
3980    
/* Emits the hot path for a back reference followed by a repeat opcode
   (OP_CRSTAR ... OP_CRMINRANGE).

   common - compiler state.
   cc     - points at the back reference opcode; the repeat opcode is read
            from cc[1 + IMM2_SIZE].
   parent - fallback record on whose list a new iterator_fallback is pushed.

   The repeat is decoded into min / max, where max == 0 means "no upper
   limit". Two separate code shapes are generated, one for greedy and one for
   minimizing repeats.

   Returns the address following the repeat opcode and its operands, or NULL
   when PUSH_FALLBACK detects a compiler error. */
3981 ph10 836 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3982 ph10 664 {
3983     DEFINE_COMPILER;
3984     fallback_common *fallback;
3985 ph10 836 pcre_uchar type;
3986 ph10 664 struct sljit_label *label;
3987     struct sljit_jump *zerolength;
3988     struct sljit_jump *jump = NULL;
3989 ph10 836 pcre_uchar *ccbegin = cc;
3990 ph10 664 int min = 0, max = 0;
3991     BOOL minimize;
3992    
3993     PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3994    
3995 ph10 836 type = cc[1 + IMM2_SIZE];
         /* NOTE(review): relies on the minimizing OP_CRMIN* opcodes having the
            lowest bit set; the opcode values are defined outside this chunk. */
3996 ph10 664 minimize = (type & 0x1) != 0;
         /* Decode the repeat limits and move cc past the repeat opcode. */
3997     switch(type)
3998     {
3999     case OP_CRSTAR:
4000     case OP_CRMINSTAR:
4001     min = 0;
4002     max = 0;
4003 ph10 836 cc += 1 + IMM2_SIZE + 1;
4004 ph10 664 break;
4005     case OP_CRPLUS:
4006     case OP_CRMINPLUS:
4007     min = 1;
4008     max = 0;
4009 ph10 836 cc += 1 + IMM2_SIZE + 1;
4010 ph10 664 break;
4011     case OP_CRQUERY:
4012     case OP_CRMINQUERY:
4013     min = 0;
4014     max = 1;
4015 ph10 836 cc += 1 + IMM2_SIZE + 1;
4016 ph10 664 break;
4017     case OP_CRRANGE:
4018     case OP_CRMINRANGE:
         /* Explicit {min,max}: both limits follow the repeat opcode. */
4019 ph10 836 min = GET2(cc, 1 + IMM2_SIZE + 1);
4020     max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4021     cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4022 ph10 664 break;
4023     default:
4024     SLJIT_ASSERT_STOP();
4025     break;
4026     }
4027    
         /* ---- Greedy repeat: match as often as allowed, pushing the STR_PTR
            values of the iterations onto the stack as it goes. ---- */
4028     if (!minimize)
4029     {
4030     if (min == 0)
4031     {
         /* Zero iterations are acceptable: push the current STR_PTR plus a 0
            word, then run the combined check (fallbacks == NULL) which yields
            a single jump to the end of the loop. */
4032     allocate_stack(common, 2);
4033     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4034     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4035     /* Temporary release of STR_PTR. */
4036     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4037     zerolength = compile_ref_checks(common, ccbegin, NULL);
4038     /* Restore if not zero length. */
4039     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4040     }
4041     else
4042     {
         /* At least one iteration is required: push a single 0 word; the
            OVECTOR(1) check goes to the fallback list, while the zero-length
            jump is bound at the end of the loop. */
4043     allocate_stack(common, 1);
4044     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4045     zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
4046     }
4047    
         /* An iteration counter is only needed when some limit exceeds 1; it
            is kept in the POSSESSIVE0 local. */
4048     if (min > 1 || max > 1)
4049     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4050    
         /* Loop head: one iteration of the reference. The checks have already
            been emitted above, hence withchecks == FALSE. */
4051     label = LABEL();
4052     compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
4053    
4054     if (min > 1 || max > 1)
4055     {
4056     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4057     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4058     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
         /* Below min: iterate again without saving a position. */
4059     if (min > 1)
4060     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4061     if (max > 1)
4062     {
         /* Leave the loop once max is reached; otherwise save STR_PTR and
            iterate again. */
4063     jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4064     allocate_stack(common, 1);
4065     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4066     JUMPTO(SLJIT_JUMP, label);
4067     JUMPHERE(jump);
4068     }
4069     }
4070    
4071     if (max == 0)
4072     {
4073     /* Includes min > 1 case as well. */
         /* No upper limit: save STR_PTR and loop back unconditionally; the
            loop is left only through the fallback jumps of the comparison. */
4074     allocate_stack(common, 1);
4075     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4076     JUMPTO(SLJIT_JUMP, label);
4077     }
4078    
4079     JUMPHERE(zerolength);
4080     FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4081 ph10 677
4082     decrease_call_count(common);
4083 ph10 664 return cc;
4084     }
4085    
         /* ---- Minimizing repeat: STACK(0) holds a saved STR_PTR (initially 0)
            and STACK(1) the iteration counter. The counter is not initialised
            for OP_CRMINSTAR, where neither limit check below is emitted. ---- */
4086     allocate_stack(common, 2);
4087     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4088     if (type != OP_CRMINSTAR)
4089     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4090    
4091     if (min == 0)
4092     {
         /* Zero iterations allowed: record STR_PTR and jump over the first
            iteration. */
4093     zerolength = compile_ref_checks(common, ccbegin, NULL);
4094     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4095     jump = JUMP(SLJIT_JUMP);
4096     }
4097     else
4098     zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
4099    
         /* Label stored in the fallback record; also the target of the
            "below min" loop further down. */
4100     FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
         /* Fail once the counter has reached max. */
4101     if (max > 0)
4102     add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4103    
         /* One iteration, with the checks repeated and a zero-length capture
            treated as failure (emptyfail == TRUE). */
4104     compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
4105     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4106    
4107     if (min > 1)
4108     {
         /* Count the iteration and repeat immediately while below min. */
4109     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4110     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4111     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4112     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
4113     }
4114     else if (max > 0)
         /* Only an upper limit: increment the counter directly in memory. */
4115     OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4116    
4117     if (jump != NULL)
4118     JUMPHERE(jump);
4119     JUMPHERE(zerolength);
4120 ph10 677
4121     decrease_call_count(common);
4122 ph10 664 return cc;
4123     }
4124    
4125 ph10 836 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4126 ph10 664 {
4127     DEFINE_COMPILER;
4128     fallback_common *fallback;
4129     recurse_entry *entry = common->entries;
4130     recurse_entry *prev = NULL;
4131     int start = GET(cc, 1);
4132    
4133     PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
4134     while (entry != NULL)
4135