/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 941 - (hide annotations) (download)
Tue Feb 28 11:33:34 2012 UTC (14 months, 2 weeks ago) by zherczeg
File MIME type: text/plain
File size: 231705 byte(s)
(COMMIT*) is now supported by the JIT compiler
1 ph10 664 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 836 Copyright (c) 1997-2012 University of Cambridge
10 ph10 664
11     The machine code generator part (this module) was written by Zoltan Herczeg
12 ph10 836 Copyright (c) 2010-2012
13 ph10 664
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43     #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47     #include "pcre_internal.h"
48    
49     #ifdef SUPPORT_JIT
50    
51     /* All-in-one: Since we use the JIT compiler only from here,
52     we just include it. This way we don't need to touch the build
53     system files. */
54    
55 ph10 836 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56     #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 ph10 664 #define SLJIT_CONFIG_AUTO 1
58 zherczeg 741 #define SLJIT_CONFIG_STATIC 1
59 ph10 664 #define SLJIT_VERBOSE 0
60     #define SLJIT_DEBUG 0
61    
62     #include "sljit/sljitLir.c"
63    
64     #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 ph10 836 #error Unsupported architecture
66 ph10 664 #endif
67    
68     /* Allocate memory on the stack. Fast, but limited size. */
69     #define LOCAL_SPACE_SIZE 32768
70    
71     #define STACK_GROWTH_RATE 8192
72    
73     /* Enable to check that the allocation could destroy temporaries. */
74     #if defined SLJIT_DEBUG && SLJIT_DEBUG
75     #define DESTROY_REGISTERS 1
76     #endif
77    
/*
Short summary of the backtracking mechanism employed by the jit code generator:

The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct fallback_common for more details).
86    
87     'ab' - 'a' and 'b' regexps are concatenated
88     'a+' - 'a' is the sub-expression of the '+' operator
89    
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the hot path (expected path), and the other is called
the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the hot path.
95    
96     Greedy star operator (*) :
97     Hot path: match happens.
98     Fallback path: match failed.
99     Non-greedy star operator (*?) :
100     Hot path: no need to perform a match.
101     Fallback path: match is required.
102    
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
106    
107     A(B|C)D
108    
109     The generated code will be the following:
110    
111     A hot path
112     '(' hot path (pushing arguments to the stack)
113     B hot path
114     ')' hot path (pushing arguments to the stack)
115     D hot path
116     return with successful match
117    
118     D fallback path
119     ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120     B fallback path
121     C expected path
122     jump to D hot path
123     C fallback path
124     A fallback path
125 ph10 691
Notice that the order of the fallback code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current fallback code path. The fallback code path must check
whether there is a next alternative. If so, it needs to jump back to
the hot path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the fallback code paths.
*/
133    
/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of local variables
when the fallback mechanism is performed. Because of OP_RECURSE, the locals
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
141    
142     The stack frames are stored in a chain list, and have the following format:
143     ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144    
145     Thus we can restore the locals to a particular point in the stack.
146     */
147    
148     typedef struct jit_arguments {
149     /* Pointers first. */
150     struct sljit_stack *stack;
151 ph10 836 const pcre_uchar *str;
152     const pcre_uchar *begin;
153     const pcre_uchar *end;
154 ph10 664 int *offsets;
155 zherczeg 929 pcre_uchar *uchar_ptr;
156     pcre_uchar *mark_ptr;
157 ph10 664 /* Everything else after. */
158     int offsetcount;
159 ph10 677 int calllimit;
160 ph10 836 pcre_uint8 notbol;
161     pcre_uint8 noteol;
162     pcre_uint8 notempty;
163     pcre_uint8 notempty_atstart;
164 ph10 664 } jit_arguments;
165    
166 zherczeg 914 typedef struct executable_functions {
167 zherczeg 915 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
168 zherczeg 852 PUBL(jit_callback) callback;
169 ph10 664 void *userdata;
170 zherczeg 915 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
171 zherczeg 914 } executable_functions;
172 ph10 664
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
177    
/* Kinds of stub; stack_alloc is the only one defined. */
enum stub_types { stack_alloc };

/* Node of a singly linked list of stubs. Each node carries its type,
an integer payload, the jump that starts it and the label it leaves to. */
typedef struct stub_list {
  enum stub_types type;
  int data;
  struct sljit_jump *start;
  struct sljit_label *leave;
  struct stub_list *next;
} stub_list;
187    
188     typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
189    
190     /* The following structure is the key data type for the recursive
191     code generator. It is allocated by compile_hotpath, and contains
192     the aguments for compile_fallbackpath. Must be the first member
193     of its descendants. */
194     typedef struct fallback_common {
195     /* Concatenation stack. */
196     struct fallback_common *prev;
197     jump_list *nextfallbacks;
198     /* Internal stack (for component operators). */
199     struct fallback_common *top;
200     jump_list *topfallbacks;
201     /* Opcode pointer. */
202 ph10 836 pcre_uchar *cc;
203 ph10 664 } fallback_common;
204    
205     typedef struct assert_fallback {
206     fallback_common common;
207     jump_list *condfailed;
208     /* Less than 0 (-1) if a frame is not needed. */
209     int framesize;
210     /* Points to our private memory word on the stack. */
211     int localptr;
212     /* For iterators. */
213     struct sljit_label *hotpath;
214     } assert_fallback;
215    
216     typedef struct bracket_fallback {
217     fallback_common common;
218     /* Where to coninue if an alternative is successfully matched. */
219     struct sljit_label *althotpath;
220     /* For rmin and rmax iterators. */
221     struct sljit_label *recursivehotpath;
222     /* For greedy ? operator. */
223     struct sljit_label *zerohotpath;
224     /* Contains the branches of a failed condition. */
225     union {
226     /* Both for OP_COND, OP_SCOND. */
227     jump_list *condfailed;
228     assert_fallback *assert;
229     /* For OP_ONCE. -1 if not needed. */
230     int framesize;
231     } u;
232     /* Points to our private memory word on the stack. */
233     int localptr;
234     } bracket_fallback;
235    
236     typedef struct bracketpos_fallback {
237     fallback_common common;
238     /* Points to our private memory word on the stack. */
239     int localptr;
240     /* Reverting stack is needed. */
241     int framesize;
242     /* Allocated stack size. */
243     int stacksize;
244     } bracketpos_fallback;
245    
246     typedef struct braminzero_fallback {
247     fallback_common common;
248     struct sljit_label *hotpath;
249     } braminzero_fallback;
250    
251     typedef struct iterator_fallback {
252     fallback_common common;
253     /* Next iteration. */
254     struct sljit_label *hotpath;
255     } iterator_fallback;
256    
257     typedef struct recurse_entry {
258     struct recurse_entry *next;
259     /* Contains the function entry. */
260     struct sljit_label *entry;
261     /* Collects the calls until the function is not created. */
262     jump_list *calls;
263     /* Points to the starting opcode. */
264     int start;
265     } recurse_entry;
266    
267     typedef struct recurse_fallback {
268     fallback_common common;
269     } recurse_fallback;
270    
271     typedef struct compiler_common {
272     struct sljit_compiler *compiler;
273 ph10 836 pcre_uchar *start;
274 zherczeg 920
275     /* Local stack area size and variable pointers. */
276 ph10 664 int localsize;
277     int *localptrs;
278 zherczeg 920 int cbraptr;
279     /* OVector starting point. Must be divisible by 2. */
280     int ovector_start;
281     /* Last known position of the requested byte. */
282     int req_char_ptr;
283     /* Head of the last recursion. */
284     int recursive_head;
285     /* First inspected character for partial matching. */
286     int start_used_ptr;
287     /* Starting pointer for partial soft matches. */
288     int hit_start;
289     /* End pointer of the first line. */
290     int first_line_end;
291 zherczeg 929 /* Points to the marked string. */
292     int mark_ptr;
293 zherczeg 920
294     /* Other */
295 ph10 836 const pcre_uint8 *fcc;
296 ph10 664 sljit_w lcc;
297 zherczeg 914 int mode;
298 ph10 664 int nltype;
299     int newline;
300     int bsr_nltype;
301     int endonly;
302 zherczeg 929 BOOL has_set_som;
303 ph10 664 sljit_w ctypes;
304 zherczeg 741 sljit_uw name_table;
305     sljit_w name_count;
306     sljit_w name_entry_size;
307 zherczeg 920
308     /* Labels and jump lists. */
309 zherczeg 914 struct sljit_label *partialmatchlabel;
310 zherczeg 941 struct sljit_label *leavelabel;
311 ph10 664 struct sljit_label *acceptlabel;
312     stub_list *stubs;
313     recurse_entry *entries;
314     recurse_entry *currententry;
315 zherczeg 914 jump_list *partialmatch;
316 zherczeg 941 jump_list *leave;
317 ph10 664 jump_list *accept;
318 ph10 677 jump_list *calllimit;
319 ph10 664 jump_list *stackalloc;
320     jump_list *revertframes;
321     jump_list *wordboundary;
322     jump_list *anynewline;
323     jump_list *hspace;
324     jump_list *vspace;
325     jump_list *casefulcmp;
326     jump_list *caselesscmp;
327     BOOL jscript_compat;
328 ph10 836 #ifdef SUPPORT_UTF
329     BOOL utf;
330 ph10 664 #ifdef SUPPORT_UCP
331 ph10 836 BOOL use_ucp;
332 ph10 664 #endif
333 ph10 836 jump_list *utfreadchar;
334     #ifdef COMPILE_PCRE8
335     jump_list *utfreadtype8;
336 ph10 664 #endif
337 ph10 836 #endif /* SUPPORT_UTF */
338 ph10 664 #ifdef SUPPORT_UCP
339     jump_list *getucd;
340     #endif
341     } compiler_common;
342    
/* Context used by byte_sequence_compare. The two unions (c and oc) and
ucharptr exist only when SLJIT_UNALIGNED is defined and non-zero; their
asuchars arrays hold 4 units in the 8 bit build, 2 in the 16 bit build. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } oc;
#endif
} compare_context;
376    
/* Marker words of a saved stack frame: frame_end closes the frame, the
two negative values tag a saved string-begin value and a saved mark. */
enum {
  frame_end = 0,
  frame_setstrbegin = -1,
  frame_setmark = -2
};
382    
/* Undefine the sljit macro named CMP; this file provides its own CMP. */
#undef CMP

/* Used for accessing the elements of the stack: element i lives at the
negative byte offset -(i + 1) * sizeof(sljit_w). */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))

/* Names given to the sljit registers used in this file. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define TMP2          SLJIT_TEMPORARY_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
399    
/* Layout of the locals; every slot is one sljit_w wide. */
/* The first two locals are free for use by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_w))
#define LOCALS1 (1 * sizeof(sljit_w))
/* Two locals reserved for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_w))
#define POSSESSIVE1 (3 * sizeof(sljit_w))
/* Max limit of recursions. */
#define CALL_LIMIT (4 * sizeof(sljit_w))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second holds
the start pointers while the end of the capturing group has not yet
been reached. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
/* Entry of common->localptrs that belongs to the opcode at cc. */
#define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
417 ph10 664
418 ph10 836 #ifdef COMPILE_PCRE8
419     #define MOV_UCHAR SLJIT_MOV_UB
420     #define MOVU_UCHAR SLJIT_MOVU_UB
421     #else
422     #ifdef COMPILE_PCRE16
423     #define MOV_UCHAR SLJIT_MOV_UH
424     #define MOVU_UCHAR SLJIT_MOVU_UH
425     #else
426     #error Unsupported compiling mode
427     #endif
428     #endif
429    
/* Shortcuts for the sljit emitter calls. All of them, except
DEFINE_COMPILER itself, expect a local variable named "compiler",
which DEFINE_COMPILER declares from common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
/* Emit a one / two operand operation. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
/* Emit a label. */
#define LABEL() \
  sljit_emit_label(compiler)
/* Emit a jump; JUMPTO also binds it to an existing label. */
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an earlier jump to a label emitted right here. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
/* Emit a compare-and-jump; CMPTO also binds it to an existing label. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
/* Emit a conditional value operation. */
#define COND_VALUE(op, dst, dstw, type) \
  sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
451    
452 ph10 836 static pcre_uchar* bracketend(pcre_uchar* cc)
453 ph10 664 {
454     SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
455     do cc += GET(cc, 1); while (*cc == OP_ALT);
456     SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
457     cc += 1 + LINK_SIZE;
458     return cc;
459     }
460    
/* Functions which might need modification for every newly supported opcode:
next_opcode
get_localspace
set_localptrs
get_framesize
init_frame
get_localsize
copy_locals
compile_hotpath
compile_fallbackpath
*/
472    
473 ph10 836 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
474 ph10 664 {
475     SLJIT_UNUSED_ARG(common);
476     switch(*cc)
477     {
478     case OP_SOD:
479     case OP_SOM:
480     case OP_SET_SOM:
481     case OP_NOT_WORD_BOUNDARY:
482     case OP_WORD_BOUNDARY:
483     case OP_NOT_DIGIT:
484     case OP_DIGIT:
485     case OP_NOT_WHITESPACE:
486     case OP_WHITESPACE:
487     case OP_NOT_WORDCHAR:
488     case OP_WORDCHAR:
489     case OP_ANY:
490     case OP_ALLANY:
491     case OP_ANYNL:
492     case OP_NOT_HSPACE:
493     case OP_HSPACE:
494     case OP_NOT_VSPACE:
495     case OP_VSPACE:
496     case OP_EXTUNI:
497     case OP_EODN:
498     case OP_EOD:
499     case OP_CIRC:
500     case OP_CIRCM:
501     case OP_DOLL:
502     case OP_DOLLM:
503     case OP_TYPESTAR:
504     case OP_TYPEMINSTAR:
505     case OP_TYPEPLUS:
506     case OP_TYPEMINPLUS:
507     case OP_TYPEQUERY:
508     case OP_TYPEMINQUERY:
509     case OP_TYPEPOSSTAR:
510     case OP_TYPEPOSPLUS:
511     case OP_TYPEPOSQUERY:
512     case OP_CRSTAR:
513     case OP_CRMINSTAR:
514     case OP_CRPLUS:
515     case OP_CRMINPLUS:
516     case OP_CRQUERY:
517     case OP_CRMINQUERY:
518     case OP_DEF:
519     case OP_BRAZERO:
520     case OP_BRAMINZERO:
521     case OP_BRAPOSZERO:
522 zherczeg 941 case OP_COMMIT:
523 ph10 664 case OP_FAIL:
524     case OP_ACCEPT:
525     case OP_ASSERT_ACCEPT:
526     case OP_SKIPZERO:
527     return cc + 1;
528    
529 zherczeg 736 case OP_ANYBYTE:
530 ph10 836 #ifdef SUPPORT_UTF
531     if (common->utf) return NULL;
532 zherczeg 736 #endif
533     return cc + 1;
534    
535 ph10 664 case OP_CHAR:
536     case OP_CHARI:
537     case OP_NOT:
538     case OP_NOTI:
539     case OP_STAR:
540     case OP_MINSTAR:
541     case OP_PLUS:
542     case OP_MINPLUS:
543     case OP_QUERY:
544     case OP_MINQUERY:
545     case OP_POSSTAR:
546     case OP_POSPLUS:
547     case OP_POSQUERY:
548     case OP_STARI:
549     case OP_MINSTARI:
550     case OP_PLUSI:
551     case OP_MINPLUSI:
552     case OP_QUERYI:
553     case OP_MINQUERYI:
554     case OP_POSSTARI:
555     case OP_POSPLUSI:
556     case OP_POSQUERYI:
557     case OP_NOTSTAR:
558     case OP_NOTMINSTAR:
559     case OP_NOTPLUS:
560     case OP_NOTMINPLUS:
561     case OP_NOTQUERY:
562     case OP_NOTMINQUERY:
563     case OP_NOTPOSSTAR:
564     case OP_NOTPOSPLUS:
565     case OP_NOTPOSQUERY:
566     case OP_NOTSTARI:
567     case OP_NOTMINSTARI:
568     case OP_NOTPLUSI:
569     case OP_NOTMINPLUSI:
570     case OP_NOTQUERYI:
571     case OP_NOTMINQUERYI:
572     case OP_NOTPOSSTARI:
573     case OP_NOTPOSPLUSI:
574     case OP_NOTPOSQUERYI:
575     cc += 2;
576 ph10 836 #ifdef SUPPORT_UTF
577     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
578 ph10 664 #endif
579     return cc;
580    
581     case OP_UPTO:
582     case OP_MINUPTO:
583     case OP_EXACT:
584     case OP_POSUPTO:
585     case OP_UPTOI:
586     case OP_MINUPTOI:
587     case OP_EXACTI:
588     case OP_POSUPTOI:
589     case OP_NOTUPTO:
590     case OP_NOTMINUPTO:
591     case OP_NOTEXACT:
592     case OP_NOTPOSUPTO:
593     case OP_NOTUPTOI:
594     case OP_NOTMINUPTOI:
595     case OP_NOTEXACTI:
596     case OP_NOTPOSUPTOI:
597 ph10 836 cc += 2 + IMM2_SIZE;
598     #ifdef SUPPORT_UTF
599     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
600 ph10 664 #endif
601     return cc;
602    
603     case OP_NOTPROP:
604     case OP_PROP:
605 ph10 836 return cc + 1 + 2;
606    
607 ph10 664 case OP_TYPEUPTO:
608     case OP_TYPEMINUPTO:
609     case OP_TYPEEXACT:
610     case OP_TYPEPOSUPTO:
611     case OP_REF:
612     case OP_REFI:
613     case OP_CREF:
614 zherczeg 741 case OP_NCREF:
615     case OP_RREF:
616     case OP_NRREF:
617 ph10 664 case OP_CLOSE:
618 ph10 836 cc += 1 + IMM2_SIZE;
619 ph10 664 return cc;
620    
621     case OP_CRRANGE:
622     case OP_CRMINRANGE:
623 ph10 836 return cc + 1 + 2 * IMM2_SIZE;
624 ph10 664
625     case OP_CLASS:
626     case OP_NCLASS:
627 ph10 836 return cc + 1 + 32 / sizeof(pcre_uchar);
628 ph10 664
629 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
630 ph10 664 case OP_XCLASS:
631     return cc + GET(cc, 1);
632     #endif
633    
634     case OP_RECURSE:
635     case OP_ASSERT:
636     case OP_ASSERT_NOT:
637     case OP_ASSERTBACK:
638     case OP_ASSERTBACK_NOT:
639     case OP_REVERSE:
640     case OP_ONCE:
641 zherczeg 726 case OP_ONCE_NC:
642 ph10 664 case OP_BRA:
643     case OP_BRAPOS:
644     case OP_COND:
645     case OP_SBRA:
646     case OP_SBRAPOS:
647     case OP_SCOND:
648     case OP_ALT:
649     case OP_KET:
650     case OP_KETRMAX:
651     case OP_KETRMIN:
652     case OP_KETRPOS:
653     return cc + 1 + LINK_SIZE;
654    
655     case OP_CBRA:
656     case OP_CBRAPOS:
657     case OP_SCBRA:
658     case OP_SCBRAPOS:
659 ph10 836 return cc + 1 + LINK_SIZE + IMM2_SIZE;
660 ph10 664
661 zherczeg 929 case OP_MARK:
662     return cc + 1 + 2 + cc[1];
663    
664 ph10 664 default:
665     return NULL;
666     }
667     }
668    
669 ph10 836 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
670 ph10 664 {
671     int localspace = 0;
672 ph10 836 pcre_uchar *alternative;
673 ph10 664 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
674     while (cc < ccend)
675     {
676     switch(*cc)
677     {
678 zherczeg 929 case OP_SET_SOM:
679     common->has_set_som = TRUE;
680     cc += 1;
681     break;
682    
683 ph10 664 case OP_ASSERT:
684     case OP_ASSERT_NOT:
685     case OP_ASSERTBACK:
686     case OP_ASSERTBACK_NOT:
687     case OP_ONCE:
688 zherczeg 726 case OP_ONCE_NC:
689 ph10 664 case OP_BRAPOS:
690     case OP_SBRA:
691     case OP_SBRAPOS:
692     case OP_SCOND:
693     localspace += sizeof(sljit_w);
694     cc += 1 + LINK_SIZE;
695     break;
696    
697     case OP_CBRAPOS:
698     case OP_SCBRAPOS:
699     localspace += sizeof(sljit_w);
700 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
701 ph10 664 break;
702    
703     case OP_COND:
704     /* Might be a hidden SCOND. */
705     alternative = cc + GET(cc, 1);
706     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
707     localspace += sizeof(sljit_w);
708     cc += 1 + LINK_SIZE;
709     break;
710    
711 zherczeg 920 case OP_RECURSE:
712     /* Set its value only once. */
713     if (common->recursive_head == 0)
714     {
715     common->recursive_head = common->ovector_start;
716     common->ovector_start += sizeof(sljit_w);
717     }
718     cc += 1 + LINK_SIZE;
719     break;
720    
721 zherczeg 929 case OP_MARK:
722     if (common->mark_ptr == 0)
723     {
724     common->mark_ptr = common->ovector_start;
725     common->ovector_start += sizeof(sljit_w);
726     }
727     cc += 1 + 2 + cc[1];
728     break;
729    
730 ph10 664 default:
731     cc = next_opcode(common, cc);
732     if (cc == NULL)
733     return -1;
734     break;
735     }
736     }
737     return localspace;
738     }
739    
740 ph10 836 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
741 ph10 664 {
742 ph10 836 pcre_uchar *cc = common->start;
743     pcre_uchar *alternative;
744 ph10 664 while (cc < ccend)
745     {
746     switch(*cc)
747     {
748     case OP_ASSERT:
749     case OP_ASSERT_NOT:
750     case OP_ASSERTBACK:
751     case OP_ASSERTBACK_NOT:
752     case OP_ONCE:
753 zherczeg 726 case OP_ONCE_NC:
754 ph10 664 case OP_BRAPOS:
755     case OP_SBRA:
756     case OP_SBRAPOS:
757     case OP_SCOND:
758     common->localptrs[cc - common->start] = localptr;
759     localptr += sizeof(sljit_w);
760     cc += 1 + LINK_SIZE;
761     break;
762    
763     case OP_CBRAPOS:
764     case OP_SCBRAPOS:
765     common->localptrs[cc - common->start] = localptr;
766     localptr += sizeof(sljit_w);
767 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
768 ph10 664 break;
769    
770     case OP_COND:
771     /* Might be a hidden SCOND. */
772     alternative = cc + GET(cc, 1);
773     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
774     {
775     common->localptrs[cc - common->start] = localptr;
776     localptr += sizeof(sljit_w);
777     }
778     cc += 1 + LINK_SIZE;
779     break;
780    
781     default:
782     cc = next_opcode(common, cc);
783     SLJIT_ASSERT(cc != NULL);
784     break;
785     }
786     }
787     }
788    
789     /* Returns with -1 if no need for frame. */
790 ph10 836 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
791 ph10 664 {
792 ph10 836 pcre_uchar *ccend = bracketend(cc);
793 ph10 664 int length = 0;
794     BOOL possessive = FALSE;
795 zherczeg 929 BOOL setsom_found = recursive;
796     BOOL setmark_found = recursive;
797 ph10 664
798     if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
799     {
800 zherczeg 696 length = 3;
801 ph10 664 possessive = TRUE;
802     }
803    
804     cc = next_opcode(common, cc);
805     SLJIT_ASSERT(cc != NULL);
806     while (cc < ccend)
807     switch(*cc)
808     {
809     case OP_SET_SOM:
810 zherczeg 929 SLJIT_ASSERT(common->has_set_som);
811 ph10 664 if (!setsom_found)
812     {
813     length += 2;
814     setsom_found = TRUE;
815     }
816 zherczeg 929 cc += 1;
817 ph10 664 break;
818    
819 zherczeg 929 case OP_MARK:
820     SLJIT_ASSERT(common->mark_ptr != 0);
821     if (!setmark_found)
822     {
823     length += 2;
824     setmark_found = TRUE;
825     }
826     cc += 1 + 2 + cc[1];
827     break;
828    
829     case OP_RECURSE:
830     if (common->has_set_som && !setsom_found)
831     {
832     length += 2;
833     setsom_found = TRUE;
834     }
835     if (common->mark_ptr != 0 && !setmark_found)
836     {
837     length += 2;
838     setmark_found = TRUE;
839     }
840     cc += 1 + LINK_SIZE;
841     break;
842    
843 ph10 664 case OP_CBRA:
844     case OP_CBRAPOS:
845     case OP_SCBRA:
846     case OP_SCBRAPOS:
847     length += 3;
848 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
849 ph10 664 break;
850    
851     default:
852     cc = next_opcode(common, cc);
853     SLJIT_ASSERT(cc != NULL);
854     break;
855     }
856    
857     /* Possessive quantifiers can use a special case. */
858 zherczeg 726 if (SLJIT_UNLIKELY(possessive) && length == 3)
859 ph10 664 return -1;
860    
861     if (length > 0)
862 zherczeg 726 return length + 1;
863     return -1;
864 ph10 664 }
865    
866 ph10 836 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
867 ph10 664 {
868     DEFINE_COMPILER;
869 ph10 836 pcre_uchar *ccend = bracketend(cc);
870 zherczeg 929 BOOL setsom_found = recursive;
871     BOOL setmark_found = recursive;
872 ph10 664 int offset;
873    
874 zherczeg 726 /* >= 1 + shortest item size (2) */
875 zherczeg 906 SLJIT_UNUSED_ARG(stacktop);
876 zherczeg 726 SLJIT_ASSERT(stackpos >= stacktop + 2);
877 ph10 664
878     stackpos = STACK(stackpos);
879     if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
880     cc = next_opcode(common, cc);
881     SLJIT_ASSERT(cc != NULL);
882     while (cc < ccend)
883     switch(*cc)
884     {
885     case OP_SET_SOM:
886 zherczeg 929 SLJIT_ASSERT(common->has_set_som);
887 ph10 664 if (!setsom_found)
888     {
889     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
890     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
891     stackpos += (int)sizeof(sljit_w);
892     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
893     stackpos += (int)sizeof(sljit_w);
894     setsom_found = TRUE;
895     }
896 zherczeg 929 cc += 1;
897 ph10 664 break;
898    
899 zherczeg 929 case OP_MARK:
900     SLJIT_ASSERT(common->mark_ptr != 0);
901     if (!setmark_found)
902     {
903     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
904     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
905     stackpos += (int)sizeof(sljit_w);
906     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
907     stackpos += (int)sizeof(sljit_w);
908     setmark_found = TRUE;
909     }
910     cc += 1 + 2 + cc[1];
911     break;
912    
913     case OP_RECURSE:
914     if (common->has_set_som && !setsom_found)
915     {
916     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
917     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
918     stackpos += (int)sizeof(sljit_w);
919     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
920     stackpos += (int)sizeof(sljit_w);
921     setsom_found = TRUE;
922     }
923     if (common->mark_ptr != 0 && !setmark_found)
924     {
925     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
926     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
927     stackpos += (int)sizeof(sljit_w);
928     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
929     stackpos += (int)sizeof(sljit_w);
930     setmark_found = TRUE;
931     }
932     cc += 1 + LINK_SIZE;
933     break;
934    
935 ph10 664 case OP_CBRA:
936     case OP_CBRAPOS:
937     case OP_SCBRA:
938     case OP_SCBRAPOS:
939     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
940     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
941     stackpos += (int)sizeof(sljit_w);
942     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
943     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
944     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
945     stackpos += (int)sizeof(sljit_w);
946     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
947     stackpos += (int)sizeof(sljit_w);
948    
949 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
950 ph10 664 break;
951    
952     default:
953     cc = next_opcode(common, cc);
954     SLJIT_ASSERT(cc != NULL);
955     break;
956     }
957    
958     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
959 zherczeg 726 SLJIT_ASSERT(stackpos == STACK(stacktop));
960 ph10 664 }
961    
962 ph10 836 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
963 ph10 664 {
964     int localsize = 2;
965 ph10 836 pcre_uchar *alternative;
966 ph10 664 /* Calculate the sum of the local variables. */
967     while (cc < ccend)
968     {
969     switch(*cc)
970     {
971     case OP_ASSERT:
972     case OP_ASSERT_NOT:
973     case OP_ASSERTBACK:
974     case OP_ASSERTBACK_NOT:
975     case OP_ONCE:
976 zherczeg 726 case OP_ONCE_NC:
977 ph10 664 case OP_BRAPOS:
978     case OP_SBRA:
979     case OP_SBRAPOS:
980     case OP_SCOND:
981     localsize++;
982     cc += 1 + LINK_SIZE;
983     break;
984    
985     case OP_CBRA:
986     case OP_SCBRA:
987     localsize++;
988 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
989 ph10 664 break;
990    
991     case OP_CBRAPOS:
992     case OP_SCBRAPOS:
993     localsize += 2;
994 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
995 ph10 664 break;
996    
997     case OP_COND:
998     /* Might be a hidden SCOND. */
999     alternative = cc + GET(cc, 1);
1000     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1001     localsize++;
1002     cc += 1 + LINK_SIZE;
1003     break;
1004    
1005     default:
1006     cc = next_opcode(common, cc);
1007     SLJIT_ASSERT(cc != NULL);
1008     break;
1009     }
1010     }
1011     SLJIT_ASSERT(cc == ccend);
1012     return localsize;
1013     }
1014    
1015 ph10 836 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1016 ph10 664 BOOL save, int stackptr, int stacktop)
1017     {
1018     DEFINE_COMPILER;
1019     int srcw[2];
1020     int count;
1021     BOOL tmp1next = TRUE;
1022     BOOL tmp1empty = TRUE;
1023     BOOL tmp2empty = TRUE;
1024 ph10 836 pcre_uchar *alternative;
1025 ph10 664 enum {
1026     start,
1027     loop,
1028     end
1029     } status;
1030    
1031     status = save ? start : loop;
1032     stackptr = STACK(stackptr - 2);
1033     stacktop = STACK(stacktop - 1);
1034    
1035     if (!save)
1036     {
1037     stackptr += sizeof(sljit_w);
1038     if (stackptr < stacktop)
1039     {
1040     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1041     stackptr += sizeof(sljit_w);
1042     tmp1empty = FALSE;
1043     }
1044     if (stackptr < stacktop)
1045     {
1046     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1047     stackptr += sizeof(sljit_w);
1048     tmp2empty = FALSE;
1049     }
1050     /* The tmp1next must be TRUE in either way. */
1051     }
1052    
1053     while (status != end)
1054     {
1055     count = 0;
1056     switch(status)
1057     {
1058     case start:
1059 zherczeg 920 SLJIT_ASSERT(save && common->recursive_head != 0);
1060 ph10 664 count = 1;
1061 zherczeg 920 srcw[0] = common->recursive_head;
1062 ph10 664 status = loop;
1063     break;
1064    
1065     case loop:
1066     if (cc >= ccend)
1067     {
1068     status = end;
1069     break;
1070     }
1071    
1072     switch(*cc)
1073     {
1074     case OP_ASSERT:
1075     case OP_ASSERT_NOT:
1076     case OP_ASSERTBACK:
1077     case OP_ASSERTBACK_NOT:
1078     case OP_ONCE:
1079 zherczeg 726 case OP_ONCE_NC:
1080 ph10 664 case OP_BRAPOS:
1081     case OP_SBRA:
1082     case OP_SBRAPOS:
1083     case OP_SCOND:
1084     count = 1;
1085 ph10 836 srcw[0] = PRIV_DATA(cc);
1086 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1087     cc += 1 + LINK_SIZE;
1088     break;
1089    
1090     case OP_CBRA:
1091     case OP_SCBRA:
1092     count = 1;
1093     srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1094 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1095 ph10 664 break;
1096    
1097     case OP_CBRAPOS:
1098     case OP_SCBRAPOS:
1099     count = 2;
1100     srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1101 ph10 836 srcw[0] = PRIV_DATA(cc);
1102 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1103 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1104 ph10 664 break;
1105    
1106     case OP_COND:
1107     /* Might be a hidden SCOND. */
1108     alternative = cc + GET(cc, 1);
1109     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1110     {
1111     count = 1;
1112 ph10 836 srcw[0] = PRIV_DATA(cc);
1113 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1114     }
1115     cc += 1 + LINK_SIZE;
1116     break;
1117    
1118     default:
1119     cc = next_opcode(common, cc);
1120     SLJIT_ASSERT(cc != NULL);
1121     break;
1122     }
1123     break;
1124    
1125     case end:
1126     SLJIT_ASSERT_STOP();
1127     break;
1128     }
1129    
1130     while (count > 0)
1131     {
1132     count--;
1133     if (save)
1134     {
1135     if (tmp1next)
1136     {
1137     if (!tmp1empty)
1138     {
1139     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1140     stackptr += sizeof(sljit_w);
1141     }
1142     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1143     tmp1empty = FALSE;
1144     tmp1next = FALSE;
1145     }
1146     else
1147     {
1148     if (!tmp2empty)
1149     {
1150     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1151     stackptr += sizeof(sljit_w);
1152     }
1153     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1154     tmp2empty = FALSE;
1155     tmp1next = TRUE;
1156     }
1157     }
1158     else
1159     {
1160     if (tmp1next)
1161     {
1162     SLJIT_ASSERT(!tmp1empty);
1163     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1164     tmp1empty = stackptr >= stacktop;
1165     if (!tmp1empty)
1166     {
1167     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1168     stackptr += sizeof(sljit_w);
1169     }
1170     tmp1next = FALSE;
1171     }
1172     else
1173     {
1174     SLJIT_ASSERT(!tmp2empty);
1175     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1176     tmp2empty = stackptr >= stacktop;
1177     if (!tmp2empty)
1178     {
1179     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1180     stackptr += sizeof(sljit_w);
1181     }
1182     tmp1next = TRUE;
1183     }
1184     }
1185     }
1186     }
1187    
1188     if (save)
1189     {
1190     if (tmp1next)
1191     {
1192     if (!tmp1empty)
1193     {
1194     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1195     stackptr += sizeof(sljit_w);
1196     }
1197     if (!tmp2empty)
1198     {
1199     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1200     stackptr += sizeof(sljit_w);
1201     }
1202     }
1203     else
1204     {
1205     if (!tmp2empty)
1206     {
1207     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1208     stackptr += sizeof(sljit_w);
1209     }
1210     if (!tmp1empty)
1211     {
1212     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1213     stackptr += sizeof(sljit_w);
1214     }
1215     }
1216     }
1217     SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1218     }
1219    
1220     static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1221     {
1222     return (value & (value - 1)) == 0;
1223     }
1224    
1225     static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1226     {
1227     while (list)
1228     {
1229     /* sljit_set_label is clever enough to do nothing
1230     if either the jump or the label is NULL */
1231     sljit_set_label(list->jump, label);
1232     list = list->next;
1233     }
1234     }
1235    
1236     static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1237     {
1238     jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1239     if (list_item)
1240     {
1241     list_item->next = *list;
1242     list_item->jump = jump;
1243     *list = list_item;
1244     }
1245     }
1246    
1247     static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1248     {
1249     DEFINE_COMPILER;
1250     stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1251    
1252     if (list_item)
1253     {
1254     list_item->type = type;
1255     list_item->data = data;
1256     list_item->start = start;
1257     list_item->leave = LABEL();
1258     list_item->next = common->stubs;
1259     common->stubs = list_item;
1260     }
1261     }
1262    
1263     static void flush_stubs(compiler_common *common)
1264     {
1265     DEFINE_COMPILER;
1266     stub_list* list_item = common->stubs;
1267    
1268     while (list_item)
1269     {
1270     JUMPHERE(list_item->start);
1271     switch(list_item->type)
1272     {
1273     case stack_alloc:
1274     add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1275     break;
1276     }
1277     JUMPTO(SLJIT_JUMP, list_item->leave);
1278     list_item = list_item->next;
1279     }
1280     common->stubs = NULL;
1281     }
1282    
1283 ph10 677 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1284     {
1285     DEFINE_COMPILER;
1286    
1287 zherczeg 695 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1288 ph10 677 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1289     }
1290    
1291 ph10 664 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1292     {
1293     /* May destroy all locals and registers except TMP2. */
1294     DEFINE_COMPILER;
1295    
1296     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1297     #ifdef DESTROY_REGISTERS
1298     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1299     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1300     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1301     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1302     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1303     #endif
1304     add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1305     }
1306    
1307     static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1308     {
1309     DEFINE_COMPILER;
1310     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1311     }
1312    
1313     static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1314     {
1315     DEFINE_COMPILER;
1316     struct sljit_label *loop;
1317     int i;
1318     /* At this point we can freely use all temporary registers. */
1319     /* TMP1 returns with begin - 1. */
1320 zherczeg 880 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1321 ph10 664 if (length < 8)
1322     {
1323     for (i = 0; i < length; i++)
1324     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1325     }
1326     else
1327     {
1328     OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1329     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1330     loop = LABEL();
1331     OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1332     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1333     JUMPTO(SLJIT_C_NOT_ZERO, loop);
1334     }
1335     }
1336    
1337 zherczeg 696 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1338 ph10 664 {
1339     DEFINE_COMPILER;
1340     struct sljit_label *loop;
1341     struct sljit_jump *earlyexit;
1342    
1343     /* At this point we can freely use all registers. */
1344 zherczeg 880 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1345 zherczeg 696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1346    
1347 ph10 664 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1348 zherczeg 929 if (common->mark_ptr != 0)
1349     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1350 ph10 664 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1351 zherczeg 929 if (common->mark_ptr != 0)
1352     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1353 ph10 664 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1354     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1355 zherczeg 880 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1356 ph10 664 /* Unlikely, but possible */
1357     earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1358     loop = LABEL();
1359 zherczeg 880 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1360     OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1361 ph10 664 /* Copy the integer value to the output buffer */
1362 ph10 836 #ifdef COMPILE_PCRE16
1363 zherczeg 880 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1364 ph10 836 #endif
1365 zherczeg 880 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1366 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1367     JUMPTO(SLJIT_C_NOT_ZERO, loop);
1368     JUMPHERE(earlyexit);
1369 zherczeg 696
1370     /* Calculate the return value, which is the maximum ovector value. */
1371     if (topbracket > 1)
1372     {
1373     OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1374     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1375    
1376 zherczeg 880 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1377 zherczeg 696 loop = LABEL();
1378 zherczeg 715 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1379 zherczeg 696 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1380 zherczeg 880 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1381 zherczeg 696 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1382     }
1383     else
1384     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1385 ph10 664 }
1386    
1387 zherczeg 914 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1388     {
1389     DEFINE_COMPILER;
1390    
1391     SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1392 zherczeg 920 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1393 zherczeg 914
1394     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1395     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1396     OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1397     CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1398    
1399     /* Store match begin and end. */
1400     OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1401     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1402 zherczeg 920 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1403 zherczeg 914 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1404     #ifdef COMPILE_PCRE16
1405     OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1406     #endif
1407     OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1408    
1409     OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1410     #ifdef COMPILE_PCRE16
1411     OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1412     #endif
1413     OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1414    
1415     JUMPTO(SLJIT_JUMP, leave);
1416     }
1417    
1418     static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1419     {
1420     /* May destroy TMP1. */
1421     DEFINE_COMPILER;
1422     struct sljit_jump *jump;
1423    
1424     if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1425     {
1426 zherczeg 920 /* The value of -1 must be kept for start_used_ptr! */
1427     OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1428     /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1429     is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1430 zherczeg 914 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1431 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1432 zherczeg 914 JUMPHERE(jump);
1433     }
1434     else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1435     {
1436 zherczeg 920 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1437     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1438 zherczeg 914 JUMPHERE(jump);
1439     }
1440     }
1441    
1442 ph10 836 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1443 ph10 664 {
1444     /* Detects if the character has an othercase. */
1445     unsigned int c;
1446    
1447 ph10 836 #ifdef SUPPORT_UTF
1448     if (common->utf)
1449 ph10 664 {
1450     GETCHAR(c, cc);
1451     if (c > 127)
1452     {
1453     #ifdef SUPPORT_UCP
1454     return c != UCD_OTHERCASE(c);
1455     #else
1456     return FALSE;
1457     #endif
1458     }
1459 ph10 836 #ifndef COMPILE_PCRE8
1460     return common->fcc[c] != c;
1461     #endif
1462 ph10 664 }
1463     else
1464     #endif
1465     c = *cc;
1466 ph10 836 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1467 ph10 664 }
1468    
1469     static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1470     {
1471     /* Returns with the othercase. */
1472 ph10 836 #ifdef SUPPORT_UTF
1473     if (common->utf && c > 127)
1474 ph10 664 {
1475     #ifdef SUPPORT_UCP
1476     return UCD_OTHERCASE(c);
1477     #else
1478     return c;
1479     #endif
1480     }
1481     #endif
1482 ph10 836 return TABLE_GET(c, common->fcc, c);
1483 ph10 664 }
1484    
1485 ph10 836 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1486 ph10 664 {
1487     /* Detects if the character and its othercase has only 1 bit difference. */
1488     unsigned int c, oc, bit;
1489 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1490 ph10 664 int n;
1491     #endif
1492    
1493 ph10 836 #ifdef SUPPORT_UTF
1494     if (common->utf)
1495 ph10 664 {
1496     GETCHAR(c, cc);
1497     if (c <= 127)
1498     oc = common->fcc[c];
1499     else
1500     {
1501     #ifdef SUPPORT_UCP
1502     oc = UCD_OTHERCASE(c);
1503     #else
1504     oc = c;
1505     #endif
1506     }
1507     }
1508     else
1509     {
1510     c = *cc;
1511 ph10 836 oc = TABLE_GET(c, common->fcc, c);
1512 ph10 664 }
1513     #else
1514     c = *cc;
1515 ph10 836 oc = TABLE_GET(c, common->fcc, c);
1516 ph10 664 #endif
1517    
1518     SLJIT_ASSERT(c != oc);
1519    
1520     bit = c ^ oc;
1521     /* Optimized for English alphabet. */
1522     if (c <= 127 && bit == 0x20)
1523     return (0 << 8) | 0x20;
1524    
1525     /* Since c != oc, they must have at least 1 bit difference. */
1526     if (!ispowerof2(bit))
1527     return 0;
1528    
1529 ph10 836 #ifdef COMPILE_PCRE8
1530    
1531     #ifdef SUPPORT_UTF
1532     if (common->utf && c > 127)
1533 ph10 664 {
1534 ph10 836 n = GET_EXTRALEN(*cc);
1535 ph10 664 while ((bit & 0x3f) == 0)
1536     {
1537     n--;
1538     bit >>= 6;
1539     }
1540     return (n << 8) | bit;
1541     }
1542 ph10 836 #endif /* SUPPORT_UTF */
1543 ph10 664 return (0 << 8) | bit;
1544 ph10 836
1545     #else /* COMPILE_PCRE8 */
1546    
1547     #ifdef COMPILE_PCRE16
1548     #ifdef SUPPORT_UTF
1549     if (common->utf && c > 65535)
1550     {
1551     if (bit >= (1 << 10))
1552     bit >>= 10;
1553     else
1554     return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1555     }
1556     #endif /* SUPPORT_UTF */
1557     return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1558     #endif /* COMPILE_PCRE16 */
1559    
1560     #endif /* COMPILE_PCRE8 */
1561 ph10 664 }
1562    
1563 zherczeg 918 static void check_partial(compiler_common *common, BOOL force)
1564 ph10 664 {
1565 zherczeg 918 /* Checks whether a partial matching is occured. Does not modify registers. */
1566 ph10 664 DEFINE_COMPILER;
1567 zherczeg 918 struct sljit_jump *jump = NULL;
1568 zherczeg 914
1569 zherczeg 918 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1570    
1571 zherczeg 914 if (common->mode == JIT_COMPILE)
1572     return;
1573    
1574 zherczeg 920 if (!force)
1575     jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1576     else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1577     jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
1578 zherczeg 918
1579 zherczeg 914 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1580 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1581 zherczeg 914 else
1582     {
1583     if (common->partialmatchlabel != NULL)
1584     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1585     else
1586     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1587     }
1588 zherczeg 918
1589     if (jump != NULL)
1590     JUMPHERE(jump);
1591 ph10 664 }
1592    
1593 zherczeg 914 static struct sljit_jump *check_str_end(compiler_common *common)
1594     {
1595     /* Does not affect registers. Usually used in a tight spot. */
1596     DEFINE_COMPILER;
1597     struct sljit_jump *jump;
1598     struct sljit_jump *nohit;
1599     struct sljit_jump *return_value;
1600    
1601     if (common->mode == JIT_COMPILE)
1602     return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1603    
1604     jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1605     if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1606     {
1607 zherczeg 920 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1608     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1609 zherczeg 914 JUMPHERE(nohit);
1610     return_value = JUMP(SLJIT_JUMP);
1611     }
1612     else
1613     {
1614 zherczeg 920 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1615 zherczeg 914 if (common->partialmatchlabel != NULL)
1616     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1617     else
1618     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1619     }
1620     JUMPHERE(jump);
1621     return return_value;
1622     }
1623    
1624     static void fallback_at_str_end(compiler_common *common, jump_list **fallbacks)
1625     {
1626     DEFINE_COMPILER;
1627     struct sljit_jump *jump;
1628    
1629     if (common->mode == JIT_COMPILE)
1630     {
1631     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1632     return;
1633     }
1634    
1635     /* Partial matching mode. */
1636     jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1637 zherczeg 920 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
1638 zherczeg 914 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1639     {
1640 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1641 zherczeg 914 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
1642     }
1643     else
1644     {
1645     if (common->partialmatchlabel != NULL)
1646     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1647     else
1648     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1649     }
1650     JUMPHERE(jump);
1651     }
1652    
1653 ph10 664 static void read_char(compiler_common *common)
1654     {
1655     /* Reads the character into TMP1, updates STR_PTR.
1656     Does not check STR_END. TMP2 Destroyed. */
1657     DEFINE_COMPILER;
1658 ph10 836 #ifdef SUPPORT_UTF
1659 ph10 664 struct sljit_jump *jump;
1660     #endif
1661    
1662 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1663     #ifdef SUPPORT_UTF
1664     if (common->utf)
1665 ph10 664 {
1666 ph10 836 #ifdef COMPILE_PCRE8
1667 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1668 ph10 836 #else
1669     #ifdef COMPILE_PCRE16
1670     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1671     #endif
1672     #endif /* COMPILE_PCRE8 */
1673     add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1674 ph10 664 JUMPHERE(jump);
1675     }
1676     #endif
1677 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1678 ph10 664 }
1679    
1680     static void peek_char(compiler_common *common)
1681     {
1682     /* Reads the character into TMP1, keeps STR_PTR.
1683     Does not check STR_END. TMP2 Destroyed. */
1684     DEFINE_COMPILER;
1685 ph10 836 #ifdef SUPPORT_UTF
1686 ph10 664 struct sljit_jump *jump;
1687     #endif
1688    
1689 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1690     #ifdef SUPPORT_UTF
1691     if (common->utf)
1692 ph10 664 {
1693 ph10 836 #ifdef COMPILE_PCRE8
1694 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1695 ph10 836 #else
1696     #ifdef COMPILE_PCRE16
1697     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1698     #endif
1699     #endif /* COMPILE_PCRE8 */
1700     add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1701 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1702     JUMPHERE(jump);
1703     }
1704     #endif
1705     }
1706    
1707     static void read_char8_type(compiler_common *common)
1708     {
1709     /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1710     DEFINE_COMPILER;
1711 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1712 ph10 664 struct sljit_jump *jump;
1713     #endif
1714    
1715 ph10 836 #ifdef SUPPORT_UTF
1716     if (common->utf)
1717 ph10 664 {
1718 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1719     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1720     #ifdef COMPILE_PCRE8
1721 ph10 664 /* This can be an extra read in some situations, but hopefully
1722 ph10 836 it is needed in most cases. */
1723 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1724 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1725 ph10 836 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1726 ph10 664 JUMPHERE(jump);
1727 ph10 836 #else
1728     #ifdef COMPILE_PCRE16
1729     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1730     jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1731     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1732     JUMPHERE(jump);
1733     /* Skip low surrogate if necessary. */
1734     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1735     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1736     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1737     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1738     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1739     #endif
1740     #endif /* COMPILE_PCRE8 */
1741 ph10 664 return;
1742     }
1743     #endif
1744 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1745     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1746     #ifdef COMPILE_PCRE16
1747     /* The ctypes array contains only 256 values. */
1748     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1749     jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1750     #endif
1751     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1752     #ifdef COMPILE_PCRE16
1753     JUMPHERE(jump);
1754     #endif
1755 ph10 664 }
1756    
1757     static void skip_char_back(compiler_common *common)
1758     {
1759 ph10 836 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1760 ph10 664 DEFINE_COMPILER;
1761 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1762 ph10 664 struct sljit_label *label;
1763    
1764 ph10 836 if (common->utf)
1765 ph10 664 {
1766     label = LABEL();
1767 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1768     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1769 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1770     CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1771     return;
1772     }
1773     #endif
1774 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1775     if (common->utf)
1776     {
1777     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1778     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1779     /* Skip low surrogate if necessary. */
1780     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1781     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1782     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1783     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1784     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1785     return;
1786     }
1787     #endif
1788     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1789 ph10 664 }
1790    
1791     static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1792     {
1793     /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1794     DEFINE_COMPILER;
1795    
1796     if (nltype == NLTYPE_ANY)
1797     {
1798     add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1799     add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1800     }
1801     else if (nltype == NLTYPE_ANYCRLF)
1802     {
1803     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1804     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1805     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1806     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1807     add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1808     }
1809     else
1810     {
1811 ph10 836 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1812 ph10 664 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1813     }
1814     }
1815    
1816 ph10 836 #ifdef SUPPORT_UTF
1817    
1818     #ifdef COMPILE_PCRE8
/* Emits the out-of-line UTF-8 decoder (entered with sljit_emit_fast_enter,
left with sljit_emit_fast_return). The lead byte is classified by testing
bits 0x20 and 0x10; the two, three and four byte forms are handled. In each
form the continuation bytes are read at offsets 1.. from STR_PTR, the low six
bits of each are merged into TMP1, and STR_PTR is advanced by the number of
continuation bytes. */
1819     static void do_utfreadchar(compiler_common *common)
1820 ph10 664 {
1821 ph10 836 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1822 zherczeg 736 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1823 ph10 664 DEFINE_COMPILER;
1824     struct sljit_jump *jump;
1825    
1826     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1827     /* Searching for the first zero. */
1828     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1829     jump = JUMP(SLJIT_C_NOT_ZERO);
1830 ph10 836 /* Two byte sequence. */
/* TMP1 = ((TMP1 & 0x1f) << 6) | (byte[1] & 0x3f) */
1831     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1832     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1833 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1834     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1835     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1836     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1837 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1838 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1839     JUMPHERE(jump);
1840    
1841     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1842     jump = JUMP(SLJIT_C_NOT_ZERO);
1843 ph10 836 /* Three byte sequence. */
/* TMP1 = ((TMP1 & 0x0f) << 12) | ((byte[1] & 0x3f) << 6) | (byte[2] & 0x3f) */
1844     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1845 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1846     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1847     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1848     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1849     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1850 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1851     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1852 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1853     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1854 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1855 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1856     JUMPHERE(jump);
1857    
1858 ph10 836 /* Four byte sequence. */
/* Every lead byte with both 0x20 and 0x10 set ends up here; no further
classification of the lead byte is performed.
TMP1 = ((TMP1 & 0x07) << 18) | ((byte[1] & 0x3f) << 12)
     | ((byte[2] & 0x3f) << 6) | (byte[3] & 0x3f) */
1859     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1860 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1861     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1862     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1863     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1864     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1865 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1866 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1867     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1868     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1869 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1870     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1871 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1872     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1873 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1874 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1875     }
1876    
/* Emits the out-of-line routine that yields the ctypes entry of a multi-byte
UTF-8 character. Only two byte sequences can decode to a value below 256, so
only those are decoded; everything else is skipped and reported as type 0.
Unlike do_utfreadchar, the first continuation byte is read at offset 0 from
STR_PTR here. */
1877 ph10 836 static void do_utfreadtype8(compiler_common *common)
1878 ph10 664 {
1879 ph10 836 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1880     of the character (>= 0xc0). Return value in TMP1. */
1881 ph10 664 DEFINE_COMPILER;
1882     struct sljit_jump *jump;
1883     struct sljit_jump *compare;
1884    
1885     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1886    
1887     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1888     jump = JUMP(SLJIT_C_NOT_ZERO);
1889 ph10 836 /* Two byte sequence. */
/* TMP2 = ((TMP2 & 0x1f) << 6) | (byte[0] & 0x3f); STR_PTR moves past the
continuation byte. */
1890     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1891     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1892 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1893     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1894     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1895     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Decoded values above 255 have no table entry: return 0 for them, otherwise
return the byte at common->ctypes + TMP2. */
1896     compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1897     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1898     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1899    
1900     JUMPHERE(compare);
1901     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1902     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1903     JUMPHERE(jump);
1904    
1905     /* We only have types for characters less than 256. */
/* Longer sequences: the byte loaded from PRIV(utf8_table4), indexed by
(lead byte - 0xc0), is added to STR_PTR and type 0 is returned.
NOTE(review): presumably that table holds the number of trailing bytes for
each lead byte - confirm against its definition. */
1906 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1907 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1908     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1909     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1910     }
1911    
1912 ph10 836 #else /* COMPILE_PCRE8 */
1913 ph10 664
1914 ph10 836 #ifdef COMPILE_PCRE16
/* Emits the out-of-line UTF-16 decoder. A first unit below 0xdc00 is combined
with the unit at offset 1 from STR_PTR into one code point; any other first
unit is returned unchanged.
NOTE(review): on the unchanged-return path this routine does not write TMP2,
so the "length - 1 in TMP2" part of the contract below only holds for the
combined case - confirm that callers do not rely on TMP2 otherwise. */
1915     static void do_utfreadchar(compiler_common *common)
1916     {
1917     /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1918     of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1919     DEFINE_COMPILER;
1920     struct sljit_jump *jump;
1921    
1922     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1923     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1924     /* Do nothing, only return. */
1925     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1926    
1927     JUMPHERE(jump);
1928     /* Combine two 16 bit characters. */
/* TMP1 = (((TMP1 & 0x3ff) << 10) | (unit[1] & 0x3ff)) + 0x10000, and STR_PTR
is advanced by one unit. */
1929     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1930     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1931     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1932     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1933     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1934     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1935     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1936     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1937     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1938     }
1939     #endif /* COMPILE_PCRE16 */
1940    
1941     #endif /* COMPILE_PCRE8 */
1942    
1943     #endif /* SUPPORT_UTF */
1944    
1945 ph10 664 #ifdef SUPPORT_UCP
1946    
1947     /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1948     #define UCD_BLOCK_MASK 127
1949     #define UCD_BLOCK_SHIFT 7
1950    
/* Emits the out-of-line routine that maps a code point to its Unicode
character type through the two stage tables PRIV(ucd_stage1) and
PRIV(ucd_stage2) and the PRIV(ucd_records) array. */
1951     static void do_getucd(compiler_common *common)
1952     {
1953     /* Look up the UCD record for the character that comes in TMP1.
1954     Returns chartype in TMP1 and UCD offset in TMP2. */
1955     DEFINE_COMPILER;
1956    
/* The shift amounts used below depend on these sizes: blocks of 128 entries
(shift 7, mask 127) and 8 byte records (index shift 3). */
1957     SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1958    
1959     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
/* Stage 1: TMP2 = byte at ucd_stage1 + (TMP1 >> UCD_BLOCK_SHIFT). */
1960     OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1961 ph10 836 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
/* Stage 2 index: TMP1 = (TMP1 & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT). */
1962 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1963     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1964     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* 16 bit load from ucd_stage2 indexed by TMP1.
NOTE(review): the trailing 1 (and the 3 in the record load below) is
presumably the left shift applied to the index register - confirm against
the SLJIT_MEM2 addressing rules. */
1965 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1966 ph10 664 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Byte load of the chartype field of the record selected by TMP2. */
1967 ph10 836 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1968 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1969     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1970     }
1971     #endif
1972    
/* Emits the code that moves STR_PTR to the next character and returns the
label of that code (mainloop). The code emitted here is entered through the
unconditional jump 'start', which lands behind the advance, so the first pass
does not move STR_PTR.
With firstline set, code is emitted first that scans for the end of the first
line and stores it in the local slot common->first_line_end; STR_PTR is saved
in LOCALS0 around that scan. */
1973     static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1974     {
1975     DEFINE_COMPILER;
1976     struct sljit_label *mainloop;
1977     struct sljit_label *newlinelabel = NULL;
1978     struct sljit_jump *start;
1979     struct sljit_jump *end = NULL;
1980     struct sljit_jump *nl = NULL;
1981 ph10 836 #ifdef SUPPORT_UTF
1982     struct sljit_jump *singlechar;
1983 zherczeg 736 #endif
1984 ph10 664 jump_list *newline = NULL;
1985     BOOL newlinecheck = FALSE;
1986 ph10 836 BOOL readuchar = FALSE;
1987 ph10 664
/* The extra two character newline handling in the advance code is enabled
only when neither hascrorlf nor firstline is set and the newline convention
is ANY, ANYCRLF or a value above 255. */
1988     if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1989     common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1990     newlinecheck = TRUE;
1991    
1992     if (firstline)
1993     {
1994     /* Search for the end of the first line. */
1995 zherczeg 920 SLJIT_ASSERT(common->first_line_end != 0);
1996 ph10 664 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
/* Default when no newline is found: the first line ends at STR_END. */
1997 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
1998 ph10 664
1999     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2000     {
/* Fixed two character newline: compare the characters at offsets -1 and 0
against the high and low byte of common->newline; on a match the position
of the first of the two is stored. */
2001     mainloop = LABEL();
2002 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2003 ph10 664 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2004 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2005     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2006 ph10 664 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2007     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2008 zherczeg 920 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2009 ph10 664 }
2010     else
2011     {
2012     end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2013     mainloop = LABEL();
2014     /* Continual stores does not cause data dependency. */
/* The position is stored before each character is read, so when the newline
jump is taken the slot holds the start of the newline character. */
2015 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2016 ph10 664 read_char(common);
2017     check_newlinechar(common, common->nltype, &newline, TRUE);
2018     CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2019 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2020 ph10 664 set_jumps(newline, LABEL());
2021     }
2022    
2023     JUMPHERE(end);
2024     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2025     }
2026    
2027     start = JUMP(SLJIT_JUMP);
2028    
2029     if (newlinecheck)
2030     {
/* Out-of-line path, reached from the advance code below when the current
character equals the high byte of common->newline: step over it and, when
the next character equals the low byte, step over that one as well. */
2031     newlinelabel = LABEL();
2032 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2033 ph10 664 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2034 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2035 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2036     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2037 ph10 836 #ifdef COMPILE_PCRE16
/* Scale the 0/1 flag to the size of a 16 bit unit. */
2038     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2039     #endif
2040 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2041     nl = JUMP(SLJIT_JUMP);
2042     }
2043    
2044     mainloop = LABEL();
2045    
2046     /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current character is loaded only when it is needed: for the UTF length
computation or for the newline check. */
2047 ph10 836 #ifdef SUPPORT_UTF
2048     if (common->utf) readuchar = TRUE;
2049 ph10 664 #endif
2050 ph10 836 if (newlinecheck) readuchar = TRUE;
2051 ph10 664
2052 ph10 836 if (readuchar)
2053     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2054 ph10 664
2055     if (newlinecheck)
2056     CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2057    
2058 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2059     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2060     if (common->utf)
2061 ph10 664 {
/* Lead bytes >= 0xc0: additionally add the byte found in PRIV(utf8_table4)
at index (lead byte - 0xc0) to STR_PTR. */
2062 ph10 836 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2063     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2064 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2065 ph10 836 JUMPHERE(singlechar);
2066 ph10 664 }
2067     #endif
2068 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2069     if (common->utf)
2070     {
/* Units >= 0xd800: skip one more unit when (unit & 0xfc00) == 0xd800. */
2071     singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2072     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2073     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2074     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2075     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2076     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2077     JUMPHERE(singlechar);
2078     }
2079     #endif
2080 ph10 664 JUMPHERE(start);
2081    
2082     if (newlinecheck)
2083     {
2084     JUMPHERE(end);
2085     JUMPHERE(nl);
2086     }
2087    
2088     return mainloop;
2089     }
2090    
/* Emits a loop that advances STR_PTR until the character at STR_PTR equals
first_char (or, when caseless is set, its other case) or until STR_PTR
reaches STR_END. With firstline set, STR_END is replaced for the duration of
the loop by the value stored in common->first_line_end; the real STR_END is
kept in POSSESSIVE0 and restored afterwards. */
2091 ph10 836 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2092 ph10 664 {
2093     DEFINE_COMPILER;
2094     struct sljit_label *start;
2095     struct sljit_jump *leave;
2096     struct sljit_jump *found;
2097 ph10 836 pcre_uchar oc, bit;
2098 ph10 664
2099     if (firstline)
2100     {
2101     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2102 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2103 ph10 664 }
2104    
2105     start = LABEL();
2106     leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2107 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2108 ph10 664
/* oc is the other case of first_char; it stays equal to first_char when the
match is case sensitive. */
2109 ph10 836 oc = first_char;
2110     if (caseless)
2111     {
2112     oc = TABLE_GET(first_char, common->fcc, first_char);
2113     #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2114     if (first_char > 127 && common->utf)
2115     oc = UCD_OTHERCASE(first_char);
2116     #endif
2117     }
2118     if (first_char == oc)
2119     found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2120 ph10 664 else
2121     {
2122 ph10 836 bit = first_char ^ oc;
2123 ph10 664 if (ispowerof2(bit))
2124     {
/* The two cases differ in a single bit: force that bit on and use one
compare for both. */
2125     OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2126 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2127 ph10 664 }
2128     else
2129     {
/* General case: TMP2 = (TMP1 == first_char) | (TMP1 == oc). */
2130 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2131 ph10 664 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2132     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2133     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2134     found = JUMP(SLJIT_C_NOT_ZERO);
2135     }
2136     }
2137    
/* No match at this position: step over the whole character (in UTF mode this
includes the units that follow the first one) and try again. */
2138 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2139     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2140     if (common->utf)
2141 ph10 664 {
2142 zherczeg 736 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2143 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2144 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2145     }
2146     #endif
2147 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2148     if (common->utf)
2149     {
2150     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2151     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2152     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2153     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2154     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2155     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2156     }
2157     #endif
2158 ph10 664 JUMPTO(SLJIT_JUMP, start);
2159     JUMPHERE(found);
2160     JUMPHERE(leave);
2161    
2162     if (firstline)
2163     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2164     }
2165    
/* Emits code that advances STR_PTR to the position following the next
newline, unless STR_PTR is at or before the 'str' field of jit_arguments, in
which case STR_PTR is left unchanged. Two variants are generated: one for a
fixed two character newline (common->newline > 255) and a general one built
on read_char and check_newlinechar. With firstline set, STR_END is replaced
by the value stored in common->first_line_end while the code runs (the real
value is kept in POSSESSIVE0). */
2166     static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2167     {
2168     DEFINE_COMPILER;
2169     struct sljit_label *loop;
2170     struct sljit_jump *lastchar;
2171     struct sljit_jump *firstchar;
2172     struct sljit_jump *leave;
2173     struct sljit_jump *foundcr = NULL;
2174     struct sljit_jump *notfoundnl;
2175     jump_list *newline = NULL;
2176    
2177     if (firstline)
2178     {
2179     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2180 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2181 ph10 664 }
2182    
2183     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2184     {
2185     lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2186     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2187     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2188     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2189     firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2190    
/* Step back one character when STR_PTR is at least two characters past
'begin', so that the loop below (which inspects offsets -2 and -1 after its
increment) never reads before 'begin'. */
2191 ph10 836 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2192 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2193     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2194 ph10 836 #ifdef COMPILE_PCRE16
2195     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2196     #endif
2197 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2198    
/* Loop until the two characters before STR_PTR are the newline pair or
STR_PTR reaches STR_END. */
2199     loop = LABEL();
2200 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2201 ph10 664 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2202 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2203     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2204 ph10 664 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2205     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2206    
2207     JUMPHERE(leave);
2208     JUMPHERE(firstchar);
2209     JUMPHERE(lastchar);
2210    
2211     if (firstline)
2212     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2213     return;
2214     }
2215    
/* General variant: step back one character, then read characters until one
of them is a newline or STR_END is reached. */
2216     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2217     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2218     firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2219     skip_char_back(common);
2220    
2221     loop = LABEL();
2222     read_char(common);
2223     lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2224     if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2225     foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* jumpiftrue is FALSE, so the jumps collected in 'newline' are taken for
characters that are not newlines; they go back to the loop head. */
2226     check_newlinechar(common, common->nltype, &newline, FALSE);
2227     set_jumps(newline, loop);
2228    
2229     if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2230     {
/* After a CR, also step over an immediately following CHAR_NL. */
2231     leave = JUMP(SLJIT_JUMP);
2232     JUMPHERE(foundcr);
2233     notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2234 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2235 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2236     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2237 ph10 836 #ifdef COMPILE_PCRE16
2238     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2239     #endif
2240 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2241     JUMPHERE(notfoundnl);
2242     JUMPHERE(leave);
2243     }
2244     JUMPHERE(lastchar);
2245     JUMPHERE(firstchar);
2246    
2247     if (firstline)
2248     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2249     }
2250    
/* Emits a loop that advances STR_PTR until the first unit of the character
at STR_PTR has its bit set in the bitmap located at address start_bits, or
until STR_PTR reaches STR_END. With firstline set, STR_END is replaced for
the duration of the loop by the value stored in common->first_line_end; the
real STR_END is kept in POSSESSIVE0 and restored afterwards. */
2251     static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2252     {
2253     DEFINE_COMPILER;
2254     struct sljit_label *start;
2255     struct sljit_jump *leave;
2256     struct sljit_jump *found;
2257 ph10 836 #ifndef COMPILE_PCRE8
2258     struct sljit_jump *jump;
2259     #endif
2260 ph10 664
2261     if (firstline)
2262     {
2263     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2264 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2265 ph10 664 }
2266    
2267     start = LABEL();
2268     leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2269 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2270     #ifdef SUPPORT_UTF
/* Keep the unmodified unit in TMP3: TMP1 is destroyed by the bit test but is
needed again for the UTF length computation. */
2271     if (common->utf)
2272 zherczeg 736 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2273 ph10 664 #endif
2274 ph10 836 #ifndef COMPILE_PCRE8
/* Values that are not below 255 are replaced by 255 before the bitmap is
indexed. */
2275     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2276     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2277     JUMPHERE(jump);
2278     #endif
/* Bit test: byte at start_bits + (TMP1 >> 3), masked with 1 << (TMP1 & 7). */
2279 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2280     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2281     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2282     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2283     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2284     found = JUMP(SLJIT_C_NOT_ZERO);
2285    
2286 ph10 836 #ifdef SUPPORT_UTF
2287     if (common->utf)
2288 zherczeg 736 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2289     #endif
/* Bit not set: step over the whole character and try again. */
2290 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2291     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2292     if (common->utf)
2293 zherczeg 736 {
2294     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2295 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2296 zherczeg 736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2297     }
2298 ph10 664 #endif
2299 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2300     if (common->utf)
2301     {
2302     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2303     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2304     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2305     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2306     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2307     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2308     }
2309     #endif
2310 ph10 664 JUMPTO(SLJIT_JUMP, start);
2311     JUMPHERE(found);
2312     JUMPHERE(leave);
2313    
2314     if (firstline)
2315     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2316     }
2317    
/* Emits a forward scan for req_char (or, when caseless is set, its other
case) and returns the jump that is taken when the scan reaches STR_END
without a hit; the caller decides where that jump goes.
The scan is skipped when STR_PTR + REQ_BYTE_MAX is still below STR_END, or
when STR_PTR is below the position remembered in the local slot
common->req_char_ptr. On a hit the position of the hit is stored in that
slot. With has_firstchar set the scan starts one character after STR_PTR. */
2318 ph10 836 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2319 ph10 664 {
2320     DEFINE_COMPILER;
2321     struct sljit_label *loop;
2322     struct sljit_jump *toolong;
2323     struct sljit_jump *alreadyfound;
2324     struct sljit_jump *found;
2325     struct sljit_jump *foundoc = NULL;
2326     struct sljit_jump *notfound;
2327 ph10 836 pcre_uchar oc, bit;
2328 ph10 664
2329 zherczeg 920 SLJIT_ASSERT(common->req_char_ptr != 0);
2330     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2331 ph10 664 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2332     toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2333     alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2334    
/* TMP1 is the scan pointer; STR_PTR itself is not modified here. */
2335 ph10 836 if (has_firstchar)
2336     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2337 ph10 664 else
2338     OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2339    
2340     loop = LABEL();
2341     notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2342    
2343 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char; it stays equal to req_char when the
match is case sensitive. */
2344     oc = req_char;
2345     if (caseless)
2346     {
2347     oc = TABLE_GET(req_char, common->fcc, req_char);
2348     #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2349     if (req_char > 127 && common->utf)
2350     oc = UCD_OTHERCASE(req_char);
2351     #endif
2352     }
2353     if (req_char == oc)
2354     found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2355 ph10 664 else
2356     {
2357 ph10 836 bit = req_char ^ oc;
2358 ph10 664 if (ispowerof2(bit))
2359     {
/* The two cases differ in a single bit: force that bit on and use one
compare for both. */
2360     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2361 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2362 ph10 664 }
2363     else
2364     {
2365 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2366 ph10 664 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2367     }
2368     }
2369 ph10 836 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2370 ph10 664 JUMPTO(SLJIT_JUMP, loop);
2371    
2372     JUMPHERE(found);
2373     if (foundoc)
2374     JUMPHERE(foundoc);
/* Remember where the character was found. */
2375 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2376 ph10 664 JUMPHERE(alreadyfound);
2377     JUMPHERE(toolong);
2378     return notfound;
2379     }
2380    
/* Emits the out-of-line routine that walks the frame records starting at
STACK_TOP (TMP1 is the read cursor, TMP2 the first word of the current
record) and dispatches on that first word:
  - signed greater than frame_end: the word is an offset relative to
    SLJIT_LOCALS_REG; the next two words of the record are copied to that
    location and the following word, and the cursor moves on by three words;
  - frame_end: the routine returns;
  - frame_setstrbegin: the next word is stored in OVECTOR(0), the cursor
    moves on by two words;
  - frame_setmark (only when common->mark_ptr != 0): the next word is stored
    in the local slot common->mark_ptr, the cursor moves on by two words;
  - anything else: the cursor moves on by two words. */
2381     static void do_revertframes(compiler_common *common)
2382     {
2383     DEFINE_COMPILER;
2384     struct sljit_jump *jump;
2385     struct sljit_label *mainloop;
2386    
2387     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2388 zherczeg 726 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2389 ph10 664
2390     /* Drop frames until we reach STACK_TOP. */
2391     mainloop = LABEL();
2392     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2393     jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Restore a saved pair of words into the locals area. */
2394     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
2395     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2396     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2397     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2398     JUMPTO(SLJIT_JUMP, mainloop);
2399    
2400     JUMPHERE(jump);
2401     jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2402     /* End of dropping frames. */
2403     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2404    
2405     JUMPHERE(jump);
2406     jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2407 zherczeg 696 /* Set string begin. */
2408 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2409     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2410     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2411     JUMPTO(SLJIT_JUMP, mainloop);
2412    
2413     JUMPHERE(jump);
2414 zherczeg 929 if (common->mark_ptr != 0)
2415     {
/* Set mark: only generated when a mark slot has been allocated. */
2416     jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
2417     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2418     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2419     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
2420     JUMPTO(SLJIT_JUMP, mainloop);
2421    
2422     JUMPHERE(jump);
2423     }
2424    
2425 ph10 664 /* Unknown command. */
2426     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2427     JUMPTO(SLJIT_JUMP, mainloop);
2428     }
2429    
/* Emits the out-of-line word boundary test. The return address is kept in
the local slot LOCALS0. LOCALS1 receives 1 when the character before STR_PTR
is a word character and 0 otherwise (it stays 0 when STR_PTR is at or before
the 'begin' field of jit_arguments). TMP2 receives the same information for
the character at STR_PTR (it stays 0 when the check_str_end jump is taken).
The final XOR of the two sets the flags that the caller tests.
With common->use_ucp, a word character is an underscore or a character whose
UCD type lies in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No; otherwise bit 4
(ctype_word) of the common->ctypes entry is used and characters above 255
count as non-word. */
2430     static void check_wordboundary(compiler_common *common)
2431     {
2432     DEFINE_COMPILER;
2433 zherczeg 914 struct sljit_jump *skipread;
2434 ph10 836 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2435 ph10 664 struct sljit_jump *jump;
2436 ph10 670 #endif
2437 ph10 664
/* The shift by 4 below relies on ctype_word being 0x10. */
2438 zherczeg 741 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2439 ph10 664
2440     sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2441     /* Get type of the previous char, and put it to LOCALS1. */
2442     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2443     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2444     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2445 zherczeg 914 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again, which leaves STR_PTR where it
was. */
2446 ph10 664 skip_char_back(common);
2447 zherczeg 914 check_start_used_ptr(common);
2448 ph10 664 read_char(common);
2449    
2450     /* Testing char type. */
2451     #ifdef SUPPORT_UCP
2452 ph10 836 if (common->use_ucp)
2453 ph10 664 {
/* TMP2 is preset to 1 for the underscore shortcut. Otherwise two unsigned
range checks on the type returned by getucd are OR-ed together:
ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
2454     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2455     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2456     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2457     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2458     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2459     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2460     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2461     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2462     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2463     JUMPHERE(jump);
2464     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2465     }
2466     else
2467     #endif
2468     {
2469 ph10 836 #ifndef COMPILE_PCRE8
2470     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2471     #elif defined SUPPORT_UTF
2472 ph10 664 /* Here LOCALS1 has already been zeroed. */
2473     jump = NULL;
2474 ph10 836 if (common->utf)
2475 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2476 ph10 836 #endif /* COMPILE_PCRE8 */
/* LOCALS1 = (ctypes[TMP1] >> 4) & 1 */
2477 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2478     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2479     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2480     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2481 ph10 836 #ifndef COMPILE_PCRE8
2482     JUMPHERE(jump);
2483     #elif defined SUPPORT_UTF
2484 ph10 664 if (jump != NULL)
2485     JUMPHERE(jump);
2486 ph10 836 #endif /* COMPILE_PCRE8 */
2487 ph10 664 }
2488 zherczeg 914 JUMPHERE(skipread);
2489 ph10 664
/* Second half: type of the character at STR_PTR, left in TMP2. */
2490     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2491 zherczeg 914 skipread = check_str_end(common);
2492 ph10 664 peek_char(common);
2493    
2494     /* Testing char type. This is a code duplication. */
2495     #ifdef SUPPORT_UCP
2496 ph10 836 if (common->use_ucp)
2497 ph10 664 {
2498     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2499     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2500     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2501     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2502     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2503     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2504     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2505     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2506     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2507     JUMPHERE(jump);
2508     }
2509     else
2510     #endif
2511     {
2512 ph10 836 #ifndef COMPILE_PCRE8
2513     /* TMP2 may be destroyed by peek_char. */
2514 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2515 ph10 836 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2516     #elif defined SUPPORT_UTF
2517     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2518 ph10 664 jump = NULL;
2519 ph10 836 if (common->utf)
2520 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2521     #endif
/* TMP2 = (ctypes[TMP1] >> 4) & 1 */
2522     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2523     OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2524     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2525 ph10 836 #ifndef COMPILE_PCRE8
2526     JUMPHERE(jump);
2527     #elif defined SUPPORT_UTF
2528 ph10 664 if (jump != NULL)
2529     JUMPHERE(jump);
2530 ph10 836 #endif /* COMPILE_PCRE8 */
2531 ph10 664 }
2532 zherczeg 914 JUMPHERE(skipread);
2533 ph10 664
/* The result is non-zero exactly when the two word flags differ. */
2534     OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2535     sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2536     }
2537    
/* Emits the out-of-line "any newline" test. The result is accumulated in
TMP2 and the last OR also sets the flags, so the caller can branch on
zero / not-zero. TMP1 is modified as well. Recognised values: 0x0a..0x0d and
0x85 always; 0x2028 and 0x2029 in addition when the code is built with UTF
support and common->utf is set, or when it is built for 16 bit characters. */
2538     static void check_anynewline(compiler_common *common)
2539     {
2540     /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2541     DEFINE_COMPILER;
2542    
2543     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2544    
/* After subtracting 0x0a, one unsigned compare covers the range 0x0a..0x0d. */
2545     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2546     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2547     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2548     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2549 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2550     #ifdef COMPILE_PCRE8
2551     if (common->utf)
2552 ph10 664 {
2553 ph10 836 #endif
/* Record the 0x85 result, then set the lowest bit so that a single compare
matches both 0x2028 and 0x2029. */
2554 ph10 664 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2555     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2556     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2557 ph10 836 #ifdef COMPILE_PCRE8
2558 ph10 664 }
2559     #endif
2560 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Folds in the result of whichever equality compare was emitted last. */
2561 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2562     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2563     }
2564    
2565     static void check_hspace(compiler_common *common)
2566     {
2567     /* Emits the shared horizontal space test routine.
It checks whether TMP1 contains a horizontal space character: 0x09, 0x20,
0xa0 and, in UTF / 16 bit mode, 0x1680, 0x180e, 0x2000-0x200a, 0x202f,
0x205f or 0x3000. The outcome is accumulated in TMP2 (destroyed) and the
final COND_VALUE also sets the flags from it. In UTF / 16 bit mode TMP1 is
modified as well (0x2000 is subtracted from it). */
2568     DEFINE_COMPILER;
2569    
2570     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2571    
2572     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2573     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2574     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2575     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2576     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2577 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2578     #ifdef COMPILE_PCRE8
/* In the 8 bit library the wide code points are only tested in UTF mode. */
2579     if (common->utf)
2580 ph10 664 {
2581 ph10 836 #endif
2582 ph10 664 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2583     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2584     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2585     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2586     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase TMP1 to 0x2000: the range 0x2000-0x200a becomes one unsigned
comparison, and the remaining constants are given relative to 0x2000. */
2587     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2588     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2589     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2590     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2591     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2592     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2593     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2594     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2595 ph10 836 #ifdef COMPILE_PCRE8
2596 ph10 664 }
2597     #endif
2598 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the result of the last comparison and set the flags from TMP2. */
2599 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2600    
2601     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2602     }
2603    
2604     static void check_vspace(compiler_common *common)
2605     {
2606     /* Emits the shared vertical space test routine.
It checks whether TMP1 contains a vertical space character: 0x0a-0x0d, 0x85
and, in UTF / 16 bit mode, 0x2028 or 0x2029. The outcome is accumulated in
TMP2 (destroyed) and the final COND_VALUE also sets the flags from it.
TMP1 is modified as well (0x0a is subtracted, and it may be OR-ed with 1). */
2607     DEFINE_COMPILER;
2608    
2609     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2610    
/* Rebase TMP1 to 0x0a, so that 0x0a-0x0d becomes one unsigned range check. */
2611     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2612     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2613     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2614     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2615 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2616     #ifdef COMPILE_PCRE8
/* In the 8 bit library the wide code points are only tested in UTF mode. */
2617     if (common->utf)
2618 ph10 664 {
2619 ph10 836 #endif
/* NOTE(review): check_anynewline emits this step without SLJIT_SET_E; the
flags are set again by the SUB below, so the extra flag request looks
redundant here - confirm. */
2620 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps the rebased 0x2028 onto the rebased 0x2029,
so a single comparison covers both. */
2621     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2622     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2623 ph10 836 #ifdef COMPILE_PCRE8
2624 ph10 664 }
2625     #endif
2626 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the result of the last comparison and set the flags from TMP2. */
2627 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2628    
2629     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2630     }
2631    
/* CHAR1 and CHAR2 are scratch names for the STR_END and STACK_TOP registers.
The comparison routines below save these registers on entry and restore them
before returning, so they can hold the two characters being compared. */
2632     #define CHAR1 STR_END
2633     #define CHAR2 STACK_TOP
2634    
2635     static void do_casefulcmp(compiler_common *common)
2636     {
/* Emits the shared case sensitive comparison routine. The emitted code
compares the data addressed by TMP1 with the subject data that ends at
STR_PTR. TMP2 holds the remaining length; it is decreased by IN_UCHARS(1)
per character and the loop stops when it reaches zero or at the first
difference. On entry STR_PTR is moved back by TMP2.
NOTE(review): the outcome seems to be reported through the flags left by the
last comparison / subtraction - confirm against the callers. */
2637     DEFINE_COMPILER;
2638     struct sljit_jump *jump;
2639     struct sljit_label *label;
2640    
2641     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2642     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers which are reused as CHAR1 and CHAR2. */
2643     OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2644     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one character, because the MOVU_UCHAR
loads below use an offset of IN_UCHARS(1).
NOTE(review): MOVU presumably updates the base register - confirm. */
2645 ph10 836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2646     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2647 ph10 664
2648     label = LABEL();
2649 ph10 836 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2650     OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop at the first difference. */
2651 ph10 664 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2652 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2653 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, label);
2654    
2655     JUMPHERE(jump);
/* Undo the initial one character adjustment of STR_PTR and restore the
saved registers. */
2656 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2657 ph10 664 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2658     OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2659     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2660     }
2661    
/* LCC_TABLE is a scratch name for the STACK_LIMIT register. do_caselesscmp
saves it on entry, loads common->lcc into it, and restores it before
returning. */
2662     #define LCC_TABLE STACK_LIMIT
2663    
2664     static void do_caselesscmp(compiler_common *common)
2665     {
/* Emits the shared caseless comparison routine. It works like the code
emitted by do_casefulcmp, but both characters are translated through the
common->lcc table before they are compared. When COMPILE_PCRE8 is not
defined, characters above 255 skip the table lookup and are compared as
they are.
NOTE(review): the outcome seems to be reported through the flags left by the
last comparison / subtraction - confirm against the callers. */
2666     DEFINE_COMPILER;
2667     struct sljit_jump *jump;
2668     struct sljit_label *label;
2669    
2670     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2671     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2672    
/* Save the registers which are reused as LCC_TABLE, CHAR1 and CHAR2. */
2673     OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2674     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2675     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2676     OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one character, because the MOVU_UCHAR
loads below use an offset of IN_UCHARS(1). */
2677 ph10 836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2678     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2679 ph10 664
2680     label = LABEL();
2681 ph10 836 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2682     OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2683     #ifndef COMPILE_PCRE8
/* Characters above 255 are not translated through the table. */
2684     jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2685     #endif
2686 ph10 664 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2687 ph10 836 #ifndef COMPILE_PCRE8
2688     JUMPHERE(jump);
2689     jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2690     #endif
2691 ph10 664 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2692 ph10 836 #ifndef COMPILE_PCRE8
2693     JUMPHERE(jump);
2694     #endif
/* Leave the loop at the first difference. */
2695 ph10 664 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2696 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2697 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, label);
2698    
2699     JUMPHERE(jump);
/* Undo the initial one character adjustment of STR_PTR and restore the
saved registers. */
2700 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2701 ph10 664 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2702     OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2703     OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2704     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2705     }
2706    
2707     #undef LCC_TABLE
2708     #undef CHAR1
2709     #undef CHAR2
2710    
2711 ph10 836 #if defined SUPPORT_UTF && defined SUPPORT_UCP
2712 ph10 664
2713 ph10 836 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
2714 ph10 664 {
2715     /* Caseless comparison of two character sequences, written in C because
implementing it at the JIT level would be inefficient.
The sequence src1 .. end1 is compared with the data starting at
args->uchar_ptr and bounded by args->end.
Returns:
  NULL               if a character pair differs even after case folding
  (pcre_uchar*)1     if the second sequence reaches args->end before src1
                     reaches end1
  the position reached in the second sequence otherwise. */
2716     int c1, c2;
2717 zherczeg 929 const pcre_uchar *src2 = args->uchar_ptr;
2718 ph10 836 const pcre_uchar *end2 = args->end;
2719 ph10 664
2720     while (src1 < end1)
2721     {
/* The second sequence ran out first: report it with the special value 1. */
2722     if (src2 >= end2)
2723 zherczeg 915 return (pcre_uchar*)1;
2724 ph10 664 GETCHARINC(c1, src1);
2725     GETCHARINC(c2, src2);
/* Only the other case of c2 is considered when the characters differ. */
2726 zherczeg 915 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
2727 ph10 664 }
2728     return src2;
2729     }
2730    
2731 ph10 836 #endif /* SUPPORT_UTF && SUPPORT_UCP */
2732 ph10 664
/* Emits the comparison of one pattern character (all of its code units in UTF
mode) against the subject data located context->length bytes before STR_PTR.

Arguments:
  common      compiler state
  caseless    when TRUE and the character has another case, the differing
              bit is OR-ed into both sides before the comparison
  cc          points to the first code unit of the pattern character
  context     running state: remaining length, the register holding the
              already loaded subject data and, when unaligned reads are
              supported, the code units collected for a combined comparison
  fallbacks   receives the jumps taken when the comparison fails

Returns:      pointer to the code unit following the character */
2733 ph10 836 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2734 ph10 664 compare_context* context, jump_list **fallbacks)
2735     {
2736     DEFINE_COMPILER;
2737     unsigned int othercasebit = 0;
2738 ph10 836 pcre_uchar *othercasechar = NULL;
2739     #ifdef SUPPORT_UTF
2740     int utflength;
2741 ph10 664 #endif
2742    
2743     if (caseless && char_has_othercase(common, cc))
2744     {
2745     othercasebit = char_get_othercase_bit(common, cc);
2746     SLJIT_ASSERT(othercasebit);
2747     /* Extracting bit difference info. */
/* The upper part of the value selects the code unit that differs
(othercasechar), the low 8 bits hold the differing bit. */
2748 ph10 836 #ifdef COMPILE_PCRE8
2749     othercasechar = cc + (othercasebit >> 8);
2750 ph10 664 othercasebit &= 0xff;
2751 ph10 836 #else
2752     #ifdef COMPILE_PCRE16
2753     othercasechar = cc + (othercasebit >> 9);
/* Bit 0x100 signals that the differing bit is in the upper byte of the
16 bit code unit. */
2754     if ((othercasebit & 0x100) != 0)
2755     othercasebit = (othercasebit & 0xff) << 8;
2756     else
2757     othercasebit &= 0xff;
2758     #endif
2759     #endif
2760 ph10 664 }
2761    
/* First call for this sequence: load the first chunk of subject data into
TMP1. The next chunk will be loaded into TMP2. */
2762     if (context->sourcereg == -1)
2763     {
2764 ph10 836 #ifdef COMPILE_PCRE8
2765 ph10 664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2766     if (context->length >= 4)
2767     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2768     else if (context->length >= 2)
2769 zherczeg 847 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2770 ph10 664 else
2771     #endif
2772     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2773 ph10 836 #else
2774     #ifdef COMPILE_PCRE16
2775     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2776     if (context->length >= 4)
2777     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2778     else
2779     #endif
2780 zherczeg 847 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2781 ph10 836 #endif
2782     #endif /* COMPILE_PCRE8 */
2783 ph10 664 context->sourcereg = TMP2;
2784     }
2785    
2786 ph10 836 #ifdef SUPPORT_UTF
/* In UTF mode every code unit of the character is processed by the loop. */
2787     utflength = 1;
2788     if (common->utf && HAS_EXTRALEN(*cc))
2789     utflength += GET_EXTRALEN(*cc);
2790 ph10 664
2791     do
2792     {
2793     #endif
2794    
2795 ph10 836 context->length -= IN_UCHARS(1);
2796 ph10 664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2797    
2798     /* Unaligned read is supported. */
/* Code units are collected in context->c (expected value) and context->oc
(case bit mask), and compared together once enough of them are gathered. */
2799 ph10 836 if (othercasebit != 0 && othercasechar == cc)
2800 ph10 664 {
2801 ph10 836 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2802     context->oc.asuchars[context->ucharptr] = othercasebit;
2803 ph10 664 }
2804     else
2805     {
2806 ph10 836 context->c.asuchars[context->ucharptr] = *cc;
2807     context->oc.asuchars[context->ucharptr] = 0;
2808 ph10 664 }
2809 ph10 836 context->ucharptr++;
2810 ph10 664
/* Emit the comparison when the collected units fill a 4 byte word, when the
sequence is finished, or (8 bit only) when two units are collected and
just one remains. */
2811 ph10 836 #ifdef COMPILE_PCRE8
2812     if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2813     #else
2814     if (context->ucharptr >= 2 || context->length == 0)
2815     #endif
2816 ph10 664 {
/* Load the following chunk into the currently unused register before the
already loaded chunk is compared. */
2817     if (context->length >= 4)
2818     OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2819 ph10 836 #ifdef COMPILE_PCRE8
2820 ph10 664 else if (context->length >= 2)
2821 zherczeg 847 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2822 ph10 664 else if (context->length >= 1)
2823     OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2824 ph10 836 #else
2825     else if (context->length >= 2)
2826 zherczeg 847 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2827 ph10 836 #endif
/* Switch to the register which holds the chunk to be compared now. */
2828 ph10 664 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2829    
/* The case labels are given in code units: 4 and 2 bytes wide comparisons,
and the single byte comparison in the 8 bit library. */
2830 ph10 836 switch(context->ucharptr)
2831 ph10 664 {
2832 ph10 836 case 4 / sizeof(pcre_uchar):
2833 ph10 664 if (context->oc.asint != 0)
2834     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2835     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2836     break;
2837    
2838 ph10 836 case 2 / sizeof(pcre_uchar):
2839 zherczeg 847 if (context->oc.asushort != 0)
2840     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2841     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2842 ph10 664 break;
2843    
2844 ph10 836 #ifdef COMPILE_PCRE8
2845 ph10 664 case 1:
2846     if (context->oc.asbyte != 0)
2847     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2848     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2849     break;
2850 ph10 836 #endif
2851 ph10 664
2852     default:
2853     SLJIT_ASSERT_STOP();
2854     break;
2855     }
2856 ph10 836 context->ucharptr = 0;
2857 ph10 664 }
2858 ph10 691
2859 ph10 664 #else
2860    
2861     /* Unaligned read is unsupported. */
/* One code unit is compared per iteration: the next unit is loaded into the
unused register, then the previously loaded one is checked. */
2862 ph10 836 #ifdef COMPILE_PCRE8
2863 ph10 664 if (context->length > 0)
2864     OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2865 ph10 836 #else
2866     if (context->length > 0)
2867     OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2868     #endif
2869 ph10 664 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2870    
2871 ph10 836 if (othercasebit != 0 && othercasechar == cc)
2872 ph10 664 {
2873     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2874     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2875     }
2876     else
2877     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2878    
2879     #endif
2880    
2881     cc++;
2882 ph10 836 #ifdef SUPPORT_UTF
2883     utflength--;
2884 ph10 664 }
2885 ph10 836 while (utflength > 0);
2886 ph10 664 #endif
2887    
2888     return cc;
2889     }
2890    
2891 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2892 ph10 664
/* Helper macros of compile_xclass_hotpath. They emit an instruction which
rebases typereg (SET_TYPE_OFFSET) or TMP1 (SET_CHAR_OFFSET), so that the
register holds its original value minus (value), and they record the new base
in the typeoffset / charoffset variable of the enclosing function. Nothing is
emitted when the base is already (value).
Note: each macro expands to two statements (an if block and an assignment),
and (value) is evaluated more than once. Use them only as a full statement
and never as the unbraced body of an if / else / loop. */
2893     #define SET_TYPE_OFFSET(value) \
2894     if ((value) != typeoffset) \
2895     { \
2896     if ((value) > typeoffset) \
2897     OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
2898     else \
2899     OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
2900     } \
2901     typeoffset = (value);
2902    
2903     #define SET_CHAR_OFFSET(value) \
2904     if ((value) != charoffset) \
2905     { \
2906     if ((value) > charoffset) \
2907     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
2908     else \
2909     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
2910     } \
2911     charoffset = (value);
2912    
/* Emits the matching code of an extended character class (XCLASS).

Arguments:
  common      compiler state
  cc          points to the flag unit of the class data (XCL_NOT, XCL_MAP),
              which is followed by the optional bitmap and the list of
              XCL_SINGLE / XCL_RANGE / XCL_PROP / XCL_NOTPROP items, closed
              by XCL_END
  fallbacks   receives the jumps taken when the character does not match

The emitted code first fails if the subject is at its end, then reads one
character. The class data is walked twice: the first pass collects which
pieces of information (character, type, script) the items need, the second
pass emits the comparisons. */
2913 ph10 836 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2914 ph10 664 {
2915     DEFINE_COMPILER;
2916     jump_list *found = NULL;
/* For a normal class a hit jumps to the end of the emitted code (found).
For a negated class (XCL_NOT) a hit goes directly to fallbacks. */
2917     jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2918     unsigned int c;
2919     int compares;
2920     struct sljit_jump *jump = NULL;
2921 ph10 836 pcre_uchar *ccbegin;
2922 ph10 664 #ifdef SUPPORT_UCP
2923     BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2924     BOOL charsaved = FALSE;
2925 zherczeg 715 int typereg = TMP1, scriptreg = TMP1;
2926     unsigned int typeoffset;
2927 ph10 664 #endif
2928 zherczeg 715 int invertcmp, numberofcmps;
2929     unsigned int charoffset;
2930 ph10 664
2931 ph10 836 /* Although SUPPORT_UTF must be defined, we are not necessarily in UTF mode. */
2932 zherczeg 914 fallback_at_str_end(common, fallbacks);
2933 ph10 664 read_char(common);
2934    
/* Optional bitmap: one bit per character value. Characters above 255 skip
the lookup (always in the non 8 bit libraries, in UTF mode otherwise). */
2935     if ((*cc++ & XCL_MAP) != 0)
2936     {
/* The character is kept in TMP3, because TMP1 is overwritten below. */
2937     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2938 ph10 836 #ifndef COMPILE_PCRE8
2939     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2940     #elif defined SUPPORT_UTF
2941     if (common->utf)
2942 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2943 ph10 836 #endif
2944 ph10 664
/* TMP2 = bit index inside the byte, TMP1 = byte index inside the map. */
2945     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2946     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2947     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2948     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2949     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2950     add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2951    
2952 ph10 836 #ifndef COMPILE_PCRE8
2953     JUMPHERE(jump);
2954     #elif defined SUPPORT_UTF
2955     if (common->utf)
2956 ph10 664 JUMPHERE(jump);
2957 ph10 836 #endif
2958 ph10 664 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2959     #ifdef SUPPORT_UCP
2960     charsaved = TRUE;
2961     #endif
/* Skip the 32 byte bitmap. */
2962 ph10 836 cc += 32 / sizeof(pcre_uchar);
2963 ph10 664 }
2964    
2965     /* Scanning the necessary info. */
/* This pass counts the items (compares) and records whether the character
value, the character type or the script is needed by any of them. */
2966     ccbegin = cc;
2967     compares = 0;
2968     while (*cc != XCL_END)
2969     {
2970     compares++;
2971     if (*cc == XCL_SINGLE)
2972     {
2973     cc += 2;
2974 ph10 836 #ifdef SUPPORT_UTF
2975     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2976 ph10 664 #endif
2977     #ifdef SUPPORT_UCP
2978     needschar = TRUE;
2979     #endif
2980     }
2981     else if (*cc == XCL_RANGE)
2982     {
2983     cc += 2;
2984 ph10 836 #ifdef SUPPORT_UTF
2985     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2986 ph10 664 #endif
2987     cc++;
2988 ph10 836 #ifdef SUPPORT_UTF
2989     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2990 ph10 664 #endif
2991     #ifdef SUPPORT_UCP
2992     needschar = TRUE;
2993     #endif
2994     }
2995     #ifdef SUPPORT_UCP
2996     else
2997     {
2998     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2999     cc++;
3000     switch(*cc)
3001     {
3002     case PT_ANY:
3003     break;
3004    
3005     case PT_LAMP:
3006     case PT_GC:
3007     case PT_PC:
3008     case PT_ALNUM:
3009     needstype = TRUE;
3010     break;
3011    
3012     case PT_SC:
3013     needsscript = TRUE;
3014     break;
3015    
3016     case PT_SPACE:
3017     case PT_PXSPACE:
3018     case PT_WORD:
3019     needstype = TRUE;
3020     needschar = TRUE;
3021     break;
3022    
3023     default:
3024     SLJIT_ASSERT_STOP();
3025     break;
3026     }
3027     cc += 2;
3028     }
3029     #endif
3030     }
3031    
3032     #ifdef SUPPORT_UCP
3033     /* Simple register allocation. TMP1 is preferred if possible. */
/* NOTE(review): the getucd helper presumably leaves the character type in
TMP1 and a record index in TMP2 (this is how they are used below) - confirm
against its definition. */
3034     if (needstype || needsscript)
3035     {
/* Keep the character in TMP3 unless the bitmap code has already saved it. */
3036     if (needschar && !charsaved)
3037     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3038     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3039     if (needschar)
3040     {
/* TMP1 is needed for the character again, so the type moves to RETURN_ADDR
and the script (if any) is loaded into TMP3. */
3041     if (needstype)
3042     {
3043     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3044     typereg = RETURN_ADDR;
3045     }
3046    
3047     if (needsscript)
3048     scriptreg = TMP3;
3049     OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3050     }
3051     else if (needstype && needsscript)
3052     scriptreg = TMP3;
3053     /* In all other cases only one of them was specified, and that can go to TMP1. */
3054    
3055     if (needsscript)
3056     {
/* Load the script field of the ucd record selected by TMP2. Both forms
scale TMP2 by 8 (shift by 3). */
3057     if (scriptreg == TMP1)
3058     {
3059 ph10 836 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3060 ph10 664 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3061     }
3062     else
3063     {
3064     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3065 ph10 836 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3066 ph10 664 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3067     }
3068     }
3069     }
3070     #endif
3071    
3072     /* Generating code. */
/* charoffset / typeoffset track the value already subtracted from TMP1 /
typereg by the emitted code. numberofcmps counts the comparisons whose
results are currently merged in TMP2 and not yet turned into a jump. */
3073     cc = ccbegin;
3074     charoffset = 0;
3075     numberofcmps = 0;
3076     #ifdef SUPPORT_UCP
3077     typeoffset = 0;
3078     #endif
3079    
3080     while (*cc != XCL_END)
3081     {
3082     compares--;
/* The last item of a non-negated class is inverted: its jump is taken on a
mismatch and is added to fallbacks (see the add_jump at the end of the
loop), while a hit simply falls through. */
3083     invertcmp = (compares == 0 && list != fallbacks);
3084     jump = NULL;
3085    
3086     if (*cc == XCL_SINGLE)
3087     {
3088     cc ++;
3089 ph10 836 #ifdef SUPPORT_UTF
3090     if (common->utf)
3091 ph10 664 {
3092     GETCHARINC(c, cc);
3093     }
3094     else
3095     #endif
3096     c = *cc++;
3097    
/* Up to three comparisons are merged in TMP2 while the next item is also a
single character or a range; the jump is emitted for the merged result. */
3098     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3099     {
3100     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3101     COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3102     numberofcmps++;
3103     }
3104     else if (numberofcmps > 0)
3105     {
3106     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3107     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3108     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3109     numberofcmps = 0;
3110     }
3111     else
3112     {
3113     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3114     numberofcmps = 0;
3115     }
3116     }
3117     else if (*cc == XCL_RANGE)
3118     {
3119     cc ++;
3120 ph10 836 #ifdef SUPPORT_UTF
3121     if (common->utf)
3122 ph10 664 {
3123     GETCHARINC(c, cc);
3124     }
3125     else
3126     #endif
3127     c = *cc++;
/* Rebase TMP1 to the start of the range, so the range check becomes a
single unsigned comparison with (end - start). */
3128     SET_CHAR_OFFSET(c);
3129 ph10 836 #ifdef SUPPORT_UTF
3130     if (common->utf)
3131 ph10 664 {
3132     GETCHARINC(c, cc);
3133     }
3134     else
3135     #endif
3136     c = *cc++;
3137     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3138     {
3139     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3140     COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3141     numberofcmps++;
3142     }
3143     else if (numberofcmps > 0)
3144     {
3145     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3146     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3147     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3148     numberofcmps = 0;
3149     }
3150     else
3151     {
3152     jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3153     numberofcmps = 0;
3154     }
3155     }
3156     #ifdef SUPPORT_UCP
3157     else
3158     {
/* Property item: XCL_NOTPROP flips the sense of the comparison. */
3159     if (*cc == XCL_NOTPROP)
3160     invertcmp ^= 0x1;
3161     cc++;
3162     switch(*cc)
3163     {
3164     case PT_ANY:
/* PT_ANY needs no run time test: depending on the item kind and position
either nothing is emitted (continue) or an unconditional jump is.
NOTE(review): the continue statements leave the loop body before the
cc += 2 below - confirm that cc is advanced correctly on these paths. */
3165     if (list != fallbacks)
3166     {
3167     if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3168     continue;
3169     }
3170     else if (cc[-1] == XCL_NOTPROP)
3171     continue;
3172     jump = JUMP(SLJIT_JUMP);
3173     break;
3174    
3175     case PT_LAMP:
/* The type is one of ucp_Lu, ucp_Ll or ucp_Lt. */
3176     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3177     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3178     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3179     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3180     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3181     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3182     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3183     break;
3184    
3185     case PT_GC:
/* The type range of a general category is taken from the ucp_typerange
table (first and last type of the category). */
3186 ph10 836 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3187 ph10 664 SET_TYPE_OFFSET(c);
3188 ph10 836 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3189 ph10 664 break;
3190    
3191     case PT_PC:
3192     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3193     break;
3194    
3195     case PT_SC:
3196     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3197     break;
3198    
3199     case PT_SPACE:
3200     case PT_PXSPACE:
/* Characters 9-13 or a type in the ucp_Zl..ucp_Zs range. For PT_SPACE the
character 11 is excluded from the 9-13 range first.
NOTE(review): when the PT_SPACE jump below is taken at run time, the TMP1
adjustment emitted by SET_CHAR_OFFSET(9) is skipped, although charoffset
is already set to 9 at compile time - confirm that later items of the
same class cannot be affected. */
3201     if (*cc == PT_SPACE)
3202     {
3203     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3204     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3205     }
3206     SET_CHAR_OFFSET(9);
3207     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3208     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3209     if (*cc == PT_SPACE)
3210     JUMPHERE(jump);
3211    
3212     SET_TYPE_OFFSET(ucp_Zl);
3213     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3214     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3215     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3216     break;
3217    
3218     case PT_WORD:
/* An underscore, or anything accepted by PT_ALNUM below. */
3219     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3220     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3221     /* ... fall through */
3222    
3223     case PT_ALNUM:
/* A type in the ucp_Ll..ucp_Lu range or in the ucp_Nd..ucp_No range. When
reached through PT_WORD the first result is OR-ed to the underscore test
instead of overwriting it. */
3224     SET_TYPE_OFFSET(ucp_Ll);
3225     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3226     COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3227     SET_TYPE_OFFSET(ucp_Nd);
3228     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3229     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3230     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3231     break;
3232     }
3233     cc += 2;
3234     }
3235     #endif
3236    
/* Every jump goes to list except the one of the last item, which always
goes to fallbacks. */
3237     if (jump != NULL)
3238     add_jump(compiler, compares > 0 ? list : fallbacks, jump);
3239     }
3240    
/* Hits of a non-negated class continue here. */
3241     if (found != NULL)
3242     set_jumps(found, LABEL());
3243     }
3244    
3245     #undef SET_TYPE_OFFSET
3246     #undef SET_CHAR_OFFSET
3247    
3248     #endif
3249    
3250 ph10 836 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
3251 ph10 664 {
3252     DEFINE_COMPILER;
3253     int length;
3254     unsigned int c, oc, bit;
3255     compare_context context;
3256     struct sljit_jump *jump[4];
3257 ph10 836 #ifdef SUPPORT_UTF
3258 ph10 670 struct sljit_label *label;
3259 ph10 664 #ifdef SUPPORT_UCP
3260 ph10 836 pcre_uchar propdata[5];
3261 ph10 664 #endif
3262     #endif
3263    
3264     switch(type)
3265     {
3266     case OP_SOD:
3267     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3268     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3269     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3270     return cc;
3271    
3272     case OP_SOM:
3273     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3274     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3275     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3276     return cc;
3277    
3278     case OP_NOT_WORD_BOUNDARY:
3279     case OP_WORD_BOUNDARY:
3280     add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3281     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3282     return cc;
3283    
3284     case OP_NOT_DIGIT:
3285     case OP_DIGIT:
3286 zherczeg 914 fallback_at_str_end(common, fallbacks);
3287 ph10 664 read_char8_type(common);
3288     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3289     add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3290     return cc;
3291    
3292     case OP_NOT_WHITESPACE:
3293     case OP_WHITESPACE:
3294 zherczeg 914 fallback_at_str_end(common, fallbacks);
3295 ph10 664 read_char8_type(common);
3296     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3297     add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3298     return cc;
3299    
3300     case OP_NOT_WORDCHAR:
3301     case OP_WORDCHAR:
3302 zherczeg 914 fallback_at_str_end(common, fallbacks);
3303 ph10 664 read_char8_type(common);
3304     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3305     add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3306     return cc;
3307    
3308     case OP_ANY:
3309 zherczeg 914 fallback_at_str_end(common, fallbacks);
3310 ph10 664 read_char(common);
3311     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3312     {
3313     jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3314 zherczeg 920 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3315 zherczeg 918 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3316     else
3317 zherczeg 920 jump[1] = check_str_end(common);
3318 zherczeg 918
3319 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3320 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3321 zherczeg 918 if (jump[1] != NULL)
3322     JUMPHERE(jump[1]);
3323 ph10 664 JUMPHERE(jump[0]);
3324     }
3325     else
3326     check_newlinechar(common, common->nltype, fallbacks, TRUE);
3327     return cc;
3328    
3329     case OP_ALLANY:
3330 zherczeg 914 fallback_at_str_end(common, fallbacks);
3331 ph10 836 #ifdef SUPPORT_UTF
3332     if (common->utf)
3333 ph10 664 {
3334 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3335     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3336     #ifdef COMPILE_PCRE8
3337 zherczeg 736 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3338 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3339 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3340 ph10 836 #else /* COMPILE_PCRE8 */
3341     #ifdef COMPILE_PCRE16
3342     jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3343     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3344     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3345     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3346     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3347     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3348     #endif /* COMPILE_PCRE16 */
3349     #endif /* COMPILE_PCRE8 */
3350 zherczeg 736 JUMPHERE(jump[0]);
3351 ph10 664 return cc;
3352     }
3353     #endif
3354 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3355 ph10 664 return cc;
3356    
3357 zherczeg 736 case OP_ANYBYTE:
3358 zherczeg 914 fallback_at_str_end(common, fallbacks);
3359 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3360 zherczeg 736 return cc;
3361    
3362 ph10 836 #ifdef SUPPORT_UTF
3363 ph10 664 #ifdef SUPPORT_UCP
3364     case OP_NOTPROP:
3365     case OP_PROP:
3366     propdata[0] = 0;
3367     propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3368     propdata[2] = cc[0];
3369     propdata[3] = cc[1];
3370     propdata[4] = XCL_END;
3371     compile_xclass_hotpath(common, propdata, fallbacks);
3372     return cc + 2;
3373     #endif
3374     #endif
3375    
3376     case OP_ANYNL:
3377 zherczeg 914 fallback_at_str_end(common, fallbacks);
3378 ph10 664 read_char(common);
3379     jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3380 zherczeg 918 /* We don't need to handle soft partial matching case. */
3381     if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3382     jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3383     else
3384     jump[1] = check_str_end(common);
3385 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3386 ph10 664 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3387 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3388 ph10 664 jump[3] = JUMP(SLJIT_JUMP);
3389     JUMPHERE(jump[0]);
3390     check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
3391     JUMPHERE(jump[1]);
3392     JUMPHERE(jump[2]);
3393     JUMPHERE(jump[3]);
3394     return cc;
3395    
3396     case OP_NOT_HSPACE:
3397     case OP_HSPACE:
3398 zherczeg 914 fallback_at_str_end(common, fallbacks);
3399 ph10 664 read_char(common);
3400     add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3401     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3402     return cc;
3403    
3404     case OP_NOT_VSPACE:
3405     case OP_VSPACE:
3406 zherczeg 914 fallback_at_str_end(common, fallbacks);
3407 ph10 664 read_char(common);
3408     add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3409     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3410     return cc;
3411    
3412     #ifdef SUPPORT_UCP
3413     case OP_EXTUNI:
3414 zherczeg 914 fallback_at_str_end(common, fallbacks);
3415 ph10 664 read_char(common);
3416     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3417     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3418     add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3419    
3420     label = LABEL();
3421     jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3422     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3423     read_char(common);
3424     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3425     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3426     CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3427    
3428     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3429     JUMPHERE(jump[0]);
3430 zherczeg 915 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
3431     {
3432     jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3433 zherczeg 918 /* Since we successfully read a char above, partial matching must occur. */
3434     check_partial(common, TRUE);
3435 zherczeg 915 JUMPHERE(jump[0]);
3436     }
3437 ph10 664 return cc;
3438     #endif
3439    
3440     case OP_EODN:
3441 zherczeg 918 /* Requires rather complex checks. */
3442 ph10 664 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3443     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3444     {
3445 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3446     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3447 zherczeg 918 if (common->mode == JIT_COMPILE)
3448     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3449     else
3450     {
3451     jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
3452     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3453     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
3454     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3455     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
3456     add_jump(compiler, fallbacks, JUMP(SLJIT_C_NOT_EQUAL));
3457     check_partial(common, TRUE);
3458     add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3459     JUMPHERE(jump[1]);
3460     }
3461 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3462 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3463     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3464     }
3465     else if (common->nltype == NLTYPE_FIXED)
3466     {
3467 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3468     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3469 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3470     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3471     }
3472     else
3473     {
3474 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3475 ph10 664 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3476 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3477 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3478     jump[2] = JUMP(SLJIT_C_GREATER);
3479     add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
3480 ph10 836 /* Equal. */
3481     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3482 ph10 664 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3483     add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3484    
3485     JUMPHERE(jump[1]);
3486     if (common->nltype == NLTYPE_ANYCRLF)
3487     {
3488 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3489 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3490     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3491     }
3492     else
3493     {
3494     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3495     read_char(common);
3496     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3497     add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3498     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3499     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3500     }
3501     JUMPHERE(jump[2]);
3502     JUMPHERE(jump[3]);
3503     }
3504     JUMPHERE(jump[0]);
3505 zherczeg 918 check_partial(common, FALSE);
3506 ph10 664 return cc;
3507    
3508     case OP_EOD:
3509 zherczeg 918 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3510     check_partial(common, FALSE);
3511 ph10 664 return cc;
3512    
3513     case OP_CIRC:
3514     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3515     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3516     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3517     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3518     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3519     return cc;
3520    
3521     case OP_CIRCM:
3522     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3523     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3524     jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3525     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3526     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3527     jump[0] = JUMP(SLJIT_JUMP);
3528     JUMPHERE(jump[1]);
3529    
3530 zherczeg 914 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3531 ph10 664 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3532     {
3533 ph10 836 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3534 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3535 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3536     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3537 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3538     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3539     }
3540     else
3541     {
3542     skip_char_back(common);
3543     read_char(common);
3544     check_newlinechar(common, common->nltype, fallbacks, FALSE);
3545     }
3546     JUMPHERE(jump[0]);
3547     return cc;
3548    
3549     case OP_DOLL:
3550     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3551     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3552     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3553    
3554     if (!common->endonly)
3555     compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3556     else
3557 zherczeg 914 {
3558 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3559 zherczeg 918 check_partial(common, FALSE);
3560 zherczeg 914 }
3561 ph10 664 return cc;
3562    
3563     case OP_DOLLM:
3564     jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3565     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3566     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3567     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3568 zherczeg 918 check_partial(common, FALSE);
3569 ph10 664 jump[0] = JUMP(SLJIT_JUMP);
3570     JUMPHERE(jump[1]);
3571    
3572     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3573     {
3574 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3575     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3576 zherczeg 918 if (common->mode == JIT_COMPILE)
3577     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3578     else
3579     {
3580     jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
3581     /* STR_PTR = STR_END - IN_UCHARS(1) */
3582     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3583     check_partial(common, TRUE);
3584     add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3585     JUMPHERE(jump[1]);
3586     }
3587    
3588 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3589 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3590     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3591     }
3592     else
3593     {
3594     peek_char(common);
3595     check_newlinechar(common, common->nltype, fallbacks, FALSE);
3596     }
3597     JUMPHERE(jump[0]);
3598     return cc;
3599    
3600     case OP_CHAR:
3601     case OP_CHARI:
3602     length = 1;
3603 ph10 836 #ifdef SUPPORT_UTF
3604     if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3605 ph10 664 #endif
3606 zherczeg 914 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
3607 ph10 664 {
3608 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3609 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3610    
3611 ph10 836 context.length = IN_UCHARS(length);
3612 ph10 664 context.sourcereg = -1;
3613     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3614 ph10 836 context.ucharptr = 0;
3615 ph10 664 #endif
3616     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3617     }
3618 zherczeg 914 fallback_at_str_end(common, fallbacks);
3619 ph10 664 read_char(common);
3620 ph10 836 #ifdef SUPPORT_UTF
3621     if (common->utf)
3622 ph10 664 {
3623     GETCHAR(c, cc);
3624     }
3625     else
3626     #endif
3627     c = *cc;
3628 zherczeg 914 if (type == OP_CHAR || !char_has_othercase(common, cc))
3629     {
3630     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
3631     return cc + length;
3632     }
3633     oc = char_othercase(common, c);
3634     bit = c ^ oc;
3635     if (ispowerof2(bit))
3636     {
3637     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3638     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3639     return cc + length;
3640     }
3641 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3642     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3643     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3644     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3645     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3646     return cc + length;
3647    
3648     case OP_NOT:
3649     case OP_NOTI:
3650 zherczeg 914 fallback_at_str_end(common, fallbacks);
3651 ph10 664 length = 1;
3652 ph10 836 #ifdef SUPPORT_UTF
3653     if (common->utf)
3654 ph10 664 {
3655 ph10 836 #ifdef COMPILE_PCRE8
3656     c = *cc;
3657     if (c < 128)
3658 ph10 664 {
3659     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3660     if (type == OP_NOT || !char_has_othercase(common, cc))
3661     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3662     else
3663     {
3664     /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3665 zherczeg 736 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3666     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3667 ph10 664 }
3668     /* Skip the variable-length character. */
3669 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3670 zherczeg 736 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3671 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3672 zherczeg 736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3673     JUMPHERE(jump[0]);
3674 ph10 836 return cc + 1;
3675 ph10 664 }
3676     else
3677 ph10 836 #endif /* COMPILE_PCRE8 */
3678     {
3679     GETCHARLEN(c, cc, length);
3680 ph10 664 read_char(common);
3681 ph10 836 }
3682 ph10 664 }
3683     else
3684 ph10 836 #endif /* SUPPORT_UTF */
3685 ph10 664 {
3686 ph10 836 read_char(common);
3687 ph10 664 c = *cc;
3688     }
3689    
3690     if (type == OP_NOT || !char_has_othercase(common, cc))
3691     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3692     else
3693     {
3694     oc = char_othercase(common, c);
3695     bit = c ^ oc;
3696     if (ispowerof2(bit))
3697     {
3698     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3699     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3700     }
3701     else
3702     {
3703     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3704     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3705     }
3706     }
3707 zherczeg 924 return cc + length;
3708 ph10 664
3709     case OP_CLASS:
3710     case OP_NCLASS:
3711 zherczeg 914 fallback_at_str_end(common, fallbacks);
3712 ph10 664 read_char(common);
3713 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3714 ph10 664 jump[0] = NULL;
3715 ph10 836 #ifdef COMPILE_PCRE8
3716     /* This check only affects 8 bit mode. In other modes, we
3717     always need to compare the value with 255. */
3718     if (common->utf)
3719     #endif /* COMPILE_PCRE8 */
3720 ph10 664 {
3721     jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3722     if (type == OP_CLASS)
3723     {
3724     add_jump(compiler, fallbacks, jump[0]);
3725     jump[0] = NULL;
3726     }
3727     }
3728 ph10 836 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3729 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3730     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3731     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3732     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3733     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3734     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3735 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3736 ph10 664 if (jump[0] != NULL)
3737     JUMPHERE(jump[0]);
3738 ph10 836 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3739     return cc + 32 / sizeof(pcre_uchar);
3740 ph10 664
3741 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3742 ph10 664 case OP_XCLASS:
3743     compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3744     return cc + GET(cc, 0) - 1;
3745     #endif
3746    
3747     case OP_REVERSE:
3748     length = GET(cc, 0);
3749     SLJIT_ASSERT(length > 0);
3750     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3751 ph10 836 #ifdef SUPPORT_UTF
3752     if (common->utf)
3753 ph10 664 {
3754 ph10 836 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3755 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3756     label = LABEL();
3757 ph10 836 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3758 ph10 664 skip_char_back(common);
3759     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3760     JUMPTO(SLJIT_C_NOT_ZERO, label);
3761     }
3762 zherczeg 914 else
3763 ph10 664 #endif
3764 zherczeg 914 {
3765     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3766     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3767     add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3768     }
3769     check_start_used_ptr(common);
3770 ph10 664 return cc + LINK_SIZE;
3771     }
3772     SLJIT_ASSERT_STOP();
3773     return cc;
3774     }
3775    
3776 ph10 836 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3777 ph10 664 {
3778     /* This function consumes at least one input character. */
3779     /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3780     DEFINE_COMPILER;
3781 ph10 836 pcre_uchar *ccbegin = cc;
3782 ph10 664 compare_context context;
3783     int size;
3784    
3785     context.length = 0;
3786     do
3787     {
3788     if (cc >= ccend)
3789     break;
3790    
3791     if (*cc == OP_CHAR)
3792     {
3793     size = 1;
3794 ph10 836 #ifdef SUPPORT_UTF
3795     if (common->utf && HAS_EXTRALEN(cc[1]))
3796     size += GET_EXTRALEN(cc[1]);
3797 ph10 664 #endif
3798     }
3799     else if (*cc == OP_CHARI)
3800     {
3801     size = 1;
3802 ph10 836 #ifdef SUPPORT_UTF
3803     if (common->utf)
3804 ph10 664 {
3805     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3806     size = 0;
3807 ph10 836 else if (HAS_EXTRALEN(cc[1]))
3808     size += GET_EXTRALEN(cc[1]);
3809 ph10 664 }
3810 ph10 691 else
3811 ph10 664 #endif
3812     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3813     size = 0;
3814     }
3815     else
3816     size = 0;
3817    
3818     cc += 1 + size;
3819 ph10 836 context.length += IN_UCHARS(size);
3820 ph10 664 }
3821     while (size > 0 && context.length <= 128);
3822    
3823     cc = ccbegin;
3824     if (context.length > 0)
3825     {
3826     /* We have a fixed-length byte sequence. */
3827     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3828     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3829    
3830     context.sourcereg = -1;
3831     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3832 ph10 836 context.ucharptr = 0;
3833 ph10 664 #endif
3834     do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3835     return cc;
3836     }
3837    
3838     /* A non-fixed length character will be checked if length == 0. */
3839     return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3840     }
3841    
3842 ph10 836 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3843 ph10 664 {
3844     DEFINE_COMPILER;
3845     int offset = GET2(cc, 1) << 1;
3846    
3847     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3848     if (!common->jscript_compat)
3849     {
3850     if (fallbacks == NULL)
3851     {
3852 zherczeg 914 /* OVECTOR(1) contains the "string begin - 1" constant. */
3853 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3854     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3855     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3856     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3857     return JUMP(SLJIT_C_NOT_ZERO);
3858     }
3859     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3860     }
3861     return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3862     }
3863    
3864     /* Forward definitions. */
3865 ph10 836 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3866 ph10 664 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3867    
3868     #define PUSH_FALLBACK(size, ccstart, error) \
3869     do \
3870     { \
3871     fallback = sljit_alloc_memory(compiler, (size)); \
3872     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
3873     return error; \
3874     memset(fallback, 0, size); \
3875     fallback->prev = parent->top; \
3876     fallback->cc = (ccstart); \
3877     parent->top = fallback; \
3878     } \
3879     while (0)
3880    
3881     #define PUSH_FALLBACK_NOVALUE(size, ccstart) \
3882     do \
3883     { \
3884     fallback = sljit_alloc_memory(compiler, (size)); \
3885     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
3886     return; \
3887     memset(fallback, 0, size); \
3888     fallback->prev = parent->top; \
3889     fallback->cc = (ccstart); \
3890     parent->top = fallback; \
3891     } \
3892     while (0)
3893    
3894 zherczeg 914 #define FALLBACK_AS(type) ((type *)fallback)
3895 ph10 664
3896 ph10 836 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3897 ph10 664 {
3898     DEFINE_COMPILER;
3899     int offset = GET2(cc, 1) << 1;
3900     struct sljit_jump *jump = NULL;
3901 zherczeg 915 struct sljit_jump *partial;
3902     struct sljit_jump *nopartial;
3903 ph10 664
3904     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3905 zherczeg 914 /* OVECTOR(1) contains the "string begin - 1" constant. */
3906 ph10 664 if (withchecks && !common->jscript_compat)
3907     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3908    
3909 ph10 836 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3910     if (common->utf && *cc == OP_REFI)
3911 ph10 664 {
3912     SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3913     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3914     if (withchecks)
3915     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3916    
3917     /* Needed to save important temporary registers. */
3918     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3919     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3920 zherczeg 929 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
3921 ph10 836 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3922 ph10 664 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3923 zherczeg 915 if (common->mode == JIT_COMPILE)
3924     add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
3925     else
3926     {
3927     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3928     nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3929 zherczeg 918 check_partial(common, FALSE);
3930 zherczeg 915 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3931     JUMPHERE(nopartial);
3932     }
3933 ph10 664 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3934     }
3935     else
3936 ph10 836 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3937 ph10 664 {
3938     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3939     if (withchecks)
3940     jump = JUMP(SLJIT_C_ZERO);
3941 zherczeg 914
3942 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3943 zherczeg 915 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
3944     if (common->mode == JIT_COMPILE)
3945     add_jump(compiler, fallbacks, partial);
3946 ph10 664
3947     add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3948     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3949 zherczeg 915
3950     if (common->mode != JIT_COMPILE)
3951     {
3952     nopartial = JUMP(SLJIT_JUMP);
3953     JUMPHERE(partial);
3954     /* TMP2 -= STR_END - STR_PTR */
3955     OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
3956     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
3957     partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
3958     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
3959     add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3960     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3961     JUMPHERE(partial);
3962 zherczeg 918 check_partial(common, FALSE);
3963 zherczeg 915 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3964     JUMPHERE(nopartial);
3965     }
3966 ph10 664 }
3967    
3968     if (jump != NULL)
3969     {
3970     if (emptyfail)
3971     add_jump(compiler, fallbacks, jump);
3972     else
3973     JUMPHERE(jump);
3974     }
3975 ph10 836 return cc + 1 + IMM2_SIZE;
3976 ph10 664 }
3977    
3978 ph10 836 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3979 ph10 664 {
3980     DEFINE_COMPILER;
3981     fallback_common *fallback;
3982 ph10 836 pcre_uchar type;
3983 ph10 664 struct sljit_label *label;
3984     struct sljit_jump *zerolength;
3985     struct sljit_jump *jump = NULL;
3986 ph10 836 pcre_uchar *ccbegin = cc;
3987 ph10 664 int min = 0, max = 0;
3988     BOOL minimize;
3989    
3990     PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3991    
3992 ph10 836 type = cc[1 + IMM2_SIZE];
3993 ph10 664 minimize = (type & 0x1) != 0;
3994     switch(type)
3995     {
3996     case OP_CRSTAR:
3997     case OP_CRMINSTAR:
3998     min = 0;
3999     max = 0;
4000 ph10 836 cc += 1 + IMM2_SIZE + 1;
4001 ph10 664 break;
4002     case OP_CRPLUS:
4003     case OP_CRMINPLUS:
4004     min = 1;
4005     max = 0;
4006 ph10 836 cc += 1 + IMM2_SIZE + 1;
4007 ph10 664 break;
4008     case OP_CRQUERY:
4009     case OP_CRMINQUERY:
4010     min = 0;
4011     max = 1;
4012 ph10 836 cc += 1 + IMM2_SIZE + 1;
4013 ph10 664 break;
4014     case OP_CRRANGE:
4015     case OP_CRMINRANGE:
4016 ph10 836 min = GET2(cc, 1 + IMM2_SIZE + 1);
4017     max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4018     cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4019 ph10 664 break;
4020     default:
4021     SLJIT_ASSERT_STOP();
4022     break;
4023     }
4024    
4025     if (!minimize)
4026     {
4027     if (min == 0)
4028     {
4029     allocate_stack(common, 2);
4030     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4031     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4032     /* Temporary release of STR_PTR. */
4033     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4034     zerolength = compile_ref_checks(common, ccbegin, NULL);
4035     /* Restore if not zero length. */
4036     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4037     }
4038     else
4039     {
4040     allocate_stack(common, 1);
4041     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4042     zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
4043     }
4044    
4045     if (min > 1 || max > 1)
4046     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4047    
4048     label = LABEL();
4049     compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
4050    
4051     if (min > 1 || max > 1)
4052     {
4053     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4054     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4055     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4056     if (min > 1)
4057     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4058     if (max > 1)
4059     {
4060     jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4061     allocate_stack(common, 1);
4062     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4063     JUMPTO(SLJIT_JUMP, label);
4064     JUMPHERE(jump);
4065     }
4066     }
4067    
4068     if (max == 0)
4069     {
4070     /* Includes min > 1 case as well. */
4071     allocate_stack(common, 1);
4072     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4073     JUMPTO(SLJIT_JUMP, label);
4074     }
4075    
4076     JUMPHERE(zerolength);
4077     FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4078 ph10 677
4079     decrease_call_count(common);
4080 ph10 664 return cc;
4081     }
4082    
4083     allocate_stack(common, 2);
4084     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4085     if (type != OP_CRMINSTAR)
4086     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4087    
4088     if (min == 0)
4089     {
4090     zerolength = compile_ref_checks(common, ccbegin, NULL);
4091     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4092     jump = JUMP(SLJIT_JUMP);
4093     }
4094     else
4095     zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
4096    
4097     FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4098     if (max > 0)
4099     add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4100    
4101     compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
4102     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4103    
4104     if (min > 1)
4105     {
4106     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));