/* Source: /[pcre]/code/trunk/pcre_jit_compile.c
   Revision 924, Wed Feb 22 10:23:56 2012 UTC, by zherczeg
   Log message: OP_NOT now supports any UTF character */

/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

Written by Philip Hazel
Copyright (c) 1997-2012 University of Cambridge

The machine code generator part (this module) was written by Zoltan Herczeg
Copyright (c) 2010-2012

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

43     #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47     #include "pcre_internal.h"
48    
49     #ifdef SUPPORT_JIT
50    
51     /* All-in-one: Since we use the JIT compiler only from here,
52     we just include it. This way we don't need to touch the build
53     system files. */
54    
55 ph10 836 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56     #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 ph10 664 #define SLJIT_CONFIG_AUTO 1
58 zherczeg 741 #define SLJIT_CONFIG_STATIC 1
59 ph10 664 #define SLJIT_VERBOSE 0
60     #define SLJIT_DEBUG 0
61    
62     #include "sljit/sljitLir.c"
63    
64     #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 ph10 836 #error Unsupported architecture
66 ph10 664 #endif
67    
68     /* Allocate memory on the stack. Fast, but limited size. */
69     #define LOCAL_SPACE_SIZE 32768
70    
71     #define STACK_GROWTH_RATE 8192
72    
73     /* Enable to check that the allocation could destroy temporaries. */
74     #if defined SLJIT_DEBUG && SLJIT_DEBUG
75     #define DESTROY_REGISTERS 1
76     #endif
77    
/*
Short summary about the backtracking mechanism employed by the jit code generator:

The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct fallback_common for more details).

  'ab' - 'a' and 'b' regexps are concatenated
  'a+' - 'a' is the sub-expression of the '+' operator

The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the hot path (expected path), and the other is called
the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the hot path.

 Greedy star operator (*) :
   Hot path: match happens.
   Fallback path: match failed.
 Non-greedy star operator (*?) :
   Hot path: no need to perform a match.
   Fallback path: match is required.

The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:

   A(B|C)D

The generated code will be the following:

 A hot path
 '(' hot path (pushing arguments to the stack)
 B hot path
 ')' hot path (pushing arguments to the stack)
 D hot path
 return with successful match

 D fallback path
 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
 B fallback path
 C expected path
 jump to D hot path
 C fallback path
 A fallback path

 Notice that the order of the fallback code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current fallback code path. The fallback code path must check
 whether there is a next alternative. If so, it needs to jump back to
 the hot path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the fallback code paths.
*/

/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of local variables
when the fallback mechanism is performed. Because of OP_RECURSE, the locals
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.

The stack frames are stored in a chained list, and have the following format:
  ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]

Thus we can restore the locals to a particular point in the stack.
*/

148     typedef struct jit_arguments {
149     /* Pointers first. */
150     struct sljit_stack *stack;
151 ph10 836 const pcre_uchar *str;
152     const pcre_uchar *begin;
153     const pcre_uchar *end;
154 ph10 664 int *offsets;
155 ph10 836 pcre_uchar *ptr;
156 ph10 664 /* Everything else after. */
157     int offsetcount;
158 ph10 677 int calllimit;
159 ph10 836 pcre_uint8 notbol;
160     pcre_uint8 noteol;
161     pcre_uint8 notempty;
162     pcre_uint8 notempty_atstart;
163 ph10 664 } jit_arguments;
164    
165 zherczeg 914 typedef struct executable_functions {
166 zherczeg 915 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
167 zherczeg 852 PUBL(jit_callback) callback;
168 ph10 664 void *userdata;
169 zherczeg 915 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
170 zherczeg 914 } executable_functions;
171 ph10 664
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;

/* Kinds of stubs; stack_alloc is the only kind defined here. */
enum stub_types { stack_alloc };

/* Singly linked list node describing one stub: its kind, an integer payload,
the jump that enters the stub and the label at which it leaves. */
typedef struct stub_list {
  enum stub_types type;
  int data;
  struct sljit_jump *start;
  struct sljit_label *leave;
  struct stub_list *next;
} stub_list;

187     typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
188    
189     /* The following structure is the key data type for the recursive
190     code generator. It is allocated by compile_hotpath, and contains
191     the aguments for compile_fallbackpath. Must be the first member
192     of its descendants. */
193     typedef struct fallback_common {
194     /* Concatenation stack. */
195     struct fallback_common *prev;
196     jump_list *nextfallbacks;
197     /* Internal stack (for component operators). */
198     struct fallback_common *top;
199     jump_list *topfallbacks;
200     /* Opcode pointer. */
201 ph10 836 pcre_uchar *cc;
202 ph10 664 } fallback_common;
203    
204     typedef struct assert_fallback {
205     fallback_common common;
206     jump_list *condfailed;
207     /* Less than 0 (-1) if a frame is not needed. */
208     int framesize;
209     /* Points to our private memory word on the stack. */
210     int localptr;
211     /* For iterators. */
212     struct sljit_label *hotpath;
213     } assert_fallback;
214    
215     typedef struct bracket_fallback {
216     fallback_common common;
217     /* Where to coninue if an alternative is successfully matched. */
218     struct sljit_label *althotpath;
219     /* For rmin and rmax iterators. */
220     struct sljit_label *recursivehotpath;
221     /* For greedy ? operator. */
222     struct sljit_label *zerohotpath;
223     /* Contains the branches of a failed condition. */
224     union {
225     /* Both for OP_COND, OP_SCOND. */
226     jump_list *condfailed;
227     assert_fallback *assert;
228     /* For OP_ONCE. -1 if not needed. */
229     int framesize;
230     } u;
231     /* Points to our private memory word on the stack. */
232     int localptr;
233     } bracket_fallback;
234    
235     typedef struct bracketpos_fallback {
236     fallback_common common;
237     /* Points to our private memory word on the stack. */
238     int localptr;
239     /* Reverting stack is needed. */
240     int framesize;
241     /* Allocated stack size. */
242     int stacksize;
243     } bracketpos_fallback;
244    
245     typedef struct braminzero_fallback {
246     fallback_common common;
247     struct sljit_label *hotpath;
248     } braminzero_fallback;
249    
250     typedef struct iterator_fallback {
251     fallback_common common;
252     /* Next iteration. */
253     struct sljit_label *hotpath;
254     } iterator_fallback;
255    
256     typedef struct recurse_entry {
257     struct recurse_entry *next;
258     /* Contains the function entry. */
259     struct sljit_label *entry;
260     /* Collects the calls until the function is not created. */
261     jump_list *calls;
262     /* Points to the starting opcode. */
263     int start;
264     } recurse_entry;
265    
266     typedef struct recurse_fallback {
267     fallback_common common;
268     } recurse_fallback;
269    
270     typedef struct compiler_common {
271     struct sljit_compiler *compiler;
272 ph10 836 pcre_uchar *start;
273 zherczeg 920
274     /* Local stack area size and variable pointers. */
275 ph10 664 int localsize;
276     int *localptrs;
277 zherczeg 920 int cbraptr;
278     /* OVector starting point. Must be divisible by 2. */
279     int ovector_start;
280     /* Last known position of the requested byte. */
281     int req_char_ptr;
282     /* Head of the last recursion. */
283     int recursive_head;
284     /* First inspected character for partial matching. */
285     int start_used_ptr;
286     /* Starting pointer for partial soft matches. */
287     int hit_start;
288     /* End pointer of the first line. */
289     int first_line_end;
290    
291     /* Other */
292 ph10 836 const pcre_uint8 *fcc;
293 ph10 664 sljit_w lcc;
294 zherczeg 914 int mode;
295 ph10 664 int nltype;
296     int newline;
297     int bsr_nltype;
298     int endonly;
299     sljit_w ctypes;
300 zherczeg 741 sljit_uw name_table;
301     sljit_w name_count;
302     sljit_w name_entry_size;
303 zherczeg 920
304     /* Labels and jump lists. */
305 zherczeg 914 struct sljit_label *partialmatchlabel;
306 ph10 664 struct sljit_label *acceptlabel;
307     stub_list *stubs;
308     recurse_entry *entries;
309     recurse_entry *currententry;
310 zherczeg 914 jump_list *partialmatch;
311 ph10 664 jump_list *accept;
312 ph10 677 jump_list *calllimit;
313 ph10 664 jump_list *stackalloc;
314     jump_list *revertframes;
315     jump_list *wordboundary;
316     jump_list *anynewline;
317     jump_list *hspace;
318     jump_list *vspace;
319     jump_list *casefulcmp;
320     jump_list *caselesscmp;
321     BOOL jscript_compat;
322 ph10 836 #ifdef SUPPORT_UTF
323     BOOL utf;
324 ph10 664 #ifdef SUPPORT_UCP
325 ph10 836 BOOL use_ucp;
326 ph10 664 #endif
327 ph10 836 jump_list *utfreadchar;
328     #ifdef COMPILE_PCRE8
329     jump_list *utfreadtype8;
330 ph10 664 #endif
331 ph10 836 #endif /* SUPPORT_UTF */
332 ph10 664 #ifdef SUPPORT_UCP
333     jump_list *getucd;
334     #endif
335     } compiler_common;
336    
/* For byte_sequence_compare. */

/* When SLJIT_UNALIGNED is set, the two unions give int, ushort and
per-character views of the same storage: four characters in the 8 bit
library, two in the 16 bit library. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } oc;
#endif
} compare_context;

/* Marker values written into saved stack frames (see init_frame):
frame_end terminates a frame, frame_setstrbegin precedes a saved OVECTOR(0). */
enum {
  frame_end = 0,
  frame_setstrbegin = -1
};

/* Undefine sljit macros. */
#undef CMP

/* Used for accessing the elements of the stack. Element i lives at a
negative byte offset: STACK(0) is -sizeof(sljit_w), STACK(1) is
-2 * sizeof(sljit_w), and so on. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))

/* Register assignment used by the generated code. */
#define TMP1 SLJIT_TEMPORARY_REG1
#define TMP2 SLJIT_TEMPORARY_REG3
#define TMP3 SLJIT_TEMPORARY_EREG2
#define STR_PTR SLJIT_SAVED_REG1
#define STR_END SLJIT_SAVED_REG2
#define STACK_TOP SLJIT_TEMPORARY_REG2
#define STACK_LIMIT SLJIT_SAVED_REG3
#define ARGUMENTS SLJIT_SAVED_EREG1
#define CALL_COUNT SLJIT_SAVED_EREG2
#define RETURN_ADDR SLJIT_TEMPORARY_EREG1

/* Locals layout. */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_w))
#define LOCALS1 (1 * sizeof(sljit_w))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_w))
#define POSSESSIVE1 (3 * sizeof(sljit_w))
/* Max limit of recursions. */
#define CALL_LIMIT (4 * sizeof(sljit_w))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. */
/* These macros expect a variable named common (compiler_common *) in scope. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
/* Private local offset recorded for the opcode at cc. */
#define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])

411 ph10 836 #ifdef COMPILE_PCRE8
412     #define MOV_UCHAR SLJIT_MOV_UB
413     #define MOVU_UCHAR SLJIT_MOVU_UB
414     #else
415     #ifdef COMPILE_PCRE16
416     #define MOV_UCHAR SLJIT_MOV_UH
417     #define MOVU_UCHAR SLJIT_MOVU_UH
418     #else
419     #error Unsupported compiling mode
420     #endif
421     #endif
422    
/* Shortcuts. All of them except DEFINE_COMPILER expect a local variable
named compiler, which DEFINE_COMPILER declares from common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define COND_VALUE(op, dst, dstw, type) \
  sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))

445 ph10 836 static pcre_uchar* bracketend(pcre_uchar* cc)
446 ph10 664 {
447     SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
448     do cc += GET(cc, 1); while (*cc == OP_ALT);
449     SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
450     cc += 1 + LINK_SIZE;
451     return cc;
452     }
453    
454 ph10 691 /* Functions whose might need modification for all new supported opcodes:
455 ph10 664 next_opcode
456     get_localspace
457     set_localptrs
458     get_framesize
459     init_frame
460     get_localsize
461     copy_locals
462     compile_hotpath
463     compile_fallbackpath
464     */
465    
466 ph10 836 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
467 ph10 664 {
468     SLJIT_UNUSED_ARG(common);
469     switch(*cc)
470     {
471     case OP_SOD:
472     case OP_SOM:
473     case OP_SET_SOM:
474     case OP_NOT_WORD_BOUNDARY:
475     case OP_WORD_BOUNDARY:
476     case OP_NOT_DIGIT:
477     case OP_DIGIT:
478     case OP_NOT_WHITESPACE:
479     case OP_WHITESPACE:
480     case OP_NOT_WORDCHAR:
481     case OP_WORDCHAR:
482     case OP_ANY:
483     case OP_ALLANY:
484     case OP_ANYNL:
485     case OP_NOT_HSPACE:
486     case OP_HSPACE:
487     case OP_NOT_VSPACE:
488     case OP_VSPACE:
489     case OP_EXTUNI:
490     case OP_EODN:
491     case OP_EOD:
492     case OP_CIRC:
493     case OP_CIRCM:
494     case OP_DOLL:
495     case OP_DOLLM:
496     case OP_TYPESTAR:
497     case OP_TYPEMINSTAR:
498     case OP_TYPEPLUS:
499     case OP_TYPEMINPLUS:
500     case OP_TYPEQUERY:
501     case OP_TYPEMINQUERY:
502     case OP_TYPEPOSSTAR:
503     case OP_TYPEPOSPLUS:
504     case OP_TYPEPOSQUERY:
505     case OP_CRSTAR:
506     case OP_CRMINSTAR:
507     case OP_CRPLUS:
508     case OP_CRMINPLUS:
509     case OP_CRQUERY:
510     case OP_CRMINQUERY:
511     case OP_DEF:
512     case OP_BRAZERO:
513     case OP_BRAMINZERO:
514     case OP_BRAPOSZERO:
515     case OP_FAIL:
516     case OP_ACCEPT:
517     case OP_ASSERT_ACCEPT:
518     case OP_SKIPZERO:
519     return cc + 1;
520    
521 zherczeg 736 case OP_ANYBYTE:
522 ph10 836 #ifdef SUPPORT_UTF
523     if (common->utf) return NULL;
524 zherczeg 736 #endif
525     return cc + 1;
526    
527 ph10 664 case OP_CHAR:
528     case OP_CHARI:
529     case OP_NOT:
530     case OP_NOTI:
531     case OP_STAR:
532     case OP_MINSTAR:
533     case OP_PLUS:
534     case OP_MINPLUS:
535     case OP_QUERY:
536     case OP_MINQUERY:
537     case OP_POSSTAR:
538     case OP_POSPLUS:
539     case OP_POSQUERY:
540     case OP_STARI:
541     case OP_MINSTARI:
542     case OP_PLUSI:
543     case OP_MINPLUSI:
544     case OP_QUERYI:
545     case OP_MINQUERYI:
546     case OP_POSSTARI:
547     case OP_POSPLUSI:
548     case OP_POSQUERYI:
549     case OP_NOTSTAR:
550     case OP_NOTMINSTAR:
551     case OP_NOTPLUS:
552     case OP_NOTMINPLUS:
553     case OP_NOTQUERY:
554     case OP_NOTMINQUERY:
555     case OP_NOTPOSSTAR:
556     case OP_NOTPOSPLUS:
557     case OP_NOTPOSQUERY:
558     case OP_NOTSTARI:
559     case OP_NOTMINSTARI:
560     case OP_NOTPLUSI:
561     case OP_NOTMINPLUSI:
562     case OP_NOTQUERYI:
563     case OP_NOTMINQUERYI:
564     case OP_NOTPOSSTARI:
565     case OP_NOTPOSPLUSI:
566     case OP_NOTPOSQUERYI:
567     cc += 2;
568 ph10 836 #ifdef SUPPORT_UTF
569     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
570 ph10 664 #endif
571     return cc;
572    
573     case OP_UPTO:
574     case OP_MINUPTO:
575     case OP_EXACT:
576     case OP_POSUPTO:
577     case OP_UPTOI:
578     case OP_MINUPTOI:
579     case OP_EXACTI:
580     case OP_POSUPTOI:
581     case OP_NOTUPTO:
582     case OP_NOTMINUPTO:
583     case OP_NOTEXACT:
584     case OP_NOTPOSUPTO:
585     case OP_NOTUPTOI:
586     case OP_NOTMINUPTOI:
587     case OP_NOTEXACTI:
588     case OP_NOTPOSUPTOI:
589 ph10 836 cc += 2 + IMM2_SIZE;
590     #ifdef SUPPORT_UTF
591     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
592 ph10 664 #endif
593     return cc;
594    
595     case OP_NOTPROP:
596     case OP_PROP:
597 ph10 836 return cc + 1 + 2;
598    
599 ph10 664 case OP_TYPEUPTO:
600     case OP_TYPEMINUPTO:
601     case OP_TYPEEXACT:
602     case OP_TYPEPOSUPTO:
603     case OP_REF:
604     case OP_REFI:
605     case OP_CREF:
606 zherczeg 741 case OP_NCREF:
607     case OP_RREF:
608     case OP_NRREF:
609 ph10 664 case OP_CLOSE:
610 ph10 836 cc += 1 + IMM2_SIZE;
611 ph10 664 return cc;
612    
613     case OP_CRRANGE:
614     case OP_CRMINRANGE:
615 ph10 836 return cc + 1 + 2 * IMM2_SIZE;
616 ph10 664
617     case OP_CLASS:
618     case OP_NCLASS:
619 ph10 836 return cc + 1 + 32 / sizeof(pcre_uchar);
620 ph10 664
621 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
622 ph10 664 case OP_XCLASS:
623     return cc + GET(cc, 1);
624     #endif
625    
626     case OP_RECURSE:
627     case OP_ASSERT:
628     case OP_ASSERT_NOT:
629     case OP_ASSERTBACK:
630     case OP_ASSERTBACK_NOT:
631     case OP_REVERSE:
632     case OP_ONCE:
633 zherczeg 726 case OP_ONCE_NC:
634 ph10 664 case OP_BRA:
635     case OP_BRAPOS:
636     case OP_COND:
637     case OP_SBRA:
638     case OP_SBRAPOS:
639     case OP_SCOND:
640     case OP_ALT:
641     case OP_KET:
642     case OP_KETRMAX:
643     case OP_KETRMIN:
644     case OP_KETRPOS:
645     return cc + 1 + LINK_SIZE;
646    
647     case OP_CBRA:
648     case OP_CBRAPOS:
649     case OP_SCBRA:
650     case OP_SCBRAPOS:
651 ph10 836 return cc + 1 + LINK_SIZE + IMM2_SIZE;
652 ph10 664
653     default:
654     return NULL;
655     }
656     }
657    
658 ph10 836 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
659 ph10 664 {
660     int localspace = 0;
661 ph10 836 pcre_uchar *alternative;
662 ph10 664 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
663     while (cc < ccend)
664     {
665     switch(*cc)
666     {
667     case OP_ASSERT:
668     case OP_ASSERT_NOT:
669     case OP_ASSERTBACK:
670     case OP_ASSERTBACK_NOT:
671     case OP_ONCE:
672 zherczeg 726 case OP_ONCE_NC:
673 ph10 664 case OP_BRAPOS:
674     case OP_SBRA:
675     case OP_SBRAPOS:
676     case OP_SCOND:
677     localspace += sizeof(sljit_w);
678     cc += 1 + LINK_SIZE;
679     break;
680    
681     case OP_CBRAPOS:
682     case OP_SCBRAPOS:
683     localspace += sizeof(sljit_w);
684 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
685 ph10 664 break;
686    
687     case OP_COND:
688     /* Might be a hidden SCOND. */
689     alternative = cc + GET(cc, 1);
690     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
691     localspace += sizeof(sljit_w);
692     cc += 1 + LINK_SIZE;
693     break;
694    
695 zherczeg 920 case OP_RECURSE:
696     /* Set its value only once. */
697     if (common->recursive_head == 0)
698     {
699     common->recursive_head = common->ovector_start;
700     common->ovector_start += sizeof(sljit_w);
701     }
702     cc += 1 + LINK_SIZE;
703     break;
704    
705 ph10 664 default:
706     cc = next_opcode(common, cc);
707     if (cc == NULL)
708     return -1;
709     break;
710     }
711     }
712     return localspace;
713     }
714    
715 ph10 836 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
716 ph10 664 {
717 ph10 836 pcre_uchar *cc = common->start;
718     pcre_uchar *alternative;
719 ph10 664 while (cc < ccend)
720     {
721     switch(*cc)
722     {
723     case OP_ASSERT:
724     case OP_ASSERT_NOT:
725     case OP_ASSERTBACK:
726     case OP_ASSERTBACK_NOT:
727     case OP_ONCE:
728 zherczeg 726 case OP_ONCE_NC:
729 ph10 664 case OP_BRAPOS:
730     case OP_SBRA:
731     case OP_SBRAPOS:
732     case OP_SCOND:
733     common->localptrs[cc - common->start] = localptr;
734     localptr += sizeof(sljit_w);
735     cc += 1 + LINK_SIZE;
736     break;
737    
738     case OP_CBRAPOS:
739     case OP_SCBRAPOS:
740     common->localptrs[cc - common->start] = localptr;
741     localptr += sizeof(sljit_w);
742 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
743 ph10 664 break;
744    
745     case OP_COND:
746     /* Might be a hidden SCOND. */
747     alternative = cc + GET(cc, 1);
748     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
749     {
750     common->localptrs[cc - common->start] = localptr;
751     localptr += sizeof(sljit_w);
752     }
753     cc += 1 + LINK_SIZE;
754     break;
755    
756     default:
757     cc = next_opcode(common, cc);
758     SLJIT_ASSERT(cc != NULL);
759     break;
760     }
761     }
762     }
763    
764     /* Returns with -1 if no need for frame. */
765 ph10 836 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
766 ph10 664 {
767 ph10 836 pcre_uchar *ccend = bracketend(cc);
768 ph10 664 int length = 0;
769     BOOL possessive = FALSE;
770     BOOL setsom_found = FALSE;
771    
772     if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
773     {
774 zherczeg 696 length = 3;
775 ph10 664 possessive = TRUE;
776     }
777    
778     cc = next_opcode(common, cc);
779     SLJIT_ASSERT(cc != NULL);
780     while (cc < ccend)
781     switch(*cc)
782     {
783     case OP_SET_SOM:
784     case OP_RECURSE:
785     if (!setsom_found)
786     {
787     length += 2;
788     setsom_found = TRUE;
789     }
790     cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
791     break;
792    
793     case OP_CBRA:
794     case OP_CBRAPOS:
795     case OP_SCBRA:
796     case OP_SCBRAPOS:
797     length += 3;
798 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
799 ph10 664 break;
800    
801     default:
802     cc = next_opcode(common, cc);
803     SLJIT_ASSERT(cc != NULL);
804     break;
805     }
806    
807     /* Possessive quantifiers can use a special case. */
808 zherczeg 726 if (SLJIT_UNLIKELY(possessive) && length == 3)
809 ph10 664 return -1;
810    
811     if (length > 0)
812 zherczeg 726 return length + 1;
813     return -1;
814 ph10 664 }
815    
816 ph10 836 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
817 ph10 664 {
818     DEFINE_COMPILER;
819 ph10 836 pcre_uchar *ccend = bracketend(cc);
820 ph10 664 BOOL setsom_found = FALSE;
821     int offset;
822    
823 zherczeg 726 /* >= 1 + shortest item size (2) */
824 zherczeg 906 SLJIT_UNUSED_ARG(stacktop);
825 zherczeg 726 SLJIT_ASSERT(stackpos >= stacktop + 2);
826 ph10 664
827     stackpos = STACK(stackpos);
828     if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
829     cc = next_opcode(common, cc);
830     SLJIT_ASSERT(cc != NULL);
831     while (cc < ccend)
832     switch(*cc)
833     {
834     case OP_SET_SOM:
835     case OP_RECURSE:
836     if (!setsom_found)
837     {
838     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
839     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
840     stackpos += (int)sizeof(sljit_w);
841     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
842     stackpos += (int)sizeof(sljit_w);
843     setsom_found = TRUE;
844     }
845     cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
846     break;
847    
848     case OP_CBRA:
849     case OP_CBRAPOS:
850     case OP_SCBRA:
851     case OP_SCBRAPOS:
852     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
853     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
854     stackpos += (int)sizeof(sljit_w);
855     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
856     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
857     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
858     stackpos += (int)sizeof(sljit_w);
859     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
860     stackpos += (int)sizeof(sljit_w);
861    
862 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
863 ph10 664 break;
864    
865     default:
866     cc = next_opcode(common, cc);
867     SLJIT_ASSERT(cc != NULL);
868     break;
869     }
870    
871     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
872 zherczeg 726 SLJIT_ASSERT(stackpos == STACK(stacktop));
873 ph10 664 }
874    
875 ph10 836 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
876 ph10 664 {
877     int localsize = 2;
878 ph10 836 pcre_uchar *alternative;
879 ph10 664 /* Calculate the sum of the local variables. */
880     while (cc < ccend)
881     {
882     switch(*cc)
883     {
884     case OP_ASSERT:
885     case OP_ASSERT_NOT:
886     case OP_ASSERTBACK:
887     case OP_ASSERTBACK_NOT:
888     case OP_ONCE:
889 zherczeg 726 case OP_ONCE_NC:
890 ph10 664 case OP_BRAPOS:
891     case OP_SBRA:
892     case OP_SBRAPOS:
893     case OP_SCOND:
894     localsize++;
895     cc += 1 + LINK_SIZE;
896     break;
897    
898     case OP_CBRA:
899     case OP_SCBRA:
900     localsize++;
901 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
902 ph10 664 break;
903    
904     case OP_CBRAPOS:
905     case OP_SCBRAPOS:
906     localsize += 2;
907 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
908 ph10 664 break;
909    
910     case OP_COND:
911     /* Might be a hidden SCOND. */
912     alternative = cc + GET(cc, 1);
913     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
914     localsize++;
915     cc += 1 + LINK_SIZE;
916     break;
917    
918     default:
919     cc = next_opcode(common, cc);
920     SLJIT_ASSERT(cc != NULL);
921     break;
922     }
923     }
924     SLJIT_ASSERT(cc == ccend);
925     return localsize;
926     }
927    
928 ph10 836 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
929 ph10 664 BOOL save, int stackptr, int stacktop)
930     {
931     DEFINE_COMPILER;
932     int srcw[2];
933     int count;
934     BOOL tmp1next = TRUE;
935     BOOL tmp1empty = TRUE;
936     BOOL tmp2empty = TRUE;
937 ph10 836 pcre_uchar *alternative;
938 ph10 664 enum {
939     start,
940     loop,
941     end
942     } status;
943    
944     status = save ? start : loop;
945     stackptr = STACK(stackptr - 2);
946     stacktop = STACK(stacktop - 1);
947    
948     if (!save)
949     {
950     stackptr += sizeof(sljit_w);
951     if (stackptr < stacktop)
952     {
953     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
954     stackptr += sizeof(sljit_w);
955     tmp1empty = FALSE;
956     }
957     if (stackptr < stacktop)
958     {
959     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
960     stackptr += sizeof(sljit_w);
961     tmp2empty = FALSE;
962     }
963     /* The tmp1next must be TRUE in either way. */
964     }
965    
966     while (status != end)
967     {
968     count = 0;
969     switch(status)
970     {
971     case start:
972 zherczeg 920 SLJIT_ASSERT(save && common->recursive_head != 0);
973 ph10 664 count = 1;
974 zherczeg 920 srcw[0] = common->recursive_head;
975 ph10 664 status = loop;
976     break;
977    
978     case loop:
979     if (cc >= ccend)
980     {
981     status = end;
982     break;
983     }
984    
985     switch(*cc)
986     {
987     case OP_ASSERT:
988     case OP_ASSERT_NOT:
989     case OP_ASSERTBACK:
990     case OP_ASSERTBACK_NOT:
991     case OP_ONCE:
992 zherczeg 726 case OP_ONCE_NC:
993 ph10 664 case OP_BRAPOS:
994     case OP_SBRA:
995     case OP_SBRAPOS:
996     case OP_SCOND:
997     count = 1;
998 ph10 836 srcw[0] = PRIV_DATA(cc);
999 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1000     cc += 1 + LINK_SIZE;
1001     break;
1002    
1003     case OP_CBRA:
1004     case OP_SCBRA:
1005     count = 1;
1006     srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1007 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1008 ph10 664 break;
1009    
1010     case OP_CBRAPOS:
1011     case OP_SCBRAPOS:
1012     count = 2;
1013     srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1014 ph10 836 srcw[0] = PRIV_DATA(cc);
1015 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1016 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1017 ph10 664 break;
1018    
1019     case OP_COND:
1020     /* Might be a hidden SCOND. */
1021     alternative = cc + GET(cc, 1);
1022     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1023     {
1024     count = 1;
1025 ph10 836 srcw[0] = PRIV_DATA(cc);
1026 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1027     }
1028     cc += 1 + LINK_SIZE;
1029     break;
1030    
1031     default:
1032     cc = next_opcode(common, cc);
1033     SLJIT_ASSERT(cc != NULL);
1034     break;
1035     }
1036     break;
1037    
1038     case end:
1039     SLJIT_ASSERT_STOP();
1040     break;
1041     }
1042    
1043     while (count > 0)
1044     {
1045     count--;
1046     if (save)
1047     {
1048     if (tmp1next)
1049     {
1050     if (!tmp1empty)
1051     {
1052     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1053     stackptr += sizeof(sljit_w);
1054     }
1055     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1056     tmp1empty = FALSE;
1057     tmp1next = FALSE;
1058     }
1059     else
1060     {
1061     if (!tmp2empty)
1062     {
1063     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1064     stackptr += sizeof(sljit_w);
1065     }
1066     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1067     tmp2empty = FALSE;
1068     tmp1next = TRUE;
1069     }
1070     }
1071     else
1072     {
1073     if (tmp1next)
1074     {
1075     SLJIT_ASSERT(!tmp1empty);
1076     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1077     tmp1empty = stackptr >= stacktop;
1078     if (!tmp1empty)
1079     {
1080     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1081     stackptr += sizeof(sljit_w);
1082     }
1083     tmp1next = FALSE;
1084     }
1085     else
1086     {
1087     SLJIT_ASSERT(!tmp2empty);
1088     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1089     tmp2empty = stackptr >= stacktop;
1090     if (!tmp2empty)
1091     {
1092     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1093     stackptr += sizeof(sljit_w);
1094     }
1095     tmp1next = TRUE;
1096     }
1097     }
1098     }
1099     }
1100    
1101     if (save)
1102     {
1103     if (tmp1next)
1104     {
1105     if (!tmp1empty)
1106     {
1107     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1108     stackptr += sizeof(sljit_w);
1109     }
1110     if (!tmp2empty)
1111     {
1112     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1113     stackptr += sizeof(sljit_w);
1114     }
1115     }
1116     else
1117     {
1118     if (!tmp2empty)
1119     {
1120     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1121     stackptr += sizeof(sljit_w);
1122     }
1123     if (!tmp1empty)
1124     {
1125     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1126     stackptr += sizeof(sljit_w);
1127     }
1128     }
1129     }
1130     SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1131     }
1132    
1133     static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1134     {
1135     return (value & (value - 1)) == 0;
1136     }
1137    
1138     static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1139     {
1140     while (list)
1141     {
1142     /* sljit_set_label is clever enough to do nothing
1143     if either the jump or the label is NULL */
1144     sljit_set_label(list->jump, label);
1145     list = list->next;
1146     }
1147     }
1148    
1149     static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1150     {
1151     jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1152     if (list_item)
1153     {
1154     list_item->next = *list;
1155     list_item->jump = jump;
1156     *list = list_item;
1157     }
1158     }
1159    
1160     static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1161     {
1162     DEFINE_COMPILER;
1163     stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1164    
1165     if (list_item)
1166     {
1167     list_item->type = type;
1168     list_item->data = data;
1169     list_item->start = start;
1170     list_item->leave = LABEL();
1171     list_item->next = common->stubs;
1172     common->stubs = list_item;
1173     }
1174     }
1175    
1176     static void flush_stubs(compiler_common *common)
1177     {
1178     DEFINE_COMPILER;
1179     stub_list* list_item = common->stubs;
1180    
1181     while (list_item)
1182     {
1183     JUMPHERE(list_item->start);
1184     switch(list_item->type)
1185     {
1186     case stack_alloc:
1187     add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1188     break;
1189     }
1190     JUMPTO(SLJIT_JUMP, list_item->leave);
1191     list_item = list_item->next;
1192     }
1193     common->stubs = NULL;
1194     }
1195    
1196 ph10 677 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1197     {
1198     DEFINE_COMPILER;
1199    
1200 zherczeg 695 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1201 ph10 677 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1202     }
1203    
1204 ph10 664 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1205     {
1206     /* May destroy all locals and registers except TMP2. */
1207     DEFINE_COMPILER;
1208    
1209     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1210     #ifdef DESTROY_REGISTERS
1211     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1212     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1213     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1214     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1215     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1216     #endif
1217     add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1218     }
1219    
1220     static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1221     {
1222     DEFINE_COMPILER;
1223     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1224     }
1225    
1226     static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1227     {
1228     DEFINE_COMPILER;
1229     struct sljit_label *loop;
1230     int i;
1231     /* At this point we can freely use all temporary registers. */
1232     /* TMP1 returns with begin - 1. */
1233 zherczeg 880 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1234 ph10 664 if (length < 8)
1235     {
1236     for (i = 0; i < length; i++)
1237     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1238     }
1239     else
1240     {
1241     OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1242     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1243     loop = LABEL();
1244     OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1245     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1246     JUMPTO(SLJIT_C_NOT_ZERO, loop);
1247     }
1248     }
1249    
1250 zherczeg 696 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1251 ph10 664 {
1252     DEFINE_COMPILER;
1253     struct sljit_label *loop;
1254     struct sljit_jump *earlyexit;
1255    
1256     /* At this point we can freely use all registers. */
1257 zherczeg 880 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1258 zherczeg 696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1259    
1260 ph10 664 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1261     OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1262     OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1263     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1264 zherczeg 880 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1265 ph10 664 /* Unlikely, but possible */
1266     earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1267     loop = LABEL();
1268 zherczeg 880 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1269     OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1270 ph10 664 /* Copy the integer value to the output buffer */
1271 ph10 836 #ifdef COMPILE_PCRE16
1272 zherczeg 880 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1273 ph10 836 #endif
1274 zherczeg 880 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1275 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1276     JUMPTO(SLJIT_C_NOT_ZERO, loop);
1277     JUMPHERE(earlyexit);
1278 zherczeg 696
1279     /* Calculate the return value, which is the maximum ovector value. */
1280     if (topbracket > 1)
1281     {
1282     OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1283     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1284    
1285 zherczeg 880 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1286 zherczeg 696 loop = LABEL();
1287 zherczeg 715 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1288 zherczeg 696 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1289 zherczeg 880 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1290 zherczeg 696 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1291     }
1292     else
1293     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1294 ph10 664 }
1295    
1296 zherczeg 914 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1297     {
1298     DEFINE_COMPILER;
1299    
1300     SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1301 zherczeg 920 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1302 zherczeg 914
1303     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1304     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1305     OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1306     CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1307    
1308     /* Store match begin and end. */
1309     OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1310     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1311 zherczeg 920 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1312 zherczeg 914 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1313     #ifdef COMPILE_PCRE16
1314     OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1315     #endif
1316     OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1317    
1318     OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1319     #ifdef COMPILE_PCRE16
1320     OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1321     #endif
1322     OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1323    
1324     JUMPTO(SLJIT_JUMP, leave);
1325     }
1326    
1327     static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1328     {
1329     /* May destroy TMP1. */
1330     DEFINE_COMPILER;
1331     struct sljit_jump *jump;
1332    
1333     if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1334     {
1335 zherczeg 920 /* The value of -1 must be kept for start_used_ptr! */
1336     OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1337     /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1338     is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1339 zherczeg 914 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1340 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1341 zherczeg 914 JUMPHERE(jump);
1342     }
1343     else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1344     {
1345 zherczeg 920 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1346     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1347 zherczeg 914 JUMPHERE(jump);
1348     }
1349     }
1350    
1351 ph10 836 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1352 ph10 664 {
1353     /* Detects if the character has an othercase. */
1354     unsigned int c;
1355    
1356 ph10 836 #ifdef SUPPORT_UTF
1357     if (common->utf)
1358 ph10 664 {
1359     GETCHAR(c, cc);
1360     if (c > 127)
1361     {
1362     #ifdef SUPPORT_UCP
1363     return c != UCD_OTHERCASE(c);
1364     #else
1365     return FALSE;
1366     #endif
1367     }
1368 ph10 836 #ifndef COMPILE_PCRE8
1369     return common->fcc[c] != c;
1370     #endif
1371 ph10 664 }
1372     else
1373     #endif
1374     c = *cc;
1375 ph10 836 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1376 ph10 664 }
1377    
1378     static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1379     {
1380     /* Returns with the othercase. */
1381 ph10 836 #ifdef SUPPORT_UTF
1382     if (common->utf && c > 127)
1383 ph10 664 {
1384     #ifdef SUPPORT_UCP
1385     return UCD_OTHERCASE(c);
1386     #else
1387     return c;
1388     #endif
1389     }
1390     #endif
1391 ph10 836 return TABLE_GET(c, common->fcc, c);
1392 ph10 664 }
1393    
1394 ph10 836 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1395 ph10 664 {
1396     /* Detects if the character and its othercase has only 1 bit difference. */
1397     unsigned int c, oc, bit;
1398 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1399 ph10 664 int n;
1400     #endif
1401    
1402 ph10 836 #ifdef SUPPORT_UTF
1403     if (common->utf)
1404 ph10 664 {
1405     GETCHAR(c, cc);
1406     if (c <= 127)
1407     oc = common->fcc[c];
1408     else
1409     {
1410     #ifdef SUPPORT_UCP
1411     oc = UCD_OTHERCASE(c);
1412     #else
1413     oc = c;
1414     #endif
1415     }
1416     }
1417     else
1418     {
1419     c = *cc;
1420 ph10 836 oc = TABLE_GET(c, common->fcc, c);
1421 ph10 664 }
1422     #else
1423     c = *cc;
1424 ph10 836 oc = TABLE_GET(c, common->fcc, c);
1425 ph10 664 #endif
1426    
1427     SLJIT_ASSERT(c != oc);
1428    
1429     bit = c ^ oc;
1430     /* Optimized for English alphabet. */
1431     if (c <= 127 && bit == 0x20)
1432     return (0 << 8) | 0x20;
1433    
1434     /* Since c != oc, they must have at least 1 bit difference. */
1435     if (!ispowerof2(bit))
1436     return 0;
1437    
1438 ph10 836 #ifdef COMPILE_PCRE8
1439    
1440     #ifdef SUPPORT_UTF
1441     if (common->utf && c > 127)
1442 ph10 664 {
1443 ph10 836 n = GET_EXTRALEN(*cc);
1444 ph10 664 while ((bit & 0x3f) == 0)
1445     {
1446     n--;
1447     bit >>= 6;
1448     }
1449     return (n << 8) | bit;
1450     }
1451 ph10 836 #endif /* SUPPORT_UTF */
1452 ph10 664 return (0 << 8) | bit;
1453 ph10 836
1454     #else /* COMPILE_PCRE8 */
1455    
1456     #ifdef COMPILE_PCRE16
1457     #ifdef SUPPORT_UTF
1458     if (common->utf && c > 65535)
1459     {
1460     if (bit >= (1 << 10))
1461     bit >>= 10;
1462     else
1463     return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1464     }
1465     #endif /* SUPPORT_UTF */
1466     return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1467     #endif /* COMPILE_PCRE16 */
1468    
1469     #endif /* COMPILE_PCRE8 */
1470 ph10 664 }
1471    
1472 zherczeg 918 static void check_partial(compiler_common *common, BOOL force)
1473 ph10 664 {
1474 zherczeg 918 /* Checks whether a partial matching is occured. Does not modify registers. */
1475 ph10 664 DEFINE_COMPILER;
1476 zherczeg 918 struct sljit_jump *jump = NULL;
1477 zherczeg 914
1478 zherczeg 918 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1479    
1480 zherczeg 914 if (common->mode == JIT_COMPILE)
1481     return;
1482    
1483 zherczeg 920 if (!force)
1484     jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1485     else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1486     jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
1487 zherczeg 918
1488 zherczeg 914 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1489 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1490 zherczeg 914 else
1491     {
1492     if (common->partialmatchlabel != NULL)
1493     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1494     else
1495     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1496     }
1497 zherczeg 918
1498     if (jump != NULL)
1499     JUMPHERE(jump);
1500 ph10 664 }
1501    
1502 zherczeg 914 static struct sljit_jump *check_str_end(compiler_common *common)
1503     {
1504     /* Does not affect registers. Usually used in a tight spot. */
1505     DEFINE_COMPILER;
1506     struct sljit_jump *jump;
1507     struct sljit_jump *nohit;
1508     struct sljit_jump *return_value;
1509    
1510     if (common->mode == JIT_COMPILE)
1511     return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1512    
1513     jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1514     if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1515     {
1516 zherczeg 920 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1517     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1518 zherczeg 914 JUMPHERE(nohit);
1519     return_value = JUMP(SLJIT_JUMP);
1520     }
1521     else
1522     {
1523 zherczeg 920 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1524 zherczeg 914 if (common->partialmatchlabel != NULL)
1525     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1526     else
1527     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1528     }
1529     JUMPHERE(jump);
1530     return return_value;
1531     }
1532    
1533     static void fallback_at_str_end(compiler_common *common, jump_list **fallbacks)
1534     {
1535     DEFINE_COMPILER;
1536     struct sljit_jump *jump;
1537    
1538     if (common->mode == JIT_COMPILE)
1539     {
1540     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1541     return;
1542     }
1543    
1544     /* Partial matching mode. */
1545     jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1546 zherczeg 920 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
1547 zherczeg 914 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1548     {
1549 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1550 zherczeg 914 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
1551     }
1552     else
1553     {
1554     if (common->partialmatchlabel != NULL)
1555     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1556     else
1557     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1558     }
1559     JUMPHERE(jump);
1560     }
1561    
1562 ph10 664 static void read_char(compiler_common *common)
1563     {
1564     /* Reads the character into TMP1, updates STR_PTR.
1565     Does not check STR_END. TMP2 Destroyed. */
1566     DEFINE_COMPILER;
1567 ph10 836 #ifdef SUPPORT_UTF
1568 ph10 664 struct sljit_jump *jump;
1569     #endif
1570    
1571 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1572     #ifdef SUPPORT_UTF
1573     if (common->utf)
1574 ph10 664 {
1575 ph10 836 #ifdef COMPILE_PCRE8
1576 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1577 ph10 836 #else
1578     #ifdef COMPILE_PCRE16
1579     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1580     #endif
1581     #endif /* COMPILE_PCRE8 */
1582     add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1583 ph10 664 JUMPHERE(jump);
1584     }
1585     #endif
1586 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1587 ph10 664 }
1588    
1589     static void peek_char(compiler_common *common)
1590     {
1591     /* Reads the character into TMP1, keeps STR_PTR.
1592     Does not check STR_END. TMP2 Destroyed. */
1593     DEFINE_COMPILER;
1594 ph10 836 #ifdef SUPPORT_UTF
1595 ph10 664 struct sljit_jump *jump;
1596     #endif
1597    
1598 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1599     #ifdef SUPPORT_UTF
1600     if (common->utf)
1601 ph10 664 {
1602 ph10 836 #ifdef COMPILE_PCRE8
1603 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1604 ph10 836 #else
1605     #ifdef COMPILE_PCRE16
1606     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1607     #endif
1608     #endif /* COMPILE_PCRE8 */
1609     add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1610 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1611     JUMPHERE(jump);
1612     }
1613     #endif
1614     }
1615    
1616     static void read_char8_type(compiler_common *common)
1617     {
1618     /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1619     DEFINE_COMPILER;
1620 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1621 ph10 664 struct sljit_jump *jump;
1622     #endif
1623    
1624 ph10 836 #ifdef SUPPORT_UTF
1625     if (common->utf)
1626 ph10 664 {
1627 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1628     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1629     #ifdef COMPILE_PCRE8
1630 ph10 664 /* This can be an extra read in some situations, but hopefully
1631 ph10 836 it is needed in most cases. */
1632 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1633 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1634 ph10 836 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1635 ph10 664 JUMPHERE(jump);
1636 ph10 836 #else
1637     #ifdef COMPILE_PCRE16
1638     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1639     jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1640     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1641     JUMPHERE(jump);
1642     /* Skip low surrogate if necessary. */
1643     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1644     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1645     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1646     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1647     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1648     #endif
1649     #endif /* COMPILE_PCRE8 */
1650 ph10 664 return;
1651     }
1652     #endif
1653 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1654     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1655     #ifdef COMPILE_PCRE16
1656     /* The ctypes array contains only 256 values. */
1657     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1658     jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1659     #endif
1660     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1661     #ifdef COMPILE_PCRE16
1662     JUMPHERE(jump);
1663     #endif
1664 ph10 664 }
1665    
1666     static void skip_char_back(compiler_common *common)
1667     {
1668 ph10 836 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1669 ph10 664 DEFINE_COMPILER;
1670 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1671 ph10 664 struct sljit_label *label;
1672    
1673 ph10 836 if (common->utf)
1674 ph10 664 {
1675     label = LABEL();
1676 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1677     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1678 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1679     CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1680     return;
1681     }
1682     #endif
1683 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1684     if (common->utf)
1685     {
1686     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1687     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1688     /* Skip low surrogate if necessary. */
1689     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1690     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1691     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1692     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1693     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1694     return;
1695     }
1696     #endif
1697     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1698 ph10 664 }
1699    
1700     static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1701     {
1702     /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1703     DEFINE_COMPILER;
1704    
1705     if (nltype == NLTYPE_ANY)
1706     {
1707     add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1708     add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1709     }
1710     else if (nltype == NLTYPE_ANYCRLF)
1711     {
1712     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1713     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1714     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1715     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1716     add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1717     }
1718     else
1719     {
1720 ph10 836 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1721 ph10 664 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1722     }
1723     }
1724    
1725 ph10 836 #ifdef SUPPORT_UTF
1726    
1727     #ifdef COMPILE_PCRE8
1728     static void do_utfreadchar(compiler_common *common)
1729 ph10 664 {
1730 ph10 836 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1731 zherczeg 736 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1732 ph10 664 DEFINE_COMPILER;
1733     struct sljit_jump *jump;
1734    
1735     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1736     /* Searching for the first zero. */
1737     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1738     jump = JUMP(SLJIT_C_NOT_ZERO);
1739 ph10 836 /* Two byte sequence. */
1740     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1741     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1742 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1743     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1744     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1745     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1746 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1747 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1748     JUMPHERE(jump);
1749    
1750     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1751     jump = JUMP(SLJIT_C_NOT_ZERO);
1752 ph10 836 /* Three byte sequence. */
1753     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1754 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1755     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1756     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1757     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1758     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1759 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1760     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1761 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1762     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1763 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1764 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1765     JUMPHERE(jump);
1766    
1767 ph10 836 /* Four byte sequence. */
1768     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1769 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1770     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1771     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1772     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1773     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1774 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1775 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1776     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1777     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1778 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1779     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1780 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1781     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1782 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1783 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1784     }
1785    
/* Emits the body of a fast-call helper (entered with sljit_emit_fast_enter,
left with sljit_emit_fast_return) that produces the ctypes entry of a
multi-byte UTF-8 character, or 0 when the character has no ctypes entry. */
1786 ph10 836 static void do_utfreadtype8(compiler_common *common)
1787 ph10 664 {
1788 ph10 836 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1789     of the character (>= 0xc0). Return value in TMP1. */
1790 ph10 664 DEFINE_COMPILER;
1791     struct sljit_jump *jump;
1792     struct sljit_jump *compare;
1793    
1794     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1795    
/* Bit 0x20 of the lead byte is clear only on the two byte path below; every
other lead byte takes the jump to the end of this function. */
1796     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1797     jump = JUMP(SLJIT_C_NOT_ZERO);
1798 ph10 836 /* Two byte sequence. */
/* The second byte is read at offset 0 of STR_PTR, which is then advanced by
one unit. The code point is ((lead & 0x1f) << 6) | (second & 0x3f). */
1799     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1800     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1801 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1802     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1803     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1804     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Only code points up to 255 are looked up in the ctypes table. */
1805     compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1806     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1807     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1808    
/* Two byte sequence whose value is above 255: the returned type is 0. */
1809     JUMPHERE(compare);
1810     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1811     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1812     JUMPHERE(jump);
1813    
1814     /* We only have types for characters less than 256. */
/* Longer sequence: STR_PTR is advanced by the utf8_table4 entry selected by
the lead byte (the table is indexed with lead - 0xc0) and type 0 is returned.
NOTE(review): the entry is presumably the number of trailing bytes - confirm
against the definition of utf8_table4. */
1815 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1816 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1817     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1818     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1819     }
1820    
1821 ph10 836 #else /* COMPILE_PCRE8 */
1822 ph10 664
1823 ph10 836 #ifdef COMPILE_PCRE16
/* Emits the body of a fast-call helper that turns a UTF-16 surrogate pair
into a single code point. Only built when COMPILE_PCRE16 is defined. */
1824     static void do_utfreadchar(compiler_common *common)
1825     {
1826     /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1827     of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1828     DEFINE_COMPILER;
1829     struct sljit_jump *jump;
1830    
1831     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
/* Values below 0xdc00 (given the >= 0xd800 precondition) go to the combining
code; everything else is returned untouched. */
1832     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1833     /* Do nothing, only return. */
/* NOTE(review): TMP2 is not written on this path, so the "length - 1 in TMP2"
part of the contract above only holds if the caller does not rely on it here
or has set it beforehand - confirm against the callers. */
1834     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1835    
1836     JUMPHERE(jump);
1837     /* Combine two 16 bit characters. */
/* The second unit is read from STR_PTR + 1 unit and STR_PTR is advanced by one
unit. Result: (((first & 0x3ff) << 10) | (second & 0x3ff)) + 0x10000, with
TMP2 set to IN_UCHARS(1). */
1838     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1839     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1840     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1841     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1842     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1843     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1844     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1845     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1846     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1847     }
1848     #endif /* COMPILE_PCRE16 */
1849    
1850     #endif /* COMPILE_PCRE8 */
1851    
1852     #endif /* SUPPORT_UTF */
1853    
1854 ph10 664 #ifdef SUPPORT_UCP
1855    
1856     /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift that split a character into (block number, offset in block). */
1857     #define UCD_BLOCK_MASK 127
1858     #define UCD_BLOCK_SHIFT 7
1859    
/* Emits the body of a fast-call helper that performs the two stage Unicode
property table lookup (ucd_stage1, ucd_stage2, ucd_records) in generated code. */
1860     static void do_getucd(compiler_common *common)
1861     {
1862     /* Looks up the UCD record of the character passed in TMP1.
1863     Returns chartype in TMP1 and UCD offset in TMP2. */
1864     DEFINE_COMPILER;
1865    
/* The shifts and the record stride used below depend on these two sizes. */
1866     SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1867    
1868     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
/* Stage 1: a byte sized entry selected by the block number (TMP1 >> 7). */
1869     OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1870 ph10 836 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
/* Stage 2 index: (stage1 entry << 7) + (character & 127). */
1871 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1872     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1873     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* Stage 2: an unsigned half word entry; the trailing 1 is the shift applied
to the index register, which matches 16 bit wide entries. */
1874 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1875 ph10 664 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Record access: the index is shifted by 3, i.e. multiplied by
sizeof(ucd_record) == 8 (asserted above), and the chartype byte is loaded. */
1876 ph10 836 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1877 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1878     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1879     }
1880     #endif
1881    
/* Emits the code that moves STR_PTR forward by one character and returns the
label placed at the start of that code.

hascrorlf and firstline both disable the extra "skip the second newline
character" step (see newlinecheck). When firstline is set, code is also
emitted that scans for the end of the first line and stores it in the local
slot common->first_line_end.

The first pass through the emitted code does not advance STR_PTR: an
unconditional jump (start) skips over the advancing code to the end of this
function's output. */
1882     static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1883     {
1884     DEFINE_COMPILER;
1885     struct sljit_label *mainloop;
1886     struct sljit_label *newlinelabel = NULL;
1887     struct sljit_jump *start;
1888     struct sljit_jump *end = NULL;
1889     struct sljit_jump *nl = NULL;
1890 ph10 836 #ifdef SUPPORT_UTF
1891     struct sljit_jump *singlechar;
1892 zherczeg 736 #endif
1893 ph10 664 jump_list *newline = NULL;
1894     BOOL newlinecheck = FALSE;
1895 ph10 836 BOOL readuchar = FALSE;
1896 ph10 664
/* The two character newline skip is only generated when neither flag is set
and the newline type is "any", "anycrlf" or a fixed value above 255. */
1897     if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1898     common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1899     newlinecheck = TRUE;
1900    
1901     if (firstline)
1902     {
1903     /* Search for the end of the first line. */
/* STR_PTR is saved in LOCALS0 and restored once the scan has finished. The
first_line_end slot starts out as STR_END and is overwritten by the scan. */
1904 zherczeg 920 SLJIT_ASSERT(common->first_line_end != 0);
1905 ph10 664 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1906 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
1907 ph10 664
1908     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1909     {
/* Fixed two unit newline: compare the units at STR_PTR - 1 and STR_PTR with
the high and low byte of common->newline; on a match first_line_end becomes
STR_PTR - 1 unit, i.e. the position of the first newline unit. */
1910     mainloop = LABEL();
1911 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1912 ph10 664 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1913 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1914     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1915 ph10 664 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
1916     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1917 zherczeg 920 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1918 ph10 664 }
1919     else
1920     {
/* Generic newline: the position before each character is stored, so that the
slot holds the start of the newline when check_newlinechar() leaves the loop
through the jumps collected in the newline list. */
1921     end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1922     mainloop = LABEL();
1923     /* Continual stores does not cause data dependency. */
1924 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
1925 ph10 664 read_char(common);
1926     check_newlinechar(common, common->nltype, &newline, TRUE);
1927     CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
/* No newline before STR_END: the final STR_PTR is stored instead. */
1928 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
1929 ph10 664 set_jumps(newline, LABEL());
1930     }
1931    
1932     JUMPHERE(end);
1933     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1934     }
1935    
/* Skip the advancing code on the first pass. */
1936     start = JUMP(SLJIT_JUMP);
1937    
1938     if (newlinecheck)
1939     {
/* Reached from the main loop when the current unit equals the high byte of
common->newline. STR_PTR moves past that unit; unless the end of the subject
is reached, one more unit is skipped when it equals the low byte of
common->newline (the 0/1 comparison result is scaled to bytes in 16 bit mode
and added to STR_PTR). */
1940     newlinelabel = LABEL();
1941 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1942 ph10 664 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1943 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1944 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1945     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1946 ph10 836 #ifdef COMPILE_PCRE16
1947     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1948     #endif
1949 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1950     nl = JUMP(SLJIT_JUMP);
1951     }
1952    
/* This is the label returned to the caller. */
1953     mainloop = LABEL();
1954    
1955     /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit only has to be loaded when it is inspected below, either
for the UTF length computation or for the newline test. */
1956 ph10 836 #ifdef SUPPORT_UTF
1957     if (common->utf) readuchar = TRUE;
1958 ph10 664 #endif
1959 ph10 836 if (newlinecheck) readuchar = TRUE;
1960 ph10 664
1961 ph10 836 if (readuchar)
1962     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1963 ph10 664
1964     if (newlinecheck)
1965     CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1966    
1967 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1968     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1969     if (common->utf)
1970 ph10 664 {
/* UTF-8: for lead bytes >= 0xc0 the utf8_table4 entry selected by the lead
byte is added to STR_PTR as well. */
1971 ph10 836 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1972     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1973 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1974 ph10 836 JUMPHERE(singlechar);
1975 ph10 664 }
1976     #endif
1977 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1978     if (common->utf)
1979     {
/* UTF-16: for units >= 0xd800, one more unit (two bytes) is skipped when
(unit & 0xfc00) == 0xd800. */
1980     singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1981     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1982     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1983     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1984     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1985     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1986     JUMPHERE(singlechar);
1987     }
1988     #endif
/* All paths (first pass, newline skip, end of subject) meet here. */
1989 ph10 664 JUMPHERE(start);
1990    
1991     if (newlinecheck)
1992     {
1993     JUMPHERE(end);
1994     JUMPHERE(nl);
1995     }
1996    
1997     return mainloop;
1998     }
1999    
/* Emits a loop that advances STR_PTR character by character until the unit at
STR_PTR equals first_char (or, when caseless is set, its other case) or until
STR_PTR reaches STR_END. When firstline is set, STR_END is replaced by the
value stored in common->first_line_end for the duration of the loop; the
original STR_END is kept in POSSESSIVE0 and restored at the end. */
2000 ph10 836 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2001 ph10 664 {
2002     DEFINE_COMPILER;
2003     struct sljit_label *start;
2004     struct sljit_jump *leave;
2005     struct sljit_jump *found;
2006 ph10 836 pcre_uchar oc, bit;
2007 ph10 664
2008     if (firstline)
2009     {
2010     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2011 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2012 ph10 664 }
2013    
2014     start = LABEL();
2015     leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2016 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2017 ph10 664
/* oc is the other case of first_char, or first_char itself when the match is
caseful or the character has no other case. */
2018 ph10 836 oc = first_char;
2019     if (caseless)
2020     {
2021     oc = TABLE_GET(first_char, common->fcc, first_char);
2022     #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2023     if (first_char > 127 && common->utf)
2024     oc = UCD_OTHERCASE(first_char);
2025     #endif
2026     }
2027     if (first_char == oc)
2028     found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2029 ph10 664 else
2030     {
2031 ph10 836 bit = first_char ^ oc;
2032 ph10 664 if (ispowerof2(bit))
2033     {
/* The two cases differ in a single bit: force that bit on and compare once. */
2034     OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2035 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2036 ph10 664 }
2037     else
2038     {
/* Otherwise OR together the results of two separate equality tests. */
2039 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2040 ph10 664 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2041     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2042     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2043     found = JUMP(SLJIT_C_NOT_ZERO);
2044     }
2045     }
2046    
/* No match at this position: step over the whole character. TMP1 still holds
the unit that was just read. */
2047 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2048     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2049     if (common->utf)
2050 ph10 664 {
/* UTF-8: lead bytes >= 0xc0 additionally skip the utf8_table4 entry. */
2051 zherczeg 736 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2052 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2053 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2054     }
2055     #endif
2056 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2057     if (common->utf)
2058     {
/* UTF-16: one more unit is skipped when (unit & 0xfc00) == 0xd800. */
2059     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2060     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2061     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2062     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2063     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2064     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2065     }
2066     #endif
2067 ph10 664 JUMPTO(SLJIT_JUMP, start);
2068     JUMPHERE(found);
2069     JUMPHERE(leave);
2070    
2071     if (firstline)
2072     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2073     }
2074    
/* Emits code that moves STR_PTR forward to the position just after the next
newline. Nothing is skipped when STR_PTR is at or before the str field of the
jit_arguments structure, or (in the fixed two unit case) already at STR_END.
When firstline is set, STR_END is replaced by the value stored in
common->first_line_end while the search runs; the original STR_END is kept in
POSSESSIVE0 and restored on every exit path. */
2075     static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2076     {
2077     DEFINE_COMPILER;
2078     struct sljit_label *loop;
2079     struct sljit_jump *lastchar;
2080     struct sljit_jump *firstchar;
2081     struct sljit_jump *leave;
2082     struct sljit_jump *foundcr = NULL;
2083     struct sljit_jump *notfoundnl;
2084     jump_list *newline = NULL;
2085    
2086     if (firstline)
2087     {
2088     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2089 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2090 ph10 664 }
2091    
/* Fixed newline made of two units. */
2092     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2093     {
2094     lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2095     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2096     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2097     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2098     firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2099    
/* Step back one unit before entering the loop, but only when STR_PTR is at
least two units past the begin pointer, so that the STR_PTR - 2 load in the
loop stays at or after begin. */
2100 ph10 836 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2101 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2102     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2103 ph10 836 #ifdef COMPILE_PCRE16
2104     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2105     #endif
2106 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2107    
/* Advance until the two units before STR_PTR are the high and low byte of
common->newline, or until STR_END is reached. */
2108     loop = LABEL();
2109 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2110 ph10 664 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2111 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2112     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2113 ph10 664 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2114     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2115    
2116     JUMPHERE(leave);
2117     JUMPHERE(firstchar);
2118     JUMPHERE(lastchar);
2119    
2120     if (firstline)
2121     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2122     return;
2123     }
2124    
/* All other newline types: go back one character, then read characters until
check_newlinechar() recognises a newline. */
2125     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2126     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2127     firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2128     skip_char_back(common);
2129    
2130     loop = LABEL();
2131     read_char(common);
2132     lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2133     if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2134     foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* Every jump collected in the newline list goes back to the loop label.
NOTE(review): with the last argument FALSE these are presumably the "not a
newline" exits - confirm against check_newlinechar(). */
2135     check_newlinechar(common, common->nltype, &newline, FALSE);
2136     set_jumps(newline, loop);
2137    
2138     if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2139     {
/* After a CR: unless STR_END has been reached, skip one more unit when it is
CHAR_NL (the 0/1 comparison result is scaled to bytes in 16 bit mode). */
2140     leave = JUMP(SLJIT_JUMP);
2141     JUMPHERE(foundcr);
2142     notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2143 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2144 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2145     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2146 ph10 836 #ifdef COMPILE_PCRE16
2147     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2148     #endif
2149 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2150     JUMPHERE(notfoundnl);
2151     JUMPHERE(leave);
2152     }
2153     JUMPHERE(lastchar);
2154     JUMPHERE(firstchar);
2155    
2156     if (firstline)
2157     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2158     }
2159    
/* Emits a loop that advances STR_PTR character by character until the unit at
STR_PTR has its bit set in the bitmap located at address start_bits, or until
STR_PTR reaches STR_END. When firstline is set, STR_END is replaced by the
value stored in common->first_line_end for the duration of the loop; the
original STR_END is kept in POSSESSIVE0 and restored at the end. */
2160     static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2161     {
2162     DEFINE_COMPILER;
2163     struct sljit_label *start;
2164     struct sljit_jump *leave;
2165     struct sljit_jump *found;
2166 ph10 836 #ifndef COMPILE_PCRE8
2167     struct sljit_jump *jump;
2168     #endif
2169 ph10 664
2170     if (firstline)
2171     {
2172     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2173 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2174 ph10 664 }
2175    
2176     start = LABEL();
2177     leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2178 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* The bitmap test below overwrites TMP1, so in UTF mode the unit is kept in
TMP3 for the character length computation that follows a failed test. */
2179     #ifdef SUPPORT_UTF
2180     if (common->utf)
2181 zherczeg 736 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2182 ph10 664 #endif
/* Wider code units: every value of 255 or above is tested as 255. */
2183 ph10 836 #ifndef COMPILE_PCRE8
2184     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2185     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2186     JUMPHERE(jump);
2187     #endif
/* Test bit (value & 7) of the byte at start_bits[value >> 3]. */
2188 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2189     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2190     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2191     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2192     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2193     found = JUMP(SLJIT_C_NOT_ZERO);
2194    
/* Bit not set: step over the whole character and try again. */
2195 ph10 836 #ifdef SUPPORT_UTF
2196     if (common->utf)
2197 zherczeg 736 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2198     #endif
2199 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2200     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2201     if (common->utf)
2202 zherczeg 736 {
/* UTF-8: lead bytes >= 0xc0 additionally skip the utf8_table4 entry. */
2203     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2204 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2205 zherczeg 736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2206     }
2207 ph10 664 #endif
2208 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2209     if (common->utf)
2210     {
/* UTF-16: one more unit is skipped when (unit & 0xfc00) == 0xd800. */
2211     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2212     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2213     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2214     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2215     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2216     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2217     }
2218     #endif
2219 ph10 664 JUMPTO(SLJIT_JUMP, start);
2220     JUMPHERE(found);
2221     JUMPHERE(leave);
2222    
2223     if (firstline)
2224     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2225     }
2226    
/* Emits a forward scan for req_char (or, when caseless is set, its other
case) and returns the jump that is taken when the scan reaches STR_END without
finding it; the caller has to bind that jump.

The scan is skipped when STR_PTR + REQ_BYTE_MAX is still below STR_END, or
when STR_PTR is below the position remembered in the local slot
common->req_char_ptr. A successful scan stores the position of the found unit
in that slot. With has_firstchar the scan starts one unit after STR_PTR,
otherwise at STR_PTR. STR_PTR itself is not modified. */
2227 ph10 836 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2228 ph10 664 {
2229     DEFINE_COMPILER;
2230     struct sljit_label *loop;
2231     struct sljit_jump *toolong;
2232     struct sljit_jump *alreadyfound;
2233     struct sljit_jump *found;
2234     struct sljit_jump *foundoc = NULL;
2235     struct sljit_jump *notfound;
2236 ph10 836 pcre_uchar oc, bit;
2237 ph10 664
2238 zherczeg 920 SLJIT_ASSERT(common->req_char_ptr != 0);
2239     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
/* NOTE(review): REQ_BYTE_MAX is added as a plain immediate (no IN_UCHARS), so
the distance is measured in bytes in every build - confirm this is intended
for 16 bit code units. */
2240 ph10 664 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2241     toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2242     alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2243    
/* TMP1 is the scan pointer. */
2244 ph10 836 if (has_firstchar)
2245     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2246 ph10 664 else
2247     OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2248    
2249     loop = LABEL();
2250     notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2251    
2252 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char, or req_char itself when the match is
caseful or the character has no other case. */
2253     oc = req_char;
2254     if (caseless)
2255     {
2256     oc = TABLE_GET(req_char, common->fcc, req_char);
2257     #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2258     if (req_char > 127 && common->utf)
2259     oc = UCD_OTHERCASE(req_char);
2260     #endif
2261     }
2262     if (req_char == oc)
2263     found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2264 ph10 664 else
2265     {
2266 ph10 836 bit = req_char ^ oc;
2267 ph10 664 if (ispowerof2(bit))
2268     {
/* The two cases differ in a single bit: force that bit on and compare once. */
2269     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2270 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2271 ph10 664 }
2272     else
2273     {
/* Otherwise two separate comparisons, each with its own exit jump. */
2274 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2275 ph10 664 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2276     }
2277     }
2278 ph10 836 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2279 ph10 664 JUMPTO(SLJIT_JUMP, loop);
2280    
2281     JUMPHERE(found);
2282     if (foundoc)
2283     JUMPHERE(foundoc);
/* Remember where the character was found. */
2284 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2285 ph10 664 JUMPHERE(alreadyfound);
2286     JUMPHERE(toolong);
2287     return notfound;
2288     }
2289    
/* Emits the body of a fast-call helper that walks the frame data starting at
STACK_TOP and replays it. TMP1 is the read pointer; STACK_TOP itself is not
modified. Each entry starts with one word that selects the action:
  - a value above frame_end (signed compare): an offset from SLJIT_LOCALS_REG;
    the next two words are copied to that location (entry size: 3 words);
  - frame_end: the walk stops and the helper returns;
  - frame_setstrbegin: the next word is stored in OVECTOR(0) (2 words);
  - any other value: the entry is skipped (2 words). */
2290     static void do_revertframes(compiler_common *common)
2291     {
2292     DEFINE_COMPILER;
2293     struct sljit_jump *jump;
2294     struct sljit_label *mainloop;
2295    
2296     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2297 zherczeg 726 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2298 ph10 664
2299     /* Drop frames until we reach STACK_TOP. */
2300     mainloop = LABEL();
2301     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2302     jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Ordinary entry: restore two consecutive words of the local area. */
2303     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
2304     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2305     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2306     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2307     JUMPTO(SLJIT_JUMP, mainloop);
2308    
2309     JUMPHERE(jump);
2310     jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2311     /* End of dropping frames. */
2312     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2313    
2314     JUMPHERE(jump);
2315     jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2316 zherczeg 696 /* Set string begin. */
2317 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2318     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2319     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2320     JUMPTO(SLJIT_JUMP, mainloop);
2321    
2322     JUMPHERE(jump);
2323     /* Unknown command. */
/* It is skipped as a two word entry. */
2324     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2325     JUMPTO(SLJIT_JUMP, mainloop);
2326     }
2327    
/* Emits the body of a fast-call helper that tests for a word boundary at
STR_PTR. The "is a word character" flag (0 or 1) of the character before
STR_PTR is kept in LOCALS1, the flag of the character at STR_PTR in TMP2; the
helper returns with the flags of (TMP2 XOR LOCALS1) set, so the result is
non-zero exactly when the two flags differ. A missing character (STR_PTR at or
before the begin pointer, or the end reported by check_str_end()) counts as a
non-word character. The return address is kept in LOCALS0. */
2328     static void check_wordboundary(compiler_common *common)
2329     {
2330     DEFINE_COMPILER;
2331 zherczeg 914 struct sljit_jump *skipread;
2332 ph10 836 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2333 ph10 664 struct sljit_jump *jump;
2334 ph10 670 #endif
2335 ph10 664
/* The non-UCP paths extract the flag with a shift by 4 and a mask of 1. */
2336 zherczeg 741 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2337 ph10 664
2338     sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2339     /* Get type of the previous char, and put it to LOCALS1. */
2340     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2341     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2342     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2343 zherczeg 914 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again, which leaves STR_PTR where it
was on entry. */
2344 ph10 664 skip_char_back(common);
2345 zherczeg 914 check_start_used_ptr(common);
2346 ph10 664 read_char(common);
2347    
2348     /* Testing char type. */
2349     #ifdef SUPPORT_UCP
2350 ph10 836 if (common->use_ucp)
2351 ph10 664 {
/* Word character: underscore (TMP2 preset to 1), or a chartype returned by the
getucd helper in the range ucp_Ll..ucp_Lu or ucp_Nd..ucp_No. Each range is
tested with a subtraction followed by one unsigned compare. */
2352     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2353     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2354     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2355     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2356     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2357     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2358     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2359     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2360     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2361     JUMPHERE(jump);
2362     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2363     }
2364     else
2365     #endif
2366     {
/* Table lookup: characters above 255 skip the lookup and keep the 0 already
stored in LOCALS1. In the 8 bit library this check is only emitted in UTF
mode. */
2367 ph10 836 #ifndef COMPILE_PCRE8
2368     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2369     #elif defined SUPPORT_UTF
2370 ph10 664 /* Here LOCALS1 has already been zeroed. */
2371     jump = NULL;
2372 ph10 836 if (common->utf)
2373 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2374 ph10 836 #endif /* COMPILE_PCRE8 */
2375 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2376     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2377     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2378     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2379 ph10 836 #ifndef COMPILE_PCRE8
2380     JUMPHERE(jump);
2381     #elif defined SUPPORT_UTF
2382 ph10 664 if (jump != NULL)
2383     JUMPHERE(jump);
2384 ph10 836 #endif /* COMPILE_PCRE8 */
2385 ph10 664 }
2386 zherczeg 914 JUMPHERE(skipread);
2387 ph10 664
/* Now the character at STR_PTR; its flag is left in TMP2 and defaults to 0
when check_str_end() reports the end of the subject. */
2388     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2389 zherczeg 914 skipread = check_str_end(common);
2390 ph10 664 peek_char(common);
2391    
2392     /* Testing char type. This is a code duplication. */
2393     #ifdef SUPPORT_UCP
2394 ph10 836 if (common->use_ucp)
2395 ph10 664 {
2396     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2397     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2398     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2399     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2400     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2401     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2402     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2403     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2404     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2405     JUMPHERE(jump);
2406     }
2407     else
2408     #endif
2409     {
2410 ph10 836 #ifndef COMPILE_PCRE8
2411     /* TMP2 may be destroyed by peek_char. */
2412 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2413 ph10 836 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2414     #elif defined SUPPORT_UTF
2415     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2416 ph10 664 jump = NULL;
2417 ph10 836 if (common->utf)
2418 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2419     #endif
2420     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2421     OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2422     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2423 ph10 836 #ifndef COMPILE_PCRE8
2424     JUMPHERE(jump);
2425     #elif defined SUPPORT_UTF
2426 ph10 664 if (jump != NULL)
2427     JUMPHERE(jump);
2428 ph10 836 #endif /* COMPILE_PCRE8 */
2429 ph10 664 }
2430 zherczeg 914 JUMPHERE(skipread);
2431 ph10 664
/* Result: flags of (flag of next character) XOR (flag of previous character). */
2432     OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2433     sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2434     }
2435    
/* Emits the body of a fast-call helper that tests the character in TMP1
against the newline characters 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in
16 bit builds and in 8 bit UTF mode. The helper returns with TMP2 non-zero
(and the flags set accordingly) when the character matched. */
2436     static void check_anynewline(compiler_common *common)
2437     {
2438     /* Check whether TMP1 contains a newline character. TMP1 and TMP2 are destroyed. */
2439     DEFINE_COMPILER;
2440    
2441     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2442    
/* After subtracting 0x0a, a single unsigned compare covers 0x0a..0x0d. */
2443     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2444     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2445     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2446     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2447 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2448     #ifdef COMPILE_PCRE8
2449     if (common->utf)
2450 ph10 664 {
2451 ph10 836 #endif
/* Setting the lowest bit maps both 0x2028 - 0x0a and 0x2029 - 0x0a to the same
value, so one compare tests for both characters. */
2452 ph10 664 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2453     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2454     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2455 ph10 836 #ifdef COMPILE_PCRE8
2456 ph10 664 }
2457     #endif
2458 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the last comparison that was emitted and sets the final flags. */
2459 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2460     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2461     }
2462    
/* Emits the body of a fast-call helper that tests the character in TMP1
against 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000-0x200a, 0x202f,
0x205f and 0x3000 in 16 bit builds and in 8 bit UTF mode. The helper returns
with TMP2 non-zero (and the flags set accordingly) when the character
matched. */
2463     static void check_hspace(compiler_common *common)
2464     {
2465     /* Check whether TMP1 contains a horizontal space character. TMP2 is destroyed;
TMP1 is also modified when the wide character tests are emitted. */
2466     DEFINE_COMPILER;
2467    
2468     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2469    
/* Each test sets the equal flag; COND_VALUE accumulates the results in TMP2. */
2470     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2471     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2472     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2473     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2474     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2475 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2476     #ifdef COMPILE_PCRE8
2477     if (common->utf)
2478 ph10 664 {
2479 ph10 836 #endif
2480 ph10 664 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2481     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2482     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2483     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2484     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 holds character - 0x2000, so that the range 0x2000..0x200a
needs only one unsigned compare; the remaining constants are rebased to match. */
2485     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2486     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2487     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2488     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2489     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2490     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2491     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2492     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2493 ph10 836 #ifdef COMPILE_PCRE8
2494 ph10 664 }
2495     #endif
2496 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the last comparison that was emitted and sets the final flags. */
2497 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2498    
2499     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2500     }
2501    
/* Emits the body of a fast-call helper that tests the character in TMP1
against 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in 16 bit builds and in
8 bit UTF mode. The helper returns with TMP2 non-zero (and the flags set
accordingly) when the character matched. */
2502     static void check_vspace(compiler_common *common)
2503     {
2504     /* Check whether TMP1 contains a vertical space character. TMP1 and TMP2 are destroyed. */
2505     DEFINE_COMPILER;
2506    
2507     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2508    
/* After subtracting 0x0a, a single unsigned compare covers 0x0a..0x0d. */
2509     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2510     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2511     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2512     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2513 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2514     #ifdef COMPILE_PCRE8
2515     if (common->utf)
2516 ph10 664 {
2517 ph10 836 #endif
/* Setting the lowest bit maps both 0x2028 - 0x0a and 0x2029 - 0x0a to the same
value, so one compare tests for both characters. */
2518 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2519     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2520     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2521 ph10 836 #ifdef COMPILE_PCRE8
2522 ph10 664 }
2523     #endif
2524 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the last comparison that was emitted and sets the final flags. */
2525 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2526    
2527     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2528     }
2529    
2530     #define CHAR1 STR_END
2531     #define CHAR2 STACK_TOP
2532    
2533     static void do_casefulcmp(compiler_common *common)
2534     {
2535     DEFINE_COMPILER;
2536     struct sljit_jump *jump;
2537     struct sljit_label *label;
2538    
2539     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2540     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2541     OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2542     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
2543 ph10 836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2544     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2545 ph10 664
2546     label = LABEL();
2547 ph10 836 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2548     OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2549 ph10 664 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2550 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2551 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, label);
2552    
2553     JUMPHERE(jump);
2554 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2555 ph10 664 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2556     OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2557     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2558     }
2559    
2560     #define LCC_TABLE STACK_LIMIT
2561    
2562     static void do_caselesscmp(compiler_common *common)
2563     {
2564     DEFINE_COMPILER;
2565     struct sljit_jump *jump;
2566     struct sljit_label *label;
2567    
2568     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2569     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2570    
2571     OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2572     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2573     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2574     OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
2575 ph10 836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2576     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2577 ph10 664
2578     label = LABEL();
2579 ph10 836 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2580     OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2581     #ifndef COMPILE_PCRE8
2582     jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2583     #endif
2584 ph10 664 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2585 ph10 836 #ifndef COMPILE_PCRE8
2586     JUMPHERE(jump);
2587     jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2588     #endif
2589 ph10 664 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2590 ph10 836 #ifndef COMPILE_PCRE8
2591     JUMPHERE(jump);
2592     #endif
2593 ph10 664 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2594 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2595 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, label);
2596    
2597     JUMPHERE(jump);
2598 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2599 ph10 664 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2600     OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2601     OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2602     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2603     }
2604    
2605     #undef LCC_TABLE
2606     #undef CHAR1
2607     #undef CHAR2
2608    
#if defined SUPPORT_UTF && defined SUPPORT_UCP

static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
/* Caseless comparison of the characters in [src1, end1) against the text
starting at args->ptr and bounded by args->end. It is a plain C function
because, as the original note puts it, doing this at JIT level would be
ineffective.

Returns:
  (pcre_uchar*)1  the second text ran out before the first was consumed
  NULL            a character pair differs (neither equal nor other case)
  otherwise       the position in the second text after the compared part */
int ref_char, subj_char;
const pcre_uchar *subject = args->ptr;
const pcre_uchar *subject_end = args->end;

for (; src1 < end1; )
  {
  if (subject >= subject_end)
    return (pcre_uchar*)1;

  GETCHARINC(ref_char, src1);
  GETCHARINC(subj_char, subject);

  if (ref_char == subj_char)
    continue;
  if (ref_char != UCD_OTHERCASE(subj_char))
    return NULL;
  }

return subject;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
2630 ph10 664
2631 ph10 836 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2632 ph10 664 compare_context* context, jump_list **fallbacks)
2633     {
2634     DEFINE_COMPILER;
2635     unsigned int othercasebit = 0;
2636 ph10 836 pcre_uchar *othercasechar = NULL;
2637     #ifdef SUPPORT_UTF
2638     int utflength;
2639 ph10 664 #endif
2640    
2641     if (caseless && char_has_othercase(common, cc))
2642     {
2643     othercasebit = char_get_othercase_bit(common, cc);
2644     SLJIT_ASSERT(othercasebit);
2645     /* Extracting bit difference info. */
2646 ph10 836 #ifdef COMPILE_PCRE8
2647     othercasechar = cc + (othercasebit >> 8);
2648 ph10 664 othercasebit &= 0xff;
2649 ph10 836 #else
2650     #ifdef COMPILE_PCRE16
2651     othercasechar = cc + (othercasebit >> 9);
2652     if ((othercasebit & 0x100) != 0)
2653     othercasebit = (othercasebit & 0xff) << 8;
2654     else
2655     othercasebit &= 0xff;
2656     #endif
2657     #endif
2658 ph10 664 }
2659    
2660     if (context->sourcereg == -1)
2661     {
2662 ph10 836 #ifdef COMPILE_PCRE8
2663 ph10 664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2664     if (context->length >= 4)
2665     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2666     else if (context->length >= 2)
2667 zherczeg 847 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2668 ph10 664 else
2669     #endif
2670     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2671 ph10 836 #else
2672     #ifdef COMPILE_PCRE16
2673     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2674     if (context->length >= 4)
2675     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2676     else
2677     #endif
2678 zherczeg 847 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2679 ph10 836 #endif
2680     #endif /* COMPILE_PCRE8 */
2681 ph10 664 context->sourcereg = TMP2;
2682     }
2683    
2684 ph10 836 #ifdef SUPPORT_UTF
2685     utflength = 1;
2686     if (common->utf && HAS_EXTRALEN(*cc))
2687     utflength += GET_EXTRALEN(*cc);
2688 ph10 664
2689     do
2690     {
2691     #endif
2692    
2693 ph10 836 context->length -= IN_UCHARS(1);
2694 ph10 664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2695    
2696     /* Unaligned read is supported. */
2697 ph10 836 if (othercasebit != 0 && othercasechar == cc)
2698 ph10 664 {
2699 ph10 836 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2700     context->oc.asuchars[context->ucharptr] = othercasebit;
2701 ph10 664 }
2702     else
2703     {
2704 ph10 836 context->c.asuchars[context->ucharptr] = *cc;
2705     context->oc.asuchars[context->ucharptr] = 0;
2706 ph10 664 }
2707 ph10 836 context->ucharptr++;
2708 ph10 664
2709 ph10 836 #ifdef COMPILE_PCRE8
2710     if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2711     #else
2712     if (context->ucharptr >= 2 || context->length == 0)
2713     #endif
2714 ph10 664 {
2715     if (context->length >= 4)
2716     OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2717 ph10 836 #ifdef COMPILE_PCRE8
2718 ph10 664 else if (context->length >= 2)
2719 zherczeg 847 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2720 ph10 664 else if (context->length >= 1)
2721     OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2722 ph10 836 #else
2723     else if (context->length >= 2)
2724 zherczeg 847 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2725 ph10 836 #endif
2726 ph10 664 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2727    
2728 ph10 836 switch(context->ucharptr)
2729 ph10 664 {
2730 ph10 836 case 4 / sizeof(pcre_uchar):
2731 ph10 664 if (context->oc.asint != 0)
2732     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2733     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2734     break;
2735    
2736 ph10 836 case 2 / sizeof(pcre_uchar):
2737 zherczeg 847 if (context->oc.asushort != 0)
2738     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2739     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2740 ph10 664 break;
2741    
2742 ph10 836 #ifdef COMPILE_PCRE8
2743 ph10 664 case 1:
2744     if (context->oc.asbyte != 0)
2745     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2746     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2747     break;
2748 ph10 836 #endif
2749 ph10 664
2750     default:
2751     SLJIT_ASSERT_STOP();
2752     break;
2753     }
2754 ph10 836 context->ucharptr = 0;
2755 ph10 664 }
2756 ph10 691
2757 ph10 664 #else
2758    
2759     /* Unaligned read is unsupported. */
2760 ph10 836 #ifdef COMPILE_PCRE8
2761 ph10 664 if (context->length > 0)
2762     OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2763 ph10 836 #else
2764     if (context->length > 0)
2765     OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2766     #endif
2767 ph10 664 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2768    
2769 ph10 836 if (othercasebit != 0 && othercasechar == cc)
2770 ph10 664 {
2771     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2772     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2773     }
2774     else
2775     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2776    
2777     #endif
2778    
2779     cc++;
2780 ph10 836 #ifdef SUPPORT_UTF
2781     utflength--;
2782 ph10 664 }
2783 ph10 836 while (utflength > 0);
2784 ph10 664 #endif
2785    
2786     return cc;
2787     }
2788    
2789 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2790 ph10 664
/* Helpers for compile_xclass_hotpath. The generated code keeps the type
register (typereg) and the character register (TMP1) biased by a known
compile time offset (typeoffset / charoffset). These macros emit the single
ADD or SUB needed to move the register to a new bias and then record that
bias. Nothing is emitted when the bias is already the requested one.

Each macro is wrapped in do { } while (0) so that it behaves as exactly one
statement, including under an unbraced if / else. The argument is evaluated
more than once, so it must not have side effects. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
2810    
2811 ph10 836 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2812 ph10 664 {
2813     DEFINE_COMPILER;
2814     jump_list *found = NULL;
2815     jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2816     unsigned int c;
2817     int compares;
2818     struct sljit_jump *jump = NULL;
2819 ph10 836 pcre_uchar *ccbegin;
2820 ph10 664 #ifdef SUPPORT_UCP
2821     BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2822     BOOL charsaved = FALSE;
2823 zherczeg 715 int typereg = TMP1, scriptreg = TMP1;
2824     unsigned int typeoffset;
2825 ph10 664 #endif
2826 zherczeg 715 int invertcmp, numberofcmps;
2827     unsigned int charoffset;
2828 ph10 664
2829 ph10 836 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
2830 zherczeg 914 fallback_at_str_end(common, fallbacks);
2831 ph10 664 read_char(common);
2832    
2833     if ((*cc++ & XCL_MAP) != 0)
2834     {
2835     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2836 ph10 836 #ifndef COMPILE_PCRE8
2837     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2838     #elif defined SUPPORT_UTF
2839     if (common->utf)
2840 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2841 ph10 836 #endif
2842 ph10 664
2843     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2844     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2845     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2846     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2847     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2848     add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2849    
2850 ph10 836 #ifndef COMPILE_PCRE8
2851     JUMPHERE(jump);
2852     #elif defined SUPPORT_UTF
2853     if (common->utf)
2854 ph10 664 JUMPHERE(jump);
2855 ph10 836 #endif
2856 ph10 664 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2857     #ifdef SUPPORT_UCP
2858     charsaved = TRUE;
2859     #endif
2860 ph10 836 cc += 32 / sizeof(pcre_uchar);
2861 ph10 664 }
2862    
2863     /* Scanning the necessary info. */
2864     ccbegin = cc;
2865     compares = 0;
2866     while (*cc != XCL_END)
2867     {
2868     compares++;
2869     if (*cc == XCL_SINGLE)
2870     {
2871     cc += 2;
2872 ph10 836 #ifdef SUPPORT_UTF
2873     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2874 ph10 664 #endif
2875     #ifdef SUPPORT_UCP
2876     needschar = TRUE;
2877     #endif
2878     }
2879     else if (*cc == XCL_RANGE)
2880     {
2881     cc += 2;
2882 ph10 836 #ifdef SUPPORT_UTF
2883     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2884 ph10 664 #endif
2885     cc++;
2886 ph10 836 #ifdef SUPPORT_UTF
2887     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2888 ph10 664 #endif
2889     #ifdef SUPPORT_UCP
2890     needschar = TRUE;
2891     #endif
2892     }
2893     #ifdef SUPPORT_UCP
2894     else
2895     {
2896     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2897     cc++;
2898     switch(*cc)
2899     {
2900     case PT_ANY:
2901     break;
2902    
2903     case PT_LAMP:
2904     case PT_GC:
2905     case PT_PC:
2906     case PT_ALNUM:
2907     needstype = TRUE;
2908     break;
2909    
2910     case PT_SC:
2911     needsscript = TRUE;
2912     break;
2913    
2914     case PT_SPACE:
2915     case PT_PXSPACE:
2916     case PT_WORD:
2917     needstype = TRUE;
2918     needschar = TRUE;
2919     break;
2920    
2921     default:
2922     SLJIT_ASSERT_STOP();
2923     break;
2924     }
2925     cc += 2;
2926     }
2927     #endif
2928     }
2929    
2930     #ifdef SUPPORT_UCP
2931     /* Simple register allocation. TMP1 is preferred if possible. */
2932     if (needstype || needsscript)
2933     {
2934     if (needschar && !charsaved)
2935     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2936     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2937     if (needschar)
2938     {
2939     if (needstype)
2940     {
2941     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2942     typereg = RETURN_ADDR;
2943     }
2944    
2945     if (needsscript)
2946     scriptreg = TMP3;
2947     OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2948     }
2949     else if (needstype && needsscript)
2950     scriptreg = TMP3;
2951     /* In all other cases only one of them was specified, and that can goes to TMP1. */
2952    
2953     if (needsscript)
2954     {
2955     if (scriptreg == TMP1)
2956     {
2957 ph10 836 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2958 ph10 664 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
2959     }
2960     else
2961     {
2962     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
2963 ph10 836 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2964 ph10 664 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
2965     }
2966     }
2967     }
2968     #endif
2969    
2970     /* Generating code. */
2971     cc = ccbegin;
2972     charoffset = 0;
2973     numberofcmps = 0;
2974     #ifdef SUPPORT_UCP
2975     typeoffset = 0;
2976     #endif
2977    
2978     while (*cc != XCL_END)
2979     {
2980     compares--;
2981     invertcmp = (compares == 0 && list != fallbacks);
2982     jump = NULL;
2983    
2984     if (*cc == XCL_SINGLE)
2985     {
2986     cc ++;
2987 ph10 836 #ifdef SUPPORT_UTF
2988     if (common->utf)
2989 ph10 664 {
2990     GETCHARINC(c, cc);
2991     }
2992     else
2993     #endif
2994     c = *cc++;
2995    
2996     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2997     {
2998     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2999     COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3000     numberofcmps++;
3001     }
3002     else if (numberofcmps > 0)
3003     {
3004     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3005     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3006     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3007     numberofcmps = 0;
3008     }
3009     else
3010     {
3011     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3012     numberofcmps = 0;
3013     }
3014     }
3015     else if (*cc == XCL_RANGE)
3016     {
3017     cc ++;
3018 ph10 836 #ifdef SUPPORT_UTF
3019     if (common->utf)
3020 ph10 664 {
3021     GETCHARINC(c, cc);
3022     }
3023     else
3024     #endif
3025     c = *cc++;
3026     SET_CHAR_OFFSET(c);
3027 ph10 836 #ifdef SUPPORT_UTF
3028     if (common->utf)
3029 ph10 664 {
3030     GETCHARINC(c, cc);
3031     }
3032     else
3033     #endif
3034     c = *cc++;
3035     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3036     {
3037     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3038     COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3039     numberofcmps++;
3040     }
3041     else if (numberofcmps > 0)
3042     {
3043     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3044     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3045     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3046     numberofcmps = 0;
3047     }
3048     else
3049     {
3050     jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3051     numberofcmps = 0;
3052     }
3053     }
3054     #ifdef SUPPORT_UCP
3055     else
3056     {
3057     if (*cc == XCL_NOTPROP)
3058     invertcmp ^= 0x1;
3059     cc++;
3060     switch(*cc)
3061     {
3062     case PT_ANY:
3063     if (list != fallbacks)
3064     {
3065     if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3066     continue;
3067     }
3068     else if (cc[-1] == XCL_NOTPROP)
3069     continue;
3070     jump = JUMP(SLJIT_JUMP);
3071     break;
3072    
3073     case PT_LAMP:
3074     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3075     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3076     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3077     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3078     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3079     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3080     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3081     break;
3082    
3083     case PT_GC:
3084 ph10 836 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3085 ph10 664 SET_TYPE_OFFSET(c);
3086 ph10 836 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3087 ph10 664 break;
3088    
3089     case PT_PC:
3090     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3091     break;
3092    
3093     case PT_SC:
3094     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3095     break;
3096    
3097     case PT_SPACE:
3098     case PT_PXSPACE:
3099     if (*cc == PT_SPACE)
3100     {
3101     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3102     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3103     }
3104     SET_CHAR_OFFSET(9);
3105     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3106     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3107     if (*cc == PT_SPACE)
3108     JUMPHERE(jump);
3109    
3110     SET_TYPE_OFFSET(ucp_Zl);
3111     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3112     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3113     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3114     break;
3115    
3116     case PT_WORD:
3117     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3118     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3119     /* ... fall through */
3120    
3121     case PT_ALNUM:
3122     SET_TYPE_OFFSET(ucp_Ll);
3123     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3124     COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3125     SET_TYPE_OFFSET(ucp_Nd);
3126     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3127     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3128     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3129     break;
3130     }
3131     cc += 2;
3132     }
3133     #endif
3134    
3135     if (jump != NULL)
3136     add_jump(compiler, compares > 0 ? list : fallbacks, jump);
3137     }
3138    
3139     if (found != NULL)
3140     set_jumps(found, LABEL());
3141     }
3142    
3143     #undef SET_TYPE_OFFSET
3144     #undef SET_CHAR_OFFSET
3145    
3146     #endif
3147    
3148 ph10 836 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
3149 ph10 664 {
3150     DEFINE_COMPILER;
3151     int length;
3152     unsigned int c, oc, bit;
3153     compare_context context;
3154     struct sljit_jump *jump[4];
3155 ph10 836 #ifdef SUPPORT_UTF
3156 ph10 670 struct sljit_label *label;
3157 ph10 664 #ifdef SUPPORT_UCP
3158 ph10 836 pcre_uchar propdata[5];
3159 ph10 664 #endif
3160     #endif
3161    
3162     switch(type)
3163     {
3164     case OP_SOD:
3165     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3166     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3167     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3168     return cc;
3169    
3170     case OP_SOM:
3171     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3172     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3173     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3174     return cc;
3175    
3176     case OP_NOT_WORD_BOUNDARY:
3177     case OP_WORD_BOUNDARY:
3178     add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3179     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3180     return cc;
3181    
3182     case OP_NOT_DIGIT:
3183     case OP_DIGIT:
3184 zherczeg 914 fallback_at_str_end(common, fallbacks);
3185 ph10 664 read_char8_type(common);
3186     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3187     add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3188     return cc;
3189    
3190     case OP_NOT_WHITESPACE:
3191     case OP_WHITESPACE:
3192 zherczeg 914 fallback_at_str_end(common, fallbacks);
3193 ph10 664 read_char8_type(common);
3194     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3195     add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3196     return cc;
3197    
3198     case OP_NOT_WORDCHAR:
3199     case OP_WORDCHAR:
3200 zherczeg 914 fallback_at_str_end(common, fallbacks);
3201 ph10 664 read_char8_type(common);
3202     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3203     add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3204     return cc;
3205    
3206     case OP_ANY:
3207 zherczeg 914 fallback_at_str_end(common, fallbacks);
3208 ph10 664 read_char(common);
3209     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3210     {
3211     jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3212 zherczeg 920 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3213 zherczeg 918 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3214     else
3215 zherczeg 920 jump[1] = check_str_end(common);
3216 zherczeg 918
3217 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3218 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3219 zherczeg 918 if (jump[1] != NULL)
3220     JUMPHERE(jump[1]);
3221 ph10 664 JUMPHERE(jump[0]);
3222     }
3223     else
3224     check_newlinechar(common, common->nltype, fallbacks, TRUE);
3225     return cc;
3226    
3227     case OP_ALLANY:
3228 zherczeg 914 fallback_at_str_end(common, fallbacks);
3229 ph10 836 #ifdef SUPPORT_UTF
3230     if (common->utf)
3231 ph10 664 {
3232 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3233     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3234     #ifdef COMPILE_PCRE8
3235 zherczeg 736 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3236 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3237 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3238 ph10 836 #else /* COMPILE_PCRE8 */
3239     #ifdef COMPILE_PCRE16
3240     jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3241     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3242     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3243     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3244     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3245     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3246     #endif /* COMPILE_PCRE16 */
3247     #endif /* COMPILE_PCRE8 */
3248 zherczeg 736 JUMPHERE(jump[0]);
3249 ph10 664 return cc;
3250     }
3251     #endif
3252 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3253 ph10 664 return cc;
3254    
3255 zherczeg 736 case OP_ANYBYTE:
3256 zherczeg 914 fallback_at_str_end(common, fallbacks);
3257 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3258 zherczeg 736 return cc;
3259    
3260 ph10 836 #ifdef SUPPORT_UTF
3261 ph10 664 #ifdef SUPPORT_UCP
3262     case OP_NOTPROP:
3263     case OP_PROP:
3264     propdata[0] = 0;
3265     propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3266     propdata[2] = cc[0];
3267     propdata[3] = cc[1];
3268     propdata[4] = XCL_END;
3269     compile_xclass_hotpath(common, propdata, fallbacks);
3270     return cc + 2;
3271     #endif
3272     #endif
3273    
3274     case OP_ANYNL:
3275 zherczeg 914 fallback_at_str_end(common, fallbacks);
3276 ph10 664 read_char(common);
3277     jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3278 zherczeg 918 /* We don't need to handle soft partial matching case. */
3279     if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3280     jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3281     else
3282     jump[1] = check_str_end(common);
3283 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3284 ph10 664 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3285 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3286 ph10 664 jump[3] = JUMP(SLJIT_JUMP);
3287     JUMPHERE(jump[0]);
3288     check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
3289     JUMPHERE(jump[1]);
3290     JUMPHERE(jump[2]);
3291     JUMPHERE(jump[3]);
3292     return cc;
3293    
3294     case OP_NOT_HSPACE:
3295     case OP_HSPACE:
3296 zherczeg 914 fallback_at_str_end(common, fallbacks);
3297 ph10 664 read_char(common);
3298     add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3299     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3300     return cc;
3301    
3302     case OP_NOT_VSPACE:
3303     case OP_VSPACE:
3304 zherczeg 914 fallback_at_str_end(common, fallbacks);
3305 ph10 664 read_char(common);
3306     add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3307     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3308     return cc;
3309    
3310     #ifdef SUPPORT_UCP
3311     case OP_EXTUNI:
3312 zherczeg 914 fallback_at_str_end(common, fallbacks);
3313 ph10 664 read_char(common);
3314     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3315     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3316     add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3317    
3318     label = LABEL();
3319     jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3320     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3321     read_char(common);
3322     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3323     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3324     CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3325    
3326     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3327     JUMPHERE(jump[0]);
3328 zherczeg 915 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
3329     {
3330     jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3331 zherczeg 918 /* Since we successfully read a char above, partial matching must occure. */
3332     check_partial(common, TRUE);
3333 zherczeg 915 JUMPHERE(jump[0]);
3334     }
3335 ph10 664 return cc;
3336     #endif
3337    
3338     case OP_EODN:
3339 zherczeg 918 /* Requires rather complex checks. */
3340 ph10 664 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3341     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3342     {
3343 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3344     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3345 zherczeg 918 if (common->mode == JIT_COMPILE)
3346     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3347     else
3348     {
3349     jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
3350     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3351     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
3352     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3353     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
3354     add_jump(compiler, fallbacks, JUMP(SLJIT_C_NOT_EQUAL));
3355     check_partial(common, TRUE);
3356     add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3357     JUMPHERE(jump[1]);
3358     }
3359 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3360 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3361     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3362     }
3363     else if (common->nltype == NLTYPE_FIXED)
3364     {
3365 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3366     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3367 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3368     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3369     }
3370     else
3371     {
3372 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3373 ph10 664 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3374 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3375 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3376     jump[2] = JUMP(SLJIT_C_GREATER);
3377     add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
3378 ph10 836 /* Equal. */
3379     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3380 ph10 664 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3381     add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3382    
3383     JUMPHERE(jump[1]);
3384     if (common->nltype == NLTYPE_ANYCRLF)
3385     {
3386 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3387 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3388     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3389     }
3390     else
3391     {
3392     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3393     read_char(common);
3394     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3395     add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3396     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3397     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3398     }
3399     JUMPHERE(jump[2]);
3400     JUMPHERE(jump[3]);
3401     }
3402     JUMPHERE(jump[0]);
3403 zherczeg 918 check_partial(common, FALSE);
3404 ph10 664 return cc;
3405    
3406     case OP_EOD:
3407 zherczeg 918 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3408     check_partial(common, FALSE);
3409 ph10 664 return cc;
3410    
3411     case OP_CIRC:
3412     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3413     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3414     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3415     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3416     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3417     return cc;
3418    
3419     case OP_CIRCM:
3420     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3421     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3422     jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3423     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3424     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3425     jump[0] = JUMP(SLJIT_JUMP);
3426     JUMPHERE(jump[1]);
3427    
3428 zherczeg 914 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3429 ph10 664 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3430     {
3431 ph10 836 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3432 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3433 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3434     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3435 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3436     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3437     }
3438     else
3439     {
3440     skip_char_back(common);
3441     read_char(common);
3442     check_newlinechar(common, common->nltype, fallbacks, FALSE);
3443     }
3444     JUMPHERE(jump[0]);
3445     return cc;
3446    
3447     case OP_DOLL:
3448     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3449     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3450     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3451    
3452     if (!common->endonly)
3453     compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3454     else
3455 zherczeg 914 {
3456 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3457 zherczeg 918 check_partial(common, FALSE);
3458 zherczeg 914 }
3459 ph10 664 return cc;
3460    
3461     case OP_DOLLM:
3462     jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3463     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3464     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3465     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3466 zherczeg 918 check_partial(common, FALSE);
3467 ph10 664 jump[0] = JUMP(SLJIT_JUMP);
3468     JUMPHERE(jump[1]);
3469    
3470     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3471     {
3472 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3473     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3474 zherczeg 918 if (common->mode == JIT_COMPILE)
3475     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3476     else
3477     {
3478     jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
3479     /* STR_PTR = STR_END - IN_UCHARS(1) */
3480     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3481     check_partial(common, TRUE);
3482     add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3483     JUMPHERE(jump[1]);
3484     }
3485    
3486 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3487 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3488     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3489     }
3490     else
3491     {
3492     peek_char(common);
3493     check_newlinechar(common, common->nltype, fallbacks, FALSE);
3494     }
3495     JUMPHERE(jump[0]);
3496     return cc;
3497    
3498     case OP_CHAR:
3499     case OP_CHARI:
3500     length = 1;
3501 ph10 836 #ifdef SUPPORT_UTF
3502     if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3503 ph10 664 #endif
3504 zherczeg 914 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
3505 ph10 664 {
3506 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3507 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3508    
3509 ph10 836 context.length = IN_UCHARS(length);
3510 ph10 664 context.sourcereg = -1;
3511     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3512 ph10 836 context.ucharptr = 0;
3513 ph10 664 #endif
3514     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3515     }
3516 zherczeg 914 fallback_at_str_end(common, fallbacks);
3517 ph10 664 read_char(common);
3518 ph10 836 #ifdef SUPPORT_UTF
3519     if (common->utf)
3520 ph10 664 {
3521     GETCHAR(c, cc);
3522     }
3523     else
3524     #endif
3525     c = *cc;
3526 zherczeg 914 if (type == OP_CHAR || !char_has_othercase(common, cc))
3527     {
3528     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
3529     return cc + length;
3530     }
3531     oc = char_othercase(common, c);
3532     bit = c ^ oc;
3533     if (ispowerof2(bit))
3534     {
3535     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3536     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3537     return cc + length;
3538     }
3539 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3540     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3541     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3542     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3543     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3544     return cc + length;
3545    
3546     case OP_NOT:
3547     case OP_NOTI:
3548 zherczeg 914 fallback_at_str_end(common, fallbacks);
3549 ph10 664 length = 1;
3550 ph10 836 #ifdef SUPPORT_UTF
3551     if (common->utf)
3552 ph10 664 {
3553 ph10 836 #ifdef COMPILE_PCRE8
3554     c = *cc;
3555     if (c < 128)
3556 ph10 664 {
3557     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3558     if (type == OP_NOT || !char_has_othercase(common, cc))
3559     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3560     else
3561     {
3562     /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3563 zherczeg 736 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3564     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3565 ph10 664 }
3566     /* Skip the variable-length character. */
3567 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3568 zherczeg 736 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3569 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3570 zherczeg 736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3571     JUMPHERE(jump[0]);
3572 ph10 836 return cc + 1;
3573 ph10 664 }
3574     else
3575 ph10 836 #endif /* COMPILE_PCRE8 */
3576     {
3577     GETCHARLEN(c, cc, length);
3578 ph10 664 read_char(common);
3579 ph10 836 }
3580 ph10 664 }
3581     else
3582 ph10 836 #endif /* SUPPORT_UTF */
3583 ph10 664 {
3584 ph10 836 read_char(common);
3585 ph10 664 c = *cc;
3586     }
3587    
3588     if (type == OP_NOT || !char_has_othercase(common, cc))
3589     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3590     else
3591     {
3592     oc = char_othercase(common, c);
3593     bit = c ^ oc;
3594     if (ispowerof2(bit))
3595     {
3596     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3597     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3598     }
3599     else
3600     {
3601     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3602     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3603     }
3604     }
3605 zherczeg 924 return cc + length;
3606 ph10 664
3607     case OP_CLASS:
3608     case OP_NCLASS:
3609 zherczeg 914 fallback_at_str_end(common, fallbacks);
3610 ph10 664 read_char(common);
3611 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3612 ph10 664 jump[0] = NULL;
3613 ph10 836 #ifdef COMPILE_PCRE8
3614     /* This check only affects 8 bit mode. In other modes, we
3615     always need to compare the value with 255. */
3616     if (common->utf)
3617     #endif /* COMPILE_PCRE8 */
3618 ph10 664 {
3619     jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3620     if (type == OP_CLASS)
3621     {
3622     add_jump(compiler, fallbacks, jump[0]);
3623     jump[0] = NULL;
3624     }
3625     }
3626 ph10 836 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3627 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3628     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3629     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3630     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3631     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3632     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3633 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3634 ph10 664 if (jump[0] != NULL)
3635     JUMPHERE(jump[0]);
3636 ph10 836 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3637     return cc + 32 / sizeof(pcre_uchar);
3638 ph10 664
3639 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3640 ph10 664 case OP_XCLASS:
3641     compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3642     return cc + GET(cc, 0) - 1;
3643     #endif
3644    
3645     case OP_REVERSE:
3646     length = GET(cc, 0);
3647     SLJIT_ASSERT(length > 0);
3648     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3649 ph10 836 #ifdef SUPPORT_UTF
3650     if (common->utf)
3651 ph10 664 {
3652 ph10 836 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3653 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3654     label = LABEL();
3655 ph10 836 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3656 ph10 664 skip_char_back(common);
3657     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3658     JUMPTO(SLJIT_C_NOT_ZERO, label);
3659     }
3660 zherczeg 914 else
3661 ph10 664 #endif
3662 zherczeg 914 {
3663     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3664     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3665     add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3666     }
3667     check_start_used_ptr(common);
3668 ph10 664 return cc + LINK_SIZE;
3669     }
3670     SLJIT_ASSERT_STOP();
3671     return cc;
3672     }
3673    
3674 ph10 836 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3675 ph10 664 {
3676     /* This function consumes at least one input character. */
3677     /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3678     DEFINE_COMPILER;
3679 ph10 836 pcre_uchar *ccbegin = cc;
3680 ph10 664 compare_context context;
3681     int size;
3682    
3683     context.length = 0;
3684     do
3685     {
3686     if (cc >= ccend)
3687     break;
3688    
3689     if (*cc == OP_CHAR)
3690     {
3691     size = 1;
3692 ph10 836 #ifdef SUPPORT_UTF
3693     if (common->utf && HAS_EXTRALEN(cc[1]))
3694     size += GET_EXTRALEN(cc[1]);
3695 ph10 664 #endif
3696     }
3697     else if (*cc == OP_CHARI)
3698     {
3699     size = 1;
3700 ph10 836 #ifdef SUPPORT_UTF
3701     if (common->utf)
3702 ph10 664 {
3703     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3704     size = 0;
3705 ph10 836 else if (HAS_EXTRALEN(cc[1]))
3706     size += GET_EXTRALEN(cc[1]);
3707 ph10 664 }
3708 ph10 691 else
3709 ph10 664 #endif
3710     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3711     size = 0;
3712     }
3713     else
3714     size = 0;
3715    
3716     cc += 1 + size;
3717 ph10 836 context.length += IN_UCHARS(size);
3718 ph10 664 }
3719     while (size > 0 && context.length <= 128);
3720    
3721     cc = ccbegin;
3722     if (context.length > 0)
3723     {
3724     /* We have a fixed-length byte sequence. */
3725     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3726     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3727    
3728     context.sourcereg = -1;
3729     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3730 ph10 836 context.ucharptr = 0;
3731 ph10 664 #endif
3732     do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3733     return cc;
3734     }
3735    
3736     /* A non-fixed length character will be checked if length == 0. */
3737     return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3738     }
3739    
3740 ph10 836 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3741 ph10 664 {
3742     DEFINE_COMPILER;
3743     int offset = GET2(cc, 1) << 1;
3744    
3745     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3746     if (!common->jscript_compat)
3747     {
3748     if (fallbacks == NULL)
3749     {
3750 zherczeg 914 /* OVECTOR(1) contains the "string begin - 1" constant. */
3751 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3752     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3753     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3754     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3755     return JUMP(SLJIT_C_NOT_ZERO);
3756     }
3757     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3758     }
3759     return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3760     }
3761    
3762     /* Forward declarations. */
3763 ph10 836 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3764 ph10 664 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3765    
/* Allocates a fallback record and pushes it onto parent's list.

The macro relies on three names being in scope where it is expanded:
  compiler  passed to sljit_alloc_memory() / sljit_get_compiler_error()
  fallback  receives the new record
  parent    the record is linked in through parent->top

  size     number of bytes to allocate and clear; evaluated twice, so it
           must not have side effects
  ccstart  value stored in fallback->cc
  error    value returned from the ENCLOSING function when the compiler
           reports an error after the allocation

On success the record is zero-filled, its prev points to the former
parent->top, and parent->top points to the record. */
#define PUSH_FALLBACK(size, ccstart, error) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)
3778    
/* Variant of the fallback-push macro for functions returning void: when the
compiler reports an error after the allocation, the ENCLOSING function is
left with a plain "return;".

The macro relies on three names being in scope where it is expanded:
  compiler  passed to sljit_alloc_memory() / sljit_get_compiler_error()
  fallback  receives the new record
  parent    the record is linked in through parent->top

  size     number of bytes to allocate and clear; evaluated twice, so it
           must not have side effects
  ccstart  value stored in fallback->cc

On success the record is zero-filled, its prev points to the former
parent->top, and parent->top points to the record. */
#define PUSH_FALLBACK_NOVALUE(size, ccstart) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)
3791    
/* Casts the "fallback" variable of the expanding scope to a pointer to the
given structure type, e.g. FALLBACK_AS(iterator_fallback)->hotpath. */
3792 zherczeg 914 #define FALLBACK_AS(type) ((type *)fallback)
3793 ph10 664
/* Emits the code that matches a back reference (OP_REF or OP_REFI, decided by
*cc) at the current subject position.

  common      compiler state (mode, utf, jscript_compat, shared compare
              routines)
  cc          the reference opcode; GET2(cc, 1) doubled selects the OVECTOR
              pair that is read (OVECTOR(offset) and OVECTOR(offset + 1))
  fallbacks   list that receives every jump taken when the match fails
  withchecks  when TRUE, the code also tests "OVECTOR(offset) == OVECTOR(1)"
              (skipped in jscript_compat mode) and creates a jump for a
              zero-length reference (both OVECTOR values equal)
  emptyfail   only meaningful when that zero-length jump exists: TRUE adds it
              to fallbacks, FALSE binds it to the end of the emitted code

In the modes other than JIT_COMPILE the emitted code additionally calls
check_partial() when the comparison runs into the end of the subject.

Returns cc + 1 + IMM2_SIZE. */
3794 ph10 836 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3795 ph10 664 {
3796     DEFINE_COMPILER;
3797     int offset = GET2(cc, 1) << 1;
3798     struct sljit_jump *jump = NULL;
3799 zherczeg 915 struct sljit_jump *partial;
3800     struct sljit_jump *nopartial;
3801 ph10 664
3802     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3803 zherczeg 914 /* OVECTOR(1) contains the "string begin - 1" constant. */
3804 ph10 664 if (withchecks && !common->jscript_compat)
3805     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3806
3807 ph10 836 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3808     if (common->utf && *cc == OP_REFI)
3809 ph10 664 {
    /* Caseless reference in UTF mode: the comparison is done by the C helper
    do_utf_caselesscmp(). NOTE(review): the assertion below presumably exists
    because SLJIT_CALL3 passes the first three temporary registers as the
    helper's arguments - confirm against the sljit calling convention. */
3810     SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3811     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3812     if (withchecks)
3813     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3814
3815     /* Needed to save important temporary registers. */
3816     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3817     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3818     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
3819 ph10 836 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3820 ph10 664 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
    /* The helper's result is handled as follows: 0 is a failure; 1 is a
    failure as well, but in the modes other than JIT_COMPILE it first goes
    through check_partial(); any other value becomes the new STR_PTR. */
3821 zherczeg 915 if (common->mode == JIT_COMPILE)
3822     add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
3823     else
3824     {
3825     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3826     nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3827 zherczeg 918 check_partial(common, FALSE);
3828 zherczeg 915 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3829     JUMPHERE(nopartial);
3830     }
3831 ph10 664 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3832     }
3833     else
3834 ph10 836 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3835 ph10 664 {
    /* TMP2 = OVECTOR(offset + 1) - OVECTOR(offset); the zero flag set here is
    what the optional zero-length jump tests. */
3836     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3837     if (withchecks)
3838     jump = JUMP(SLJIT_C_ZERO);
3839 zherczeg 914
    /* Advance STR_PTR by that length before comparing. Running past STR_END
    is a plain failure in JIT_COMPILE mode; in the other modes the jump is
    bound further down, where the part that fits is compared. */
3840 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3841 zherczeg 915 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
3842     if (common->mode == JIT_COMPILE)
3843     add_jump(compiler, fallbacks, partial);
3844 ph10 664
    /* Fast call into the shared compare routine; the match fails when TMP2 is
    non-zero afterwards (presumably the routine leaves 0 there on success -
    verify against the casefulcmp/caselesscmp generators). */
3845     add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3846     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3847 zherczeg 915
3848     if (common->mode != JIT_COMPILE)
3849     {
    /* A full match skips the partial handling below. */
3850     nopartial = JUMP(SLJIT_JUMP);
3851     JUMPHERE(partial);
3852     /* TMP2 -= STR_PTR - STR_END, i.e. drop the part that lies beyond STR_END. */
3853     OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
3854     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    /* Nothing left to compare: go straight to check_partial(). Otherwise
    compare what fits with STR_PTR placed at STR_END. */
3855     partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
3856     OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
3857     add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3858     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3859     JUMPHERE(partial);
3860 zherczeg 918 check_partial(common, FALSE);
    /* This path always ends in a fallback, whatever check_partial() did. */
3861 zherczeg 915 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3862     JUMPHERE(nopartial);
3863     }
3864 ph10 664 }
3865
    /* Resolve the zero-length jump created under withchecks (if any). */
3866     if (jump != NULL)
3867     {
3868     if (emptyfail)
3869     add_jump(compiler, fallbacks, jump);
3870     else
3871     JUMPHERE(jump);
3872     }
3873 ph10 836 return cc + 1 + IMM2_SIZE;
3874 ph10 664 }
3875    
/* Emits the code for a back reference that is followed by a repeat operator
(OP_CR*). The repeat opcode is read from cc[1 + IMM2_SIZE].

  common  compiler state
  cc      the reference opcode
  parent  owner of the fallback list; an iterator_fallback record is pushed
          onto parent->top

Returns the pattern position after the repeat operator (and its min/max
operands for the RANGE forms), or NULL when PUSH_FALLBACK leaves the function
because the compiler reported an error. */
3876 ph10 836 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3877 ph10 664 {
3878     DEFINE_COMPILER;
3879     fallback_common *fallback;
3880 ph10 836 pcre_uchar type;
3881 ph10 664 struct sljit_label *label;
3882     struct sljit_jump *zerolength;
3883     struct sljit_jump *jump = NULL;
3884 ph10 836 pcre_uchar *ccbegin = cc;
3885 ph10 664 int min = 0, max = 0;
3886     BOOL minimize;
3887
3888     PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3889
    /* Decode the repeat operator. An odd opcode value selects the minimizing
    code path below. max == 0 is used for the forms without an upper limit
    (STAR and PLUS set it that way). */
3890 ph10 836 type = cc[1 + IMM2_SIZE];
3891 ph10 664 minimize = (type & 0x1) != 0;
3892     switch(type)
3893     {
3894     case OP_CRSTAR:
3895     case OP_CRMINSTAR:
3896     min = 0;
3897     max = 0;
3898 ph10 836 cc += 1 + IMM2_SIZE + 1;
3899 ph10 664 break;
3900     case OP_CRPLUS:
3901     case OP_CRMINPLUS:
3902     min = 1;
3903     max = 0;
3904 ph10 836 cc += 1 + IMM2_SIZE + 1;
3905 ph10 664 break;
3906     case OP_CRQUERY:
3907     case OP_CRMINQUERY:
3908     min = 0;
3909     max = 1;
3910 ph10 836 cc += 1 + IMM2_SIZE + 1;
3911 ph10 664 break;
3912     case OP_CRRANGE:
3913     case OP_CRMINRANGE:
3914 ph10 836 min = GET2(cc, 1 + IMM2_SIZE + 1);
3915     max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
3916     cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
3917 ph10 664 break;
3918     default:
3919     SLJIT_ASSERT_STOP();
3920     break;
3921     }
3922
    /* Non-minimizing path: repeat the reference in a loop, saving STR_PTR on
    the stack after the iterations that may be undone later. */
3923     if (!minimize)
3924     {
3925     if (min == 0)
3926     {
    /* Zero iterations are acceptable: the starting STR_PTR is stored first,
    followed by a 0 entry. */
3927     allocate_stack(common, 2);
3928     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3929     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3930     /* Temporary release of STR_PTR. */
3931     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3932     zerolength = compile_ref_checks(common, ccbegin, NULL);
3933     /* Restore if not zero length. */
3934     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3935     }
3936     else
3937     {
    /* At least one iteration is required, so the checks may add jumps to
    topfallbacks directly. */
3938     allocate_stack(common, 1);
3939     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3940     zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3941     }
3942
    /* An iteration counter is only kept when a limit above 1 exists; it
    lives in the POSSESSIVE0 local. */
3943     if (min > 1 || max > 1)
3944     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
3945
    /* Loop head: one match of the referenced text per pass. */
3946     label = LABEL();
3947     compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
3948
3949     if (min > 1 || max > 1)
3950     {
3951     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3952     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3953     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: loop again without saving STR_PTR. */
3954     if (min > 1)
3955     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
3956     if (max > 1)
3957     {
    /* Leave the loop once max is reached; otherwise save STR_PTR and
    iterate again. */
3958     jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
3959     allocate_stack(common, 1);
3960     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3961     JUMPTO(SLJIT_JUMP, label);
3962     JUMPHERE(jump);
3963     }
3964     }
3965
3966     if (max == 0)
3967     {
3968     /* Includes min > 1 case as well. */
3969     allocate_stack(common, 1);
3970     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3971     JUMPTO(SLJIT_JUMP, label);
3972     }
3973
    /* The jump returned by compile_ref_checks() lands here, where the hotpath
    label of the fallback record is also placed. */
3974     JUMPHERE(zerolength);
3975     FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3976 ph10 677
3977     decrease_call_count(common);
3978 ph10 664 return cc;
3979     }
3980
    /* Minimizing path. Two stack slots are used: STACK(0) receives STR_PTR
    and STACK(1) serves as the iteration counter. The counter slot is not
    initialised for OP_CRMINSTAR, which never reads it (min and max are 0). */
3981     allocate_stack(common, 2);
3982     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3983     if (type != OP_CRMINSTAR)
3984     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3985
3986     if (min == 0)
3987     {
    /* Zero iterations allowed: store STR_PTR and jump over the matching code
    emitted below. */
3988     zerolength = compile_ref_checks(common, ccbegin, NULL);
3989     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3990     jump = JUMP(SLJIT_JUMP);
3991     }
3992     else
3993     zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3994
    /* Label stored in the fallback record; the min > 1 loop below also jumps
    back to it. With an upper limit, a counter value >= max is a failure. */
3995     FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3996     if (max > 0)
3997     add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
3998
3999     compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
4000     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4001
4002     if (min > 1)
4003     {
    /* Count the iteration and repeat until the minimum has been reached. */
4004     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4005     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4006     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4007     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
4008     }
4009     else if (max > 0)
4010     OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4011
4012     if (jump != NULL)
4013     JUMPHERE(jump);
4014     JUMPHERE(zerolength);
4015 ph10 677
4016     decrease_call_count(common);
4017 ph10 664 return cc;
4018     }
4019    
4020 ph10 836 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4021 ph10 664 {
4022     DEFINE_COMPILER;
4023     fallback_common *fallback;
4024     recurse_entry *entry = common->entries;
4025     recurse_entry *prev = NULL;
4026     int start = GET(cc, 1);
4027    
4028     PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
4029     while (entry != NULL)
4030     {
4031     if (entry->start == start)
4032     break;
4033     prev = entry;
4034     entry = entry->next;
4035     }
4036    
4037     if (entry == NULL)
4038     {
4039     entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4040     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4041     return NULL;
4042     entry->next = NULL;
4043     entry->entry = NULL;
4044     entry->calls = NULL;
4045     entry->start = start;
4046    
4047     if (prev != NULL)
4048     prev->next = entry;
4049     else
4050     common->entries = entry;
4051     }
4052    
4053     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4054     allocate_stack(common, 1);
4055     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4056    
4057     if (entry->entry == NULL)
4058     add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4059     else
4060     JUMPTO(SLJIT_FAST_CALL, entry->entry);
4061     /* Leave if the match is failed. */
4062     add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4063     return cc + 1 + LINK_SIZE;
4064     }
4065    
4066 ph10 836 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
4067 ph10 664 {
4068     DEFINE_COMPILER;
4069     int framesize;
4070     int localptr;
4071     fallback_common altfallback;
4072 ph10 836 pcre_uchar *ccbegin;
4073     pcre_uchar opcode;
4074     pcre_uchar bra = OP_BRA;
4075 ph10 664 jump_list *tmp = NULL;
4076     jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
4077     jump_list **found;
4078     /* Saving previous accept variables. */
4079     struct sljit_label *save_acceptlabel = common->acceptlabel;
4080     struct sljit_jump *jump;
4081     struct sljit_jump *brajump = NULL;
4082     jump_list *save_accept = common->accept;
4083    
4084     if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4085     {
4086     SLJIT_ASSERT(!conditional);
4087     bra = *cc;
4088     cc++;
4089     }
4090 ph10 836 localptr = PRIV_DATA(cc);
4091 ph10 664 SLJIT_ASSERT(localptr != 0);
4092     framesize = get_framesize(common, cc, FALSE);
4093