/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 883 - (hide annotations) (download)
Mon Jan 16 08:35:42 2012 UTC (2 years, 3 months ago) by zherczeg
File MIME type: text/plain
File size: 212865 byte(s)
Fix compiler warnings
1 ph10 664 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 836 Copyright (c) 1997-2012 University of Cambridge
10 ph10 664
11     The machine code generator part (this module) was written by Zoltan Herczeg
12 ph10 836 Copyright (c) 2010-2012
13 ph10 664
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43     #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47     #include "pcre_internal.h"
48    
49     #ifdef SUPPORT_JIT
50    
51     /* All-in-one: Since we use the JIT compiler only from here,
52     we just include it. This way we don't need to touch the build
53     system files. */
54    
55 ph10 836 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56     #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 ph10 664 #define SLJIT_CONFIG_AUTO 1
58 zherczeg 741 #define SLJIT_CONFIG_STATIC 1
59 ph10 664 #define SLJIT_VERBOSE 0
60     #define SLJIT_DEBUG 0
61    
62     #include "sljit/sljitLir.c"
63    
64     #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 ph10 836 #error Unsupported architecture
66 ph10 664 #endif
67    
68     /* Allocate memory on the stack. Fast, but limited size. */
69     #define LOCAL_SPACE_SIZE 32768
70    
/* NOTE(review): no user of STACK_GROWTH_RATE is visible in this part of the
file; presumably the step by which the backtracking stack is grown - confirm. */
71     #define STACK_GROWTH_RATE 8192
72    
73     /* Enable to check that the allocation could destroy temporaries. */
/* DESTROY_REGISTERS is only defined when SLJIT_DEBUG is defined and non-zero. */
74     #if defined SLJIT_DEBUG && SLJIT_DEBUG
75     #define DESTROY_REGISTERS 1
76     #endif
77    
78     /*
79     Short summary of the backtracking mechanism employed by the JIT code generator:
80    
81     The code generator follows the recursive nature of the PERL compatible regular
82     expressions. The basic blocks of regular expressions are condition checkers
83     which execute different commands depending on the result of the condition check.
84     The relationship between the operators can be horizontal (concatenation) and
85     vertical (sub-expression) (See struct fallback_common for more details).
86    
87     'ab' - 'a' and 'b' regexps are concatenated
88     'a+' - 'a' is the sub-expression of the '+' operator
89    
90     The condition checkers are boolean (true/false) checkers. Machine code is generated
91     for the checker itself and for the actions depending on the result of the checker.
92     The 'true' case is called the hot path (expected path), and the other is called
93     the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
94     branches on the hot path.
95    
96     Greedy star operator (*) :
97     Hot path: match happens.
98     Fallback path: match failed.
99     Non-greedy star operator (*?) :
100     Hot path: no need to perform a match.
101     Fallback path: match is required.
102    
103     The following example shows the code generated for a capturing bracket
104     with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105     we have the following regular expression:
106    
107     A(B|C)D
108    
109     The generated code will be the following:
110    
111     A hot path
112     '(' hot path (pushing arguments to the stack)
113     B hot path
114     ')' hot path (pushing arguments to the stack)
115     D hot path
116     return with successful match
117    
118     D fallback path
119     ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120     B fallback path
121     C expected path
122     jump to D hot path
123     C fallback path
124     A fallback path
125 ph10 691
126 ph10 664 Notice that the order of the fallback code paths is the opposite of the fast
127     code paths. In this way the topmost value on the stack always belongs
128     to the current fallback code path. The fallback code path must check
129     whether there is a next alternative. If so, it needs to jump back to
130     the hot path eventually. Otherwise it needs to clear out its own stack
131     frame and continue the execution on the fallback code paths.
132     */
133    
134     /*
135     Saved stack frames:
136    
137     Atomic blocks and asserts require reloading the values of local variables
138     when the fallback mechanism is performed. Because of OP_RECURSE, the locals
139     are not necessarily known at compile time, thus we need a dynamic restore
140     mechanism.
141    
142     The stack frames are stored in a chain list, and have the following format:
143     ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144    
145     Thus we can restore the locals to a particular point in the stack.
146     */
147    
/* Argument block for a jit_function (the jit_function type takes a pointer
to this structure). Members are ordered with the pointers first, then the
ints, then the single byte flags.
NOTE(review): the meaning of the individual members is not established in
this part of the file; the names suggest subject pointers, the output offset
vector and match option flags - confirm against the code that fills it in. */
148     typedef struct jit_arguments {
149     /* Pointers first. */
150     struct sljit_stack *stack;
151 ph10 836 const pcre_uchar *str;
152     const pcre_uchar *begin;
153     const pcre_uchar *end;
154 ph10 664 int *offsets;
155 ph10 836 pcre_uchar *ptr;
156 ph10 664 /* Everything else after. */
157     int offsetcount;
158 ph10 677 int calllimit;
159 ph10 836 pcre_uint8 notbol;
160     pcre_uint8 noteol;
161     pcre_uint8 notempty;
162     pcre_uint8 notempty_atstart;
163 ph10 664 } jit_arguments;
164    
/* Record describing one piece of compiled code: an untyped pointer to it,
its size as an sljit_uw, plus a callback of type PUBL(jit_callback) and an
untyped userdata pointer.
NOTE(review): presumably userdata is what gets passed to callback - confirm
at the call site, which is not visible here. */
165     typedef struct executable_function {
166     void *executable_func;
167 zherczeg 852 PUBL(jit_callback) callback;
168 ph10 664 void *userdata;
169 ph10 836 sljit_uw executable_size;
170 ph10 664 } executable_function;
171    
/* Node of a singly linked list of sljit jumps: one jump per node, chained
through next. */
172     typedef struct jump_list {
173     struct sljit_jump *jump;
174     struct jump_list *next;
175     } jump_list;
176    
/* Kinds of stub; stack_alloc is the only kind defined. */
177 zherczeg 696 enum stub_types { stack_alloc };
178 ph10 664
/* Node of a singly linked list of stubs. Each node records the stub type,
an integer payload, an sljit jump (start) and an sljit label (leave).
NOTE(review): presumably start is the jump into the stub and leave is the
label where execution resumes afterwards - confirm where stubs are emitted. */
179     typedef struct stub_list {
180     enum stub_types type;
181     int data;
182     struct sljit_jump *start;
183     struct sljit_label *leave;
184     struct stub_list *next;
185     } stub_list;
186    
/* Function pointer type using the SLJIT_CALL calling convention: takes a
pointer to a jit_arguments block and returns an int. */
187     typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
188    
189     /* The following structure is the key data type for the recursive
190     code generator. It is allocated by compile_hotpath, and contains
191     the arguments for compile_fallbackpath. Must be the first member
192     of its descendants. */
193     typedef struct fallback_common {
194     /* Concatenation stack. */
195     struct fallback_common *prev;
196     jump_list *nextfallbacks;
197     /* Internal stack (for component operators). */
198     struct fallback_common *top;
199     jump_list *topfallbacks;
200     /* Opcode pointer. */
201 ph10 836 pcre_uchar *cc;
202 ph10 664 } fallback_common;
203    
/* Fallback record with fallback_common as its first member, extended with a
condfailed jump list, a frame size, a local pointer and a hot path label.
NOTE(review): the name suggests it is used for the assertion opcodes. */
204     typedef struct assert_fallback {
205     fallback_common common;
206     jump_list *condfailed;
207     /* Less than 0 (-1) if a frame is not needed. */
208     int framesize;
209     /* Points to our private memory word on the stack. */
210     int localptr;
211     /* For iterators. */
212     struct sljit_label *hotpath;
213     } assert_fallback;
214    
/* Fallback record with fallback_common as its first member, extended with
three hot path labels, a union whose active member depends on the opcode
(see the member comments), and a local pointer. */
215     typedef struct bracket_fallback {
216     fallback_common common;
217     /* Where to continue if an alternative is successfully matched. */
218     struct sljit_label *althotpath;
219     /* For rmin and rmax iterators. */
220     struct sljit_label *recursivehotpath;
221     /* For greedy ? operator. */
222     struct sljit_label *zerohotpath;
223     /* Contains the branches of a failed condition. */
224     union {
225     /* Both for OP_COND, OP_SCOND. */
226     jump_list *condfailed;
227     assert_fallback *assert;
228     /* For OP_ONCE. -1 if not needed. */
229     int framesize;
230     } u;
231     /* Points to our private memory word on the stack. */
232     int localptr;
233     } bracket_fallback;
234    
/* Fallback record with fallback_common as its first member, extended with a
local pointer, a frame size and an allocated stack size.
NOTE(review): the name suggests it is used for the *POS bracket opcodes. */
235     typedef struct bracketpos_fallback {
236     fallback_common common;
237     /* Points to our private memory word on the stack. */
238     int localptr;
239     /* Reverting stack is needed. */
240     int framesize;
241     /* Allocated stack size. */
242     int stacksize;
243     } bracketpos_fallback;
244    
/* Fallback record with fallback_common as its first member plus one hot path
label.
NOTE(review): the name suggests it is used for OP_BRAMINZERO - confirm. */
245     typedef struct braminzero_fallback {
246     fallback_common common;
247     struct sljit_label *hotpath;
248     } braminzero_fallback;
249    
/* Fallback record with fallback_common as its first member plus the label of
the next iteration. */
250     typedef struct iterator_fallback {
251     fallback_common common;
252     /* Next iteration. */
253     struct sljit_label *hotpath;
254     } iterator_fallback;
255    
/* Node of a singly linked list of recursion entries. Each node holds the
entry label of one function, the jumps collected for calls to it, and the
starting opcode stored as an int. */
256     typedef struct recurse_entry {
257     struct recurse_entry *next;
258     /* Contains the function entry. */
259     struct sljit_label *entry;
260     /* Collects the calls made while the function has not been created yet. */
261     jump_list *calls;
262     /* Points to the starting opcode. */
263     int start;
264     } recurse_entry;
265    
/* Fallback record that carries nothing beyond the fallback_common header.
NOTE(review): the name suggests it is used for OP_RECURSE - confirm. */
266     typedef struct recurse_fallback {
267     fallback_common common;
268     } recurse_fallback;
269    
/* Shared state of one compilation. The helper functions in this file receive
it as their common argument and the macros DEFINE_COMPILER, OVECTOR_PRIV and
PRIV_DATA read its compiler, cbraptr, localptrs and start members.
The utf, use_ucp, utfreadchar, utfreadtype8 and getucd members only exist
under the SUPPORT_UTF / SUPPORT_UCP / COMPILE_PCRE8 conditions shown below.
NOTE(review): most members are not used in this part of the file; the
jump_list members presumably collect calls to shared helper routines. */
270     typedef struct compiler_common {
271     struct sljit_compiler *compiler;
/* First opcode of the compiled pattern; PRIV_DATA indexes localptrs by the
distance of an opcode from this pointer. */
272 ph10 836 pcre_uchar *start;
273 ph10 664 int localsize;
/* Per opcode offsets, filled in by set_localptrs. */
274     int *localptrs;
275 ph10 836 const pcre_uint8 *fcc;
276 ph10 664 sljit_w lcc;
/* Base offset used by OVECTOR_PRIV. */
277     int cbraptr;
278     int nltype;
279     int newline;
280     int bsr_nltype;
281     int endonly;
282     sljit_w ctypes;
283 zherczeg 741 sljit_uw name_table;
284     sljit_w name_count;
285     sljit_w name_entry_size;
286 ph10 664 struct sljit_label *acceptlabel;
287     stub_list *stubs;
288     recurse_entry *entries;
289     recurse_entry *currententry;
290     jump_list *accept;
291 ph10 677 jump_list *calllimit;
292 ph10 664 jump_list *stackalloc;
293     jump_list *revertframes;
294     jump_list *wordboundary;
295     jump_list *anynewline;
296     jump_list *hspace;
297     jump_list *vspace;
298     jump_list *casefulcmp;
299     jump_list *caselesscmp;
300     BOOL jscript_compat;
301 ph10 836 #ifdef SUPPORT_UTF
302     BOOL utf;
303 ph10 664 #ifdef SUPPORT_UCP
304 ph10 836 BOOL use_ucp;
305 ph10 664 #endif
306 ph10 836 jump_list *utfreadchar;
307     #ifdef COMPILE_PCRE8
308     jump_list *utfreadtype8;
309 ph10 664 #endif
310 ph10 836 #endif /* SUPPORT_UTF */
311 ph10 664 #ifdef SUPPORT_UCP
312     jump_list *getucd;
313     #endif
314     } compiler_common;
315    
316     /* For byte_sequence_compare. */
317    
/* Always contains length and sourcereg. When SLJIT_UNALIGNED is defined and
non-zero it also contains ucharptr and two unions of identical layout, c and
oc, which overlay an sljit_i, an sljit_uh and an array of code units (four
sljit_ub in 8 bit mode, two sljit_uh in 16 bit mode).
NOTE(review): the roles of c and oc are not visible here; presumably the
character data and its other-case variant - confirm in
byte_sequence_compare. */
318     typedef struct compare_context {
319     int length;
320     int sourcereg;
321     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
322 ph10 836 int ucharptr;
323 ph10 664 union {
324 ph10 836 sljit_i asint;
325 zherczeg 847 sljit_uh asushort;
326 ph10 836 #ifdef COMPILE_PCRE8
327 ph10 664 sljit_ub asbyte;
328 ph10 836 sljit_ub asuchars[4];
329     #else
330     #ifdef COMPILE_PCRE16
331     sljit_uh asuchars[2];
332     #endif
333     #endif
334 ph10 664 } c;
335     union {
336 ph10 836 sljit_i asint;
337 zherczeg 847 sljit_uh asushort;
338 ph10 836 #ifdef COMPILE_PCRE8
339 ph10 664 sljit_ub asbyte;
340 ph10 836 sljit_ub asuchars[4];
341     #else
342     #ifdef COMPILE_PCRE16
343     sljit_uh asuchars[2];
344     #endif
345     #endif
346 ph10 664 } oc;
347     #endif
348     } compare_context;
349    
/* Marker values written into saved stack frames by init_frame: frame_end
terminates a frame, frame_setstrbegin precedes the saved value of
OVECTOR(0). */
350     enum {
351     frame_end = 0,
352 zherczeg 696 frame_setstrbegin = -1
353 ph10 664 };
354    
355 zherczeg 883 /* Undefine sljit macros. */
/* CMP is redefined further down as a shortcut around sljit_emit_cmp. */
356     #undef CMP
357    
358 ph10 664 /* Used for accessing the elements of the stack. */
/* Yields a negative byte offset: STACK(0) is -sizeof(sljit_w), STACK(1) is
-2 * sizeof(sljit_w), and so on. */
359     #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
360    
/* Symbolic names for the sljit registers used in this file. Note that TMP2
is SLJIT_TEMPORARY_REG3, while SLJIT_TEMPORARY_REG2 is STACK_TOP. */
361     #define TMP1 SLJIT_TEMPORARY_REG1
362     #define TMP2 SLJIT_TEMPORARY_REG3
363     #define TMP3 SLJIT_TEMPORARY_EREG2
364 zherczeg 880 #define STR_PTR SLJIT_SAVED_REG1
365     #define STR_END SLJIT_SAVED_REG2
366 ph10 664 #define STACK_TOP SLJIT_TEMPORARY_REG2
367 zherczeg 880 #define STACK_LIMIT SLJIT_SAVED_REG3
368     #define ARGUMENTS SLJIT_SAVED_EREG1
369     #define CALL_COUNT SLJIT_SAVED_EREG2
370 ph10 664 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
371    
372     /* Locals layout. */
/* Every value below is a byte offset (a multiple of sizeof(sljit_w)); the
code in this file uses them with SLJIT_MEM1(SLJIT_LOCALS_REG). */
373     /* These two locals can be used by the current opcode. */
374     #define LOCALS0 (0 * sizeof(sljit_w))
375     #define LOCALS1 (1 * sizeof(sljit_w))
376     /* Two local variables for possessive quantifiers (char1 cannot use them). */
377     #define POSSESSIVE0 (2 * sizeof(sljit_w))
378     #define POSSESSIVE1 (3 * sizeof(sljit_w))
379     /* Head of the last recursion. */
380 zherczeg 726 #define RECURSIVE_HEAD (4 * sizeof(sljit_w))
381 ph10 677 /* Max limit of recursions. */
382 zherczeg 726 #define CALL_LIMIT (5 * sizeof(sljit_w))
383 ph10 664 /* Last known position of the requested byte. */
384 ph10 836 #define REQ_CHAR_PTR (6 * sizeof(sljit_w))
385 ph10 664 /* End pointer of the first line. */
386 zherczeg 726 #define FIRSTLINE_END (7 * sizeof(sljit_w))
387 ph10 664 /* The output vector is stored on the stack, and contains pointers
388     to characters. The vector data is divided into two groups: the first
389     group contains the start / end character pointers, and the second is
390     the start pointers when the end of the capturing group has not yet been reached. */
391 zherczeg 726 #define OVECTOR_START (8 * sizeof(sljit_w))
392 ph10 664 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
/* The next two macros expand to expressions that use a variable named common,
so they can only be used where a compiler_common pointer of that name is in
scope. PRIV_DATA looks up the localptrs slot of the opcode at cc. */
393     #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
394 ph10 836 #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
395 ph10 664
/* Select the sljit move opcodes matching the code unit width: the UB
variants when COMPILE_PCRE8 is defined, the UH variants when COMPILE_PCRE16
is defined. Any other configuration stops the build. */
396 ph10 836 #ifdef COMPILE_PCRE8
397     #define MOV_UCHAR SLJIT_MOV_UB
398     #define MOVU_UCHAR SLJIT_MOVU_UB
399     #else
400     #ifdef COMPILE_PCRE16
401     #define MOV_UCHAR SLJIT_MOV_UH
402     #define MOVU_UCHAR SLJIT_MOVU_UH
403     #else
404     #error Unsupported compiling mode
405     #endif
406     #endif
407    
408 ph10 664 /* Shortcuts. */
/* DEFINE_COMPILER declares a local named compiler, initialised from
common->compiler. All the other shortcuts pass that local to the matching
sljit_emit_* / sljit_set_label function, so DEFINE_COMPILER has to appear
first in any function that uses them. */
409     #define DEFINE_COMPILER \
410     struct sljit_compiler *compiler = common->compiler
411     #define OP1(op, dst, dstw, src, srcw) \
412     sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
413     #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
414     sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
415     #define LABEL() \
416     sljit_emit_label(compiler)
417     #define JUMP(type) \
418     sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
419     #define JUMPTO(type, label) \
420     sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an existing jump to a label emitted at the current position. */
421     #define JUMPHERE(jump) \
422     sljit_set_label((jump), sljit_emit_label(compiler))
423     #define CMP(type, src1, src1w, src2, src2w) \
424     sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare and binds it to an existing label. */
425     #define CMPTO(type, src1, src1w, src2, src2w, label) \
426     sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
427     #define COND_VALUE(op, dst, dstw, type) \
428     sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
429    
/* Returns the address just past the closing KET of the group that starts at
cc. cc must point at an opcode in the range OP_ASSERT..OP_ASSERTBACK_NOT or
OP_ONCE..OP_SCOND (checked by assertion only). */
430 ph10 836 static pcre_uchar* bracketend(pcre_uchar* cc)
431 ph10 664 {
432     SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
/* Follow the link at offset 1 repeatedly for as long as it lands on OP_ALT. */
433     do cc += GET(cc, 1); while (*cc == OP_ALT);
434     SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
/* Step over the KET opcode and its link field. */
435     cc += 1 + LINK_SIZE;
436     return cc;
437     }
438    
439 ph10 691 /* Functions which might need modification for all new supported opcodes:
440 ph10 664 next_opcode
441     get_localspace
442     set_localptrs
443     get_framesize
444     init_frame
445     get_localsize
446     copy_locals
447     compile_hotpath
448     compile_fallbackpath
449     */
450    
/* Returns the address of the item that follows the opcode at cc, stepping
over the opcode and its fixed size operands. For the bracket opcodes only the
header (opcode, link and, for capturing brackets, the number) is skipped, not
the body. Returns NULL for any opcode not listed here, and for OP_ANYBYTE
when common->utf is set. common is only read when SUPPORT_UTF is defined. */
451 ph10 836 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
452 ph10 664 {
453     SLJIT_UNUSED_ARG(common);
454     switch(*cc)
455     {
/* Opcodes without operands. */
456     case OP_SOD:
457     case OP_SOM:
458     case OP_SET_SOM:
459     case OP_NOT_WORD_BOUNDARY:
460     case OP_WORD_BOUNDARY:
461     case OP_NOT_DIGIT:
462     case OP_DIGIT:
463     case OP_NOT_WHITESPACE:
464     case OP_WHITESPACE:
465     case OP_NOT_WORDCHAR:
466     case OP_WORDCHAR:
467     case OP_ANY:
468     case OP_ALLANY:
469     case OP_ANYNL:
470     case OP_NOT_HSPACE:
471     case OP_HSPACE:
472     case OP_NOT_VSPACE:
473     case OP_VSPACE:
474     case OP_EXTUNI:
475     case OP_EODN:
476     case OP_EOD:
477     case OP_CIRC:
478     case OP_CIRCM:
479     case OP_DOLL:
480     case OP_DOLLM:
481     case OP_TYPESTAR:
482     case OP_TYPEMINSTAR:
483     case OP_TYPEPLUS:
484     case OP_TYPEMINPLUS:
485     case OP_TYPEQUERY:
486     case OP_TYPEMINQUERY:
487     case OP_TYPEPOSSTAR:
488     case OP_TYPEPOSPLUS:
489     case OP_TYPEPOSQUERY:
490     case OP_CRSTAR:
491     case OP_CRMINSTAR:
492     case OP_CRPLUS:
493     case OP_CRMINPLUS:
494     case OP_CRQUERY:
495     case OP_CRMINQUERY:
496     case OP_DEF:
497     case OP_BRAZERO:
498     case OP_BRAMINZERO:
499     case OP_BRAPOSZERO:
500     case OP_FAIL:
501     case OP_ACCEPT:
502     case OP_ASSERT_ACCEPT:
503     case OP_SKIPZERO:
504     return cc + 1;
505    
/* Rejected (NULL) in UTF mode, otherwise a plain one unit opcode. */
506 zherczeg 736 case OP_ANYBYTE:
507 ph10 836 #ifdef SUPPORT_UTF
508     if (common->utf) return NULL;
509 zherczeg 736 #endif
510     return cc + 1;
511    
/* Opcode followed by one character. */
512 ph10 664 case OP_CHAR:
513     case OP_CHARI:
514     case OP_NOT:
515     case OP_NOTI:
516     case OP_STAR:
517     case OP_MINSTAR:
518     case OP_PLUS:
519     case OP_MINPLUS:
520     case OP_QUERY:
521     case OP_MINQUERY:
522     case OP_POSSTAR:
523     case OP_POSPLUS:
524     case OP_POSQUERY:
525     case OP_STARI:
526     case OP_MINSTARI:
527     case OP_PLUSI:
528     case OP_MINPLUSI:
529     case OP_QUERYI:
530     case OP_MINQUERYI:
531     case OP_POSSTARI:
532     case OP_POSPLUSI:
533     case OP_POSQUERYI:
534     case OP_NOTSTAR:
535     case OP_NOTMINSTAR:
536     case OP_NOTPLUS:
537     case OP_NOTMINPLUS:
538     case OP_NOTQUERY:
539     case OP_NOTMINQUERY:
540     case OP_NOTPOSSTAR:
541     case OP_NOTPOSPLUS:
542     case OP_NOTPOSQUERY:
543     case OP_NOTSTARI:
544     case OP_NOTMINSTARI:
545     case OP_NOTPLUSI:
546     case OP_NOTMINPLUSI:
547     case OP_NOTQUERYI:
548     case OP_NOTMINQUERYI:
549     case OP_NOTPOSSTARI:
550     case OP_NOTPOSPLUSI:
551     case OP_NOTPOSQUERYI:
552     cc += 2;
/* In UTF mode the first unit of the character (now at cc[-1]) may announce
extra units, which are skipped as well. */
553 ph10 836 #ifdef SUPPORT_UTF
554     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
555 ph10 664 #endif
556     return cc;
557    
/* Opcode followed by an IMM2_SIZE operand and one character. */
558     case OP_UPTO:
559     case OP_MINUPTO:
560     case OP_EXACT:
561     case OP_POSUPTO:
562     case OP_UPTOI:
563     case OP_MINUPTOI:
564     case OP_EXACTI:
565     case OP_POSUPTOI:
566     case OP_NOTUPTO:
567     case OP_NOTMINUPTO:
568     case OP_NOTEXACT:
569     case OP_NOTPOSUPTO:
570     case OP_NOTUPTOI:
571     case OP_NOTMINUPTOI:
572     case OP_NOTEXACTI:
573     case OP_NOTPOSUPTOI:
574 ph10 836 cc += 2 + IMM2_SIZE;
575     #ifdef SUPPORT_UTF
576     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
577 ph10 664 #endif
578     return cc;
579    
/* Opcode followed by two units. */
580     case OP_NOTPROP:
581     case OP_PROP:
582 ph10 836 return cc + 1 + 2;
583    
/* Opcode followed by one IMM2_SIZE operand. */
584 ph10 664 case OP_TYPEUPTO:
585     case OP_TYPEMINUPTO:
586     case OP_TYPEEXACT:
587     case OP_TYPEPOSUPTO:
588     case OP_REF:
589     case OP_REFI:
590     case OP_CREF:
591 zherczeg 741 case OP_NCREF:
592     case OP_RREF:
593     case OP_NRREF:
594 ph10 664 case OP_CLOSE:
595 ph10 836 cc += 1 + IMM2_SIZE;
596 ph10 664 return cc;
597    
/* Opcode followed by two IMM2_SIZE operands. */
598     case OP_CRRANGE:
599     case OP_CRMINRANGE:
600 ph10 836 return cc + 1 + 2 * IMM2_SIZE;
601 ph10 664
/* Opcode followed by 32 bytes, expressed in code units. */
602     case OP_CLASS:
603     case OP_NCLASS:
604 ph10 836 return cc + 1 + 32 / sizeof(pcre_uchar);
605 ph10 664
/* The total length of the item is stored at offset 1. */
606 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
607 ph10 664 case OP_XCLASS:
608     return cc + GET(cc, 1);
609     #endif
610    
/* Opcode followed by a link field. */
611     case OP_RECURSE:
612     case OP_ASSERT:
613     case OP_ASSERT_NOT:
614     case OP_ASSERTBACK:
615     case OP_ASSERTBACK_NOT:
616     case OP_REVERSE:
617     case OP_ONCE:
618 zherczeg 726 case OP_ONCE_NC:
619 ph10 664 case OP_BRA:
620     case OP_BRAPOS:
621     case OP_COND:
622     case OP_SBRA:
623     case OP_SBRAPOS:
624     case OP_SCOND:
625     case OP_ALT:
626     case OP_KET:
627     case OP_KETRMAX:
628     case OP_KETRMIN:
629     case OP_KETRPOS:
630     return cc + 1 + LINK_SIZE;
631    
/* Opcode followed by a link field and an IMM2_SIZE operand. */
632     case OP_CBRA:
633     case OP_CBRAPOS:
634     case OP_SCBRA:
635     case OP_SCBRAPOS:
636 ph10 836 return cc + 1 + LINK_SIZE + IMM2_SIZE;
637 ph10 664
/* Anything else is reported to the caller as NULL. */
638     default:
639     return NULL;
640     }
641     }
642    
/* Walks the opcodes in [cc, ccend) and returns the number of bytes of local
space they need: one sljit_w for each opcode handled explicitly below.
Returns -1 as soon as next_opcode reports an opcode it cannot step over. */
643 ph10 836 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
644 ph10 664 {
645     int localspace = 0;
646 ph10 836 pcre_uchar *alternative;
647 ph10 664 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
648     while (cc < ccend)
649     {
650     switch(*cc)
651     {
652     case OP_ASSERT:
653     case OP_ASSERT_NOT:
654     case OP_ASSERTBACK:
655     case OP_ASSERTBACK_NOT:
656     case OP_ONCE:
657 zherczeg 726 case OP_ONCE_NC:
658 ph10 664 case OP_BRAPOS:
659     case OP_SBRA:
660     case OP_SBRAPOS:
661     case OP_SCOND:
662     localspace += sizeof(sljit_w);
663     cc += 1 + LINK_SIZE;
664     break;
665    
666     case OP_CBRAPOS:
667     case OP_SCBRAPOS:
668     localspace += sizeof(sljit_w);
669 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
670 ph10 664 break;
671    
672     case OP_COND:
673     /* Might be a hidden SCOND. */
/* Only counted when the link at offset 1 lands on OP_KETRMAX or OP_KETRMIN. */
674     alternative = cc + GET(cc, 1);
675     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
676     localspace += sizeof(sljit_w);
677     cc += 1 + LINK_SIZE;
678     break;
679    
680     default:
681     cc = next_opcode(common, cc);
682     if (cc == NULL)
683     return -1;
684     break;
685     }
686     }
687     return localspace;
688     }
689    
/* Walks the opcodes from common->start up to ccend and hands out local
offsets: for the same opcodes that get_localspace counts, the current value
of localptr is stored in common->localptrs (indexed by the distance of the
opcode from common->start) and localptr is advanced by one sljit_w.
An opcode that next_opcode cannot step over is only caught by assertion. */
690 ph10 836 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
691 ph10 664 {
692 ph10 836 pcre_uchar *cc = common->start;
693     pcre_uchar *alternative;
694 ph10 664 while (cc < ccend)
695     {
696     switch(*cc)
697     {
698     case OP_ASSERT:
699     case OP_ASSERT_NOT:
700     case OP_ASSERTBACK:
701     case OP_ASSERTBACK_NOT:
702     case OP_ONCE:
703 zherczeg 726 case OP_ONCE_NC:
704 ph10 664 case OP_BRAPOS:
705     case OP_SBRA:
706     case OP_SBRAPOS:
707     case OP_SCOND:
708     common->localptrs[cc - common->start] = localptr;
709     localptr += sizeof(sljit_w);
710     cc += 1 + LINK_SIZE;
711     break;
712    
713     case OP_CBRAPOS:
714     case OP_SCBRAPOS:
715     common->localptrs[cc - common->start] = localptr;
716     localptr += sizeof(sljit_w);
717 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
718 ph10 664 break;
719    
720     case OP_COND:
721     /* Might be a hidden SCOND. */
/* Only assigned when the link at offset 1 lands on OP_KETRMAX or OP_KETRMIN. */
722     alternative = cc + GET(cc, 1);
723     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
724     {
725     common->localptrs[cc - common->start] = localptr;
726     localptr += sizeof(sljit_w);
727     }
728     cc += 1 + LINK_SIZE;
729     break;
730    
731     default:
732     cc = next_opcode(common, cc);
733     SLJIT_ASSERT(cc != NULL);
734     break;
735     }
736     }
737     }
738    
739     /* Returns with -1 if no need for frame. */
/* Computes the size, in words, of the saved frame for the group starting at
cc (which must be acceptable to bracketend). The body of the group is scanned
up to bracketend(cc): the first OP_SET_SOM or OP_RECURSE adds 2 words, every
capturing bracket adds 3 words. When recursive is FALSE and the group itself
is OP_CBRAPOS or OP_SCBRAPOS, it contributes 3 words of its own.
The result is the word count plus one, or -1 when nothing has to be saved. */
740 ph10 836 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
741 ph10 664 {
742 ph10 836 pcre_uchar *ccend = bracketend(cc);
743 ph10 664 int length = 0;
744     BOOL possessive = FALSE;
745     BOOL setsom_found = FALSE;
746    
747     if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
748     {
749 zherczeg 696 length = 3;
750 ph10 664 possessive = TRUE;
751     }
752    
/* Skip the header of the group itself before scanning its body. */
753     cc = next_opcode(common, cc);
754     SLJIT_ASSERT(cc != NULL);
755     while (cc < ccend)
756     switch(*cc)
757     {
758     case OP_SET_SOM:
759     case OP_RECURSE:
/* Counted once, however many of these opcodes the body contains. */
760     if (!setsom_found)
761     {
762     length += 2;
763     setsom_found = TRUE;
764     }
765     cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
766     break;
767    
768     case OP_CBRA:
769     case OP_CBRAPOS:
770     case OP_SCBRA:
771     case OP_SCBRAPOS:
772     length += 3;
773 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
774 ph10 664 break;
775    
776     default:
777     cc = next_opcode(common, cc);
778     SLJIT_ASSERT(cc != NULL);
779     break;
780     }
781    
782     /* Possessive quantifiers can use a special case. */
/* That is: the group contributed its own 3 words and the body added none. */
783 zherczeg 726 if (SLJIT_UNLIKELY(possessive) && length == 3)
784 ph10 664 return -1;
785    
/* One extra word on top of the counted entries; init_frame writes a
frame_end marker after the entries. */
786     if (length > 0)
787 zherczeg 726 return length + 1;
788     return -1;
789 ph10 664 }
790    
/* Emits the code that saves a frame for the group starting at cc. Entries
are written relative to STACK_TOP, beginning at stack slot stackpos; after
the last entry a frame_end marker is written, and an assertion checks that
this happens exactly at slot stacktop.
Entries: for the first OP_SET_SOM or OP_RECURSE in the body, the marker
frame_setstrbegin followed by the current value of OVECTOR(0); for every
capturing bracket, the offset OVECTOR(2 * number) followed by the current
values of that ovector pair.
When recursive is FALSE and the group itself is OP_CBRAPOS or OP_SCBRAPOS,
its header is not skipped, so the group gets an entry of its own. */
791 ph10 836 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
792 ph10 664 {
793     DEFINE_COMPILER;
794 ph10 836 pcre_uchar *ccend = bracketend(cc);
795 ph10 664 BOOL setsom_found = FALSE;
796     int offset;
797    
798 zherczeg 726 /* >= 1 + shortest item size (2) */
799     SLJIT_ASSERT(stackpos >= stacktop + 2);
800 ph10 664
/* From here on stackpos is a byte offset instead of a slot index. */
801     stackpos = STACK(stackpos);
802     if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
803     cc = next_opcode(common, cc);
804     SLJIT_ASSERT(cc != NULL);
805     while (cc < ccend)
806     switch(*cc)
807     {
808     case OP_SET_SOM:
809     case OP_RECURSE:
/* Saved once, however many of these opcodes the body contains. */
810     if (!setsom_found)
811     {
812     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
813     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
814     stackpos += (int)sizeof(sljit_w);
815     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
816     stackpos += (int)sizeof(sljit_w);
817     setsom_found = TRUE;
818     }
819     cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
820     break;
821    
822     case OP_CBRA:
823     case OP_CBRAPOS:
824     case OP_SCBRA:
825     case OP_SCBRAPOS:
/* The bracket number is doubled to index the first element of its pair. */
826     offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
827     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
828     stackpos += (int)sizeof(sljit_w);
829     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
830     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
831     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
832     stackpos += (int)sizeof(sljit_w);
833     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
834     stackpos += (int)sizeof(sljit_w);
835    
836 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
837 ph10 664 break;
838    
839     default:
840     cc = next_opcode(common, cc);
841     SLJIT_ASSERT(cc != NULL);
842     break;
843     }
844    
845     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
846 zherczeg 726 SLJIT_ASSERT(stackpos == STACK(stacktop));
847 ph10 664 }
848    
/* Counts the local variable words belonging to the opcodes in [cc, ccend).
The count starts at 2; the opcodes that own one private word add 1,
OP_CBRA / OP_SCBRA add 1, and OP_CBRAPOS / OP_SCBRAPOS add 2. The walk must
end exactly at ccend (checked by assertion only).
NOTE(review): the reason for the initial 2 is not visible in this function;
presumably fixed entries stored by copy_locals - confirm there. */
849 ph10 836 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
850 ph10 664 {
851     int localsize = 2;
852 ph10 836 pcre_uchar *alternative;
853 ph10 664 /* Calculate the sum of the local variables. */
854     while (cc < ccend)
855     {
856     switch(*cc)
857     {
858     case OP_ASSERT:
859     case OP_ASSERT_NOT:
860     case OP_ASSERTBACK:
861     case OP_ASSERTBACK_NOT:
862     case OP_ONCE:
863 zherczeg 726 case OP_ONCE_NC:
864 ph10 664 case OP_BRAPOS:
865     case OP_SBRA:
866     case OP_SBRAPOS:
867     case OP_SCOND:
868     localsize++;
869     cc += 1 + LINK_SIZE;
870     break;
871    
872     case OP_CBRA:
873     case OP_SCBRA:
874     localsize++;
875 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
876 ph10 664 break;
877    
878     case OP_CBRAPOS:
879     case OP_SCBRAPOS:
880     localsize += 2;
881 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
882 ph10 664 break;
883    
884     case OP_COND:
885     /* Might be a hidden SCOND. */
/* Only counted when the link at offset 1 lands on OP_KETRMAX or OP_KETRMIN. */
886     alternative = cc + GET(cc, 1);
887     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
888     localsize++;
889     cc += 1 + LINK_SIZE;
890     break;
891    
892     default:
893     cc = next_opcode(common, cc);
894     SLJIT_ASSERT(cc != NULL);
895     break;
896     }
897     }
898     SLJIT_ASSERT(cc == ccend);
899     return localsize;
900     }
901    
902 ph10 836 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
903 ph10 664 BOOL save, int stackptr, int stacktop)
904     {
905     DEFINE_COMPILER;
906     int srcw[2];
907     int count;
908     BOOL tmp1next = TRUE;
909     BOOL tmp1empty = TRUE;
910     BOOL tmp2empty = TRUE;
911 ph10 836 pcre_uchar *alternative;
912 ph10 664 enum {
913     start,
914     loop,
915     end
916     } status;
917    
918     status = save ? start : loop;
919     stackptr = STACK(stackptr - 2);
920     stacktop = STACK(stacktop - 1);
921    
922     if (!save)
923     {
924     stackptr += sizeof(sljit_w);
925     if (stackptr < stacktop)
926     {
927     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
928     stackptr += sizeof(sljit_w);
929     tmp1empty = FALSE;
930     }
931     if (stackptr < stacktop)
932     {
933     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
934     stackptr += sizeof(sljit_w);
935     tmp2empty = FALSE;
936     }
937     /* The tmp1next must be TRUE in either way. */
938     }
939    
940     while (status != end)
941     {
942     count = 0;
943     switch(status)
944     {
945     case start:
946     SLJIT_ASSERT(save);
947     count = 1;
948     srcw[0] = RECURSIVE_HEAD;
949     status = loop;
950     break;
951    
952     case loop:
953     if (cc >= ccend)
954     {
955     status = end;
956     break;
957     }
958    
959     switch(*cc)
960     {
961     case OP_ASSERT:
962     case OP_ASSERT_NOT:
963     case OP_ASSERTBACK:
964     case OP_ASSERTBACK_NOT:
965     case OP_ONCE:
966 zherczeg 726 case OP_ONCE_NC:
967 ph10 664 case OP_BRAPOS:
968     case OP_SBRA:
969     case OP_SBRAPOS:
970     case OP_SCOND:
971     count = 1;
972 ph10 836 srcw[0] = PRIV_DATA(cc);
973 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
974     cc += 1 + LINK_SIZE;
975     break;
976    
977     case OP_CBRA:
978     case OP_SCBRA:
979     count = 1;
980     srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
981 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
982 ph10 664 break;
983    
984     case OP_CBRAPOS:
985     case OP_SCBRAPOS:
986     count = 2;
987     srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
988 ph10 836 srcw[0] = PRIV_DATA(cc);
989 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
990 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
991 ph10 664 break;
992    
993     case OP_COND:
994     /* Might be a hidden SCOND. */
995     alternative = cc + GET(cc, 1);
996     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
997     {
998     count = 1;
999 ph10 836 srcw[0] = PRIV_DATA(cc);
1000 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1001     }
1002     cc += 1 + LINK_SIZE;
1003     break;
1004    
1005     default:
1006     cc = next_opcode(common, cc);
1007     SLJIT_ASSERT(cc != NULL);
1008     break;
1009     }
1010     break;
1011    
1012     case end:
1013     SLJIT_ASSERT_STOP();
1014     break;
1015     }
1016    
1017     while (count > 0)
1018     {
1019     count--;
1020     if (save)
1021     {
1022     if (tmp1next)
1023     {
1024     if (!tmp1empty)
1025     {
1026     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1027     stackptr += sizeof(sljit_w);
1028     }
1029     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1030     tmp1empty = FALSE;
1031     tmp1next = FALSE;
1032     }
1033     else
1034     {
1035     if (!tmp2empty)
1036     {
1037     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1038     stackptr += sizeof(sljit_w);
1039     }
1040     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1041     tmp2empty = FALSE;
1042     tmp1next = TRUE;
1043     }
1044     }
1045     else
1046     {
1047     if (tmp1next)
1048     {
1049     SLJIT_ASSERT(!tmp1empty);
1050     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1051     tmp1empty = stackptr >= stacktop;
1052     if (!tmp1empty)
1053     {
1054     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1055     stackptr += sizeof(sljit_w);
1056     }
1057     tmp1next = FALSE;
1058     }
1059     else
1060     {
1061     SLJIT_ASSERT(!tmp2empty);
1062     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1063     tmp2empty = stackptr >= stacktop;
1064     if (!tmp2empty)
1065     {
1066     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1067     stackptr += sizeof(sljit_w);
1068     }
1069     tmp1next = TRUE;
1070     }
1071     }
1072     }
1073     }
1074    
1075     if (save)
1076     {
1077     if (tmp1next)
1078     {
1079     if (!tmp1empty)
1080     {
1081     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1082     stackptr += sizeof(sljit_w);
1083     }
1084     if (!tmp2empty)
1085     {
1086     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1087     stackptr += sizeof(sljit_w);
1088     }
1089     }
1090     else
1091     {
1092     if (!tmp2empty)
1093     {
1094     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1095     stackptr += sizeof(sljit_w);
1096     }
1097     if (!tmp1empty)
1098     {
1099     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1100     stackptr += sizeof(sljit_w);
1101     }
1102     }
1103     }
1104     SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1105     }
1106    
1107     static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1108     {
1109     return (value & (value - 1)) == 0;
1110     }
1111    
1112     static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1113     {
1114     while (list)
1115     {
1116     /* sljit_set_label is clever enough to do nothing
1117     if either the jump or the label is NULL */
1118     sljit_set_label(list->jump, label);
1119     list = list->next;
1120     }
1121     }
1122    
1123     static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1124     {
1125     jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1126     if (list_item)
1127     {
1128     list_item->next = *list;
1129     list_item->jump = jump;
1130     *list = list_item;
1131     }
1132     }
1133    
1134     static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1135     {
1136     DEFINE_COMPILER;
1137     stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1138    
1139     if (list_item)
1140     {
1141     list_item->type = type;
1142     list_item->data = data;
1143     list_item->start = start;
1144     list_item->leave = LABEL();
1145     list_item->next = common->stubs;
1146     common->stubs = list_item;
1147     }
1148     }
1149    
1150     static void flush_stubs(compiler_common *common)
1151     {
1152     DEFINE_COMPILER;
1153     stub_list* list_item = common->stubs;
1154    
1155     while (list_item)
1156     {
1157     JUMPHERE(list_item->start);
1158     switch(list_item->type)
1159     {
1160     case stack_alloc:
1161     add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1162     break;
1163     }
1164     JUMPTO(SLJIT_JUMP, list_item->leave);
1165     list_item = list_item->next;
1166     }
1167     common->stubs = NULL;
1168     }
1169    
1170 ph10 677 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1171     {
1172     DEFINE_COMPILER;
1173    
1174 zherczeg 695 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1175 ph10 677 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1176     }
1177    
1178 ph10 664 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1179     {
1180     /* May destroy all locals and registers except TMP2. */
1181     DEFINE_COMPILER;
1182    
1183     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1184     #ifdef DESTROY_REGISTERS
1185     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1186     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1187     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1188     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1189     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1190     #endif
1191     add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1192     }
1193    
1194     static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1195     {
1196     DEFINE_COMPILER;
1197     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1198     }
1199    
1200     static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1201     {
1202     DEFINE_COMPILER;
1203     struct sljit_label *loop;
1204     int i;
1205     /* At this point we can freely use all temporary registers. */
1206     /* TMP1 returns with begin - 1. */
1207 zherczeg 880 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1208 ph10 664 if (length < 8)
1209     {
1210     for (i = 0; i < length; i++)
1211     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1212     }
1213     else
1214     {
1215     OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1216     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1217     loop = LABEL();
1218     OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1219     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1220     JUMPTO(SLJIT_C_NOT_ZERO, loop);
1221     }
1222     }
1223    
1224 zherczeg 696 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1225 ph10 664 {
1226     DEFINE_COMPILER;
1227     struct sljit_label *loop;
1228     struct sljit_jump *earlyexit;
1229    
1230     /* At this point we can freely use all registers. */
1231 zherczeg 880 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1232 zherczeg 696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1233    
1234 ph10 664 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1235     OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1236     OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1237     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1238 zherczeg 880 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1239 ph10 664 /* Unlikely, but possible */
1240     earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1241     loop = LABEL();
1242 zherczeg 880 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1243     OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1244 ph10 664 /* Copy the integer value to the output buffer */
1245 ph10 836 #ifdef COMPILE_PCRE16
1246 zherczeg 880 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1247 ph10 836 #endif
1248 zherczeg 880 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1249 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1250     JUMPTO(SLJIT_C_NOT_ZERO, loop);
1251     JUMPHERE(earlyexit);
1252 zherczeg 696
1253     /* Calculate the return value, which is the maximum ovector value. */
1254     if (topbracket > 1)
1255     {
1256     OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1257     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1258    
1259 zherczeg 880 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1260 zherczeg 696 loop = LABEL();
1261 zherczeg 715 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1262 zherczeg 696 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1263 zherczeg 880 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1264 zherczeg 696 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1265     }
1266     else
1267     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1268 ph10 664 }
1269    
1270 ph10 836 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1271 ph10 664 {
1272     /* Detects if the character has an othercase. */
1273     unsigned int c;
1274    
1275 ph10 836 #ifdef SUPPORT_UTF
1276     if (common->utf)
1277 ph10 664 {
1278     GETCHAR(c, cc);
1279     if (c > 127)
1280     {
1281     #ifdef SUPPORT_UCP
1282     return c != UCD_OTHERCASE(c);
1283     #else
1284     return FALSE;
1285     #endif
1286     }
1287 ph10 836 #ifndef COMPILE_PCRE8
1288     return common->fcc[c] != c;
1289     #endif
1290 ph10 664 }
1291     else
1292     #endif
1293     c = *cc;
1294 ph10 836 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1295 ph10 664 }
1296    
1297     static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1298     {
1299     /* Returns with the othercase. */
1300 ph10 836 #ifdef SUPPORT_UTF
1301     if (common->utf && c > 127)
1302 ph10 664 {
1303     #ifdef SUPPORT_UCP
1304     return UCD_OTHERCASE(c);
1305     #else
1306     return c;
1307     #endif
1308     }
1309     #endif
1310 ph10 836 return TABLE_GET(c, common->fcc, c);
1311 ph10 664 }
1312    
1313 ph10 836 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1314 ph10 664 {
1315     /* Detects if the character and its othercase has only 1 bit difference. */
1316     unsigned int c, oc, bit;
1317 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1318 ph10 664 int n;
1319     #endif
1320    
1321 ph10 836 #ifdef SUPPORT_UTF
1322     if (common->utf)
1323 ph10 664 {
1324     GETCHAR(c, cc);
1325     if (c <= 127)
1326     oc = common->fcc[c];
1327     else
1328     {
1329     #ifdef SUPPORT_UCP
1330     oc = UCD_OTHERCASE(c);
1331     #else
1332     oc = c;
1333     #endif
1334     }
1335     }
1336     else
1337     {
1338     c = *cc;
1339 ph10 836 oc = TABLE_GET(c, common->fcc, c);
1340 ph10 664 }
1341     #else
1342     c = *cc;
1343 ph10 836 oc = TABLE_GET(c, common->fcc, c);
1344 ph10 664 #endif
1345    
1346     SLJIT_ASSERT(c != oc);
1347    
1348     bit = c ^ oc;
1349     /* Optimized for English alphabet. */
1350     if (c <= 127 && bit == 0x20)
1351     return (0 << 8) | 0x20;
1352    
1353     /* Since c != oc, they must have at least 1 bit difference. */
1354     if (!ispowerof2(bit))
1355     return 0;
1356    
1357 ph10 836 #ifdef COMPILE_PCRE8
1358    
1359     #ifdef SUPPORT_UTF
1360     if (common->utf && c > 127)
1361 ph10 664 {
1362 ph10 836 n = GET_EXTRALEN(*cc);
1363 ph10 664 while ((bit & 0x3f) == 0)
1364     {
1365     n--;
1366     bit >>= 6;
1367     }
1368     return (n << 8) | bit;
1369     }
1370 ph10 836 #endif /* SUPPORT_UTF */
1371 ph10 664 return (0 << 8) | bit;
1372 ph10 836
1373     #else /* COMPILE_PCRE8 */
1374    
1375     #ifdef COMPILE_PCRE16
1376     #ifdef SUPPORT_UTF
1377     if (common->utf && c > 65535)
1378     {
1379     if (bit >= (1 << 10))
1380     bit >>= 10;
1381     else
1382     return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1383     }
1384     #endif /* SUPPORT_UTF */
1385     return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1386     #endif /* COMPILE_PCRE16 */
1387    
1388     #endif /* COMPILE_PCRE8 */
1389 ph10 664 }
1390    
1391     static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)
1392     {
1393     DEFINE_COMPILER;
1394     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1395     }
1396    
1397     static void read_char(compiler_common *common)
1398     {
1399     /* Reads the character into TMP1, updates STR_PTR.
1400     Does not check STR_END. TMP2 Destroyed. */
1401     DEFINE_COMPILER;
1402 ph10 836 #ifdef SUPPORT_UTF
1403 ph10 664 struct sljit_jump *jump;
1404     #endif
1405    
1406 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1407     #ifdef SUPPORT_UTF
1408     if (common->utf)
1409 ph10 664 {
1410 ph10 836 #ifdef COMPILE_PCRE8
1411 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1412 ph10 836 #else
1413     #ifdef COMPILE_PCRE16
1414     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1415     #endif
1416     #endif /* COMPILE_PCRE8 */
1417     add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1418 ph10 664 JUMPHERE(jump);
1419     }
1420     #endif
1421 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1422 ph10 664 }
1423    
1424     static void peek_char(compiler_common *common)
1425     {
1426     /* Reads the character into TMP1, keeps STR_PTR.
1427     Does not check STR_END. TMP2 Destroyed. */
1428     DEFINE_COMPILER;
1429 ph10 836 #ifdef SUPPORT_UTF
1430 ph10 664 struct sljit_jump *jump;
1431     #endif
1432    
1433 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1434     #ifdef SUPPORT_UTF
1435     if (common->utf)
1436 ph10 664 {
1437 ph10 836 #ifdef COMPILE_PCRE8
1438 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1439 ph10 836 #else
1440     #ifdef COMPILE_PCRE16
1441     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1442     #endif
1443     #endif /* COMPILE_PCRE8 */
1444     add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1445 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1446     JUMPHERE(jump);
1447     }
1448     #endif
1449     }
1450    
1451     static void read_char8_type(compiler_common *common)
1452     {
1453     /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1454     DEFINE_COMPILER;
1455 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1456 ph10 664 struct sljit_jump *jump;
1457     #endif
1458    
1459 ph10 836 #ifdef SUPPORT_UTF
1460     if (common->utf)
1461 ph10 664 {
1462 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1463     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1464     #ifdef COMPILE_PCRE8
1465 ph10 664 /* This can be an extra read in some situations, but hopefully
1466 ph10 836 it is needed in most cases. */
1467 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1468 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1469 ph10 836 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1470 ph10 664 JUMPHERE(jump);
1471 ph10 836 #else
1472     #ifdef COMPILE_PCRE16
1473     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1474     jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1475     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1476     JUMPHERE(jump);
1477     /* Skip low surrogate if necessary. */
1478     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1479     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1480     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1481     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1482     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1483     #endif
1484     #endif /* COMPILE_PCRE8 */
1485 ph10 664 return;
1486     }
1487     #endif
1488 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1489     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1490     #ifdef COMPILE_PCRE16
1491     /* The ctypes array contains only 256 values. */
1492     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1493     jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1494     #endif
1495     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1496     #ifdef COMPILE_PCRE16
1497     JUMPHERE(jump);
1498     #endif
1499 ph10 664 }
1500    
1501     static void skip_char_back(compiler_common *common)
1502     {
1503 ph10 836 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1504 ph10 664 DEFINE_COMPILER;
1505 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1506 ph10 664 struct sljit_label *label;
1507    
1508 ph10 836 if (common->utf)
1509 ph10 664 {
1510     label = LABEL();
1511 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1512     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1513 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1514     CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1515     return;
1516     }
1517     #endif
1518 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1519     if (common->utf)
1520     {
1521     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1522     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1523     /* Skip low surrogate if necessary. */
1524     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1525     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1526     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1527     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1528     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1529     return;
1530     }
1531     #endif
1532     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1533 ph10 664 }
1534    
1535     static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1536     {
1537     /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1538     DEFINE_COMPILER;
1539    
1540     if (nltype == NLTYPE_ANY)
1541     {
1542     add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1543     add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1544     }
1545     else if (nltype == NLTYPE_ANYCRLF)
1546     {
1547     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1548     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1549     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1550     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1551     add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1552     }
1553     else
1554     {
1555 ph10 836 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1556 ph10 664 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1557     }
1558     }
1559    
1560 ph10 836 #ifdef SUPPORT_UTF
1561    
1562     #ifdef COMPILE_PCRE8
1563     static void do_utfreadchar(compiler_common *common)
1564 ph10 664 {
1565 ph10 836 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1566 zherczeg 736 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1567 ph10 664 DEFINE_COMPILER;
1568     struct sljit_jump *jump;
1569    
1570     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1571     /* Searching for the first zero. */
1572     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1573     jump = JUMP(SLJIT_C_NOT_ZERO);
1574 ph10 836 /* Two byte sequence. */
1575     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1576     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1577 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1578     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1579     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1580     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1581 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1582 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1583     JUMPHERE(jump);
1584    
1585     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1586     jump = JUMP(SLJIT_C_NOT_ZERO);
1587 ph10 836 /* Three byte sequence. */
1588     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1589 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1590     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1591     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1592     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1593     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1594 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1595     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1596 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1597     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1598 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1599 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1600     JUMPHERE(jump);
1601    
1602 ph10 836 /* Four byte sequence. */
1603     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1604 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1605     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1606     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1607     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1608     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1609 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1610 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1611     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1612     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1613 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1614     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1615 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1616     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1617 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1618 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1619     }
1620    
1621 ph10 836 static void do_utfreadtype8(compiler_common *common)
1622 ph10 664 {
1623 ph10 836 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1624     of the character (>= 0xc0). Return value in TMP1. */
1625 ph10 664 DEFINE_COMPILER;
1626     struct sljit_jump *jump;
1627     struct sljit_jump *compare;
1628    
1629     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1630    
1631     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1632     jump = JUMP(SLJIT_C_NOT_ZERO);
1633 ph10 836 /* Two byte sequence. */
1634     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1635     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1636 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1637     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1638     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1639     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
1640     compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1641     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1642     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1643    
1644     JUMPHERE(compare);
1645     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1646     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1647     JUMPHERE(jump);
1648    
1649     /* We only have types for characters less than 256. */
1650 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1651 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1652     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1653     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1654     }
1655    
1656 ph10 836 #else /* COMPILE_PCRE8 */
1657 ph10 664
1658 ph10 836 #ifdef COMPILE_PCRE16
1659     static void do_utfreadchar(compiler_common *common)
1660     {
1661     /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1662     of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1663     DEFINE_COMPILER;
1664     struct sljit_jump *jump;
1665    
1666     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1667     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1668     /* Do nothing, only return. */
1669     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1670    
1671     JUMPHERE(jump);
1672     /* Combine two 16 bit characters. */
1673     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1674     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1675     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1676     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1677     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1678     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1679     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1680     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1681     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1682     }
1683     #endif /* COMPILE_PCRE16 */
1684    
1685     #endif /* COMPILE_PCRE8 */
1686    
1687     #endif /* SUPPORT_UTF */
1688    
1689 ph10 664 #ifdef SUPPORT_UCP
1690    
1691     /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1692     #define UCD_BLOCK_MASK 127
1693     #define UCD_BLOCK_SHIFT 7
1694    
1695     static void do_getucd(compiler_common *common)
1696     {
1697     /* Search the UCD record for the character comes in TMP1.
1698     Returns chartype in TMP1 and UCD offset in TMP2. */
1699     DEFINE_COMPILER;
1700    
1701     SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1702    
1703     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1704     OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1705 ph10 836 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
1706 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1707     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1708     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
1709 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1710 ph10 664 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
1711 ph10 836 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1712 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1713     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1714     }
1715     #endif
1716    
1717     static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1718     {
1719     DEFINE_COMPILER;
1720     struct sljit_label *mainloop;
1721     struct sljit_label *newlinelabel = NULL;
1722     struct sljit_jump *start;
1723     struct sljit_jump *end = NULL;
1724     struct sljit_jump *nl = NULL;
1725 ph10 836 #ifdef SUPPORT_UTF
1726     struct sljit_jump *singlechar;
1727 zherczeg 736 #endif
1728 ph10 664 jump_list *newline = NULL;
1729     BOOL newlinecheck = FALSE;
1730 ph10 836 BOOL readuchar = FALSE;
1731 ph10 664
1732     if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1733     common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1734     newlinecheck = TRUE;
1735    
1736     if (firstline)
1737     {
1738     /* Search for the end of the first line. */
1739     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1740     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);
1741    
1742     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1743     {
1744     mainloop = LABEL();
1745 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1746 ph10 664 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1747 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1748     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1749 ph10 664 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
1750     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1751 ph10 836 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1752 ph10 664 }
1753     else
1754     {
1755     end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1756     mainloop = LABEL();
1757     /* Continual stores does not cause data dependency. */
1758     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1759     read_char(common);
1760     check_newlinechar(common, common->nltype, &newline, TRUE);
1761     CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
1762     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1763     set_jumps(newline, LABEL());
1764     }
1765    
1766     JUMPHERE(end);
1767     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1768     }
1769    
1770     start = JUMP(SLJIT_JUMP);
1771    
1772     if (newlinecheck)
1773     {
1774     newlinelabel = LABEL();
1775 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1776 ph10 664 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1777 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1778 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1779     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1780 ph10 836 #ifdef COMPILE_PCRE16
1781     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1782     #endif
1783 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1784     nl = JUMP(SLJIT_JUMP);
1785     }
1786    
1787     mainloop = LABEL();
1788    
1789     /* Increasing the STR_PTR here requires one less jump in the most common case. */
1790 ph10 836 #ifdef SUPPORT_UTF
1791     if (common->utf) readuchar = TRUE;
1792 ph10 664 #endif
1793 ph10 836 if (newlinecheck) readuchar = TRUE;
1794 ph10 664
1795 ph10 836 if (readuchar)
1796     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1797 ph10 664
1798     if (newlinecheck)
1799     CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1800    
1801 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1802     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1803     if (common->utf)
1804 ph10 664 {
1805 ph10 836 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1806     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1807 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1808 ph10 836 JUMPHERE(singlechar);
1809 ph10 664 }
1810     #endif
1811 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1812     if (common->utf)
1813     {
1814     singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1815     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1816     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1817     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1818     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1819     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1820     JUMPHERE(singlechar);
1821     }
1822     #endif
1823 ph10 664 JUMPHERE(start);
1824    
1825     if (newlinecheck)
1826     {
1827     JUMPHERE(end);
1828     JUMPHERE(nl);
1829     }
1830    
1831     return mainloop;
1832     }
1833    
/* Emits a scanning loop that advances STR_PTR until the character at STR_PTR
equals first_char (or, when caseless is set, its other case), or until STR_PTR
reaches STR_END.

  common     compiler state (supplies the compiler, the fcc table and utf flag)
  first_char the character every match must start with
  caseless   TRUE if the other case of first_char is acceptable as well
  firstline  TRUE to limit the scan to the value stored at FIRSTLINE_END

When firstline is set, STR_END is saved in POSSESSIVE0, replaced by the value
stored at FIRSTLINE_END for the duration of the scan, and restored afterwards. */
1834 ph10 836 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
1835 ph10 664 {
1836     DEFINE_COMPILER;
1837     struct sljit_label *start;
1838     struct sljit_jump *leave;
1839     struct sljit_jump *found;
1840 ph10 836 pcre_uchar oc, bit;
1841 ph10 664
1842     if (firstline)
1843     {
/* Save the real end pointer and scan only up to FIRSTLINE_END. */
1844     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1845     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1846     }
1847    
/* Loop head: leave when the (possibly shortened) end is reached, otherwise
load the current character into TMP1. */
1848     start = LABEL();
1849     leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1850 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1851 ph10 664
/* oc is the other case of first_char; it stays equal to first_char when the
match is caseful or the character has no other case. */
1852 ph10 836 oc = first_char;
1853     if (caseless)
1854     {
1855     oc = TABLE_GET(first_char, common->fcc, first_char);
1856     #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
/* In non-8-bit UTF mode, characters above 127 take their other case from the
Unicode data instead of the fcc table. */
1857     if (first_char > 127 && common->utf)
1858     oc = UCD_OTHERCASE(first_char);
1859     #endif
1860     }
1861     if (first_char == oc)
1862     found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
1863 ph10 664 else
1864     {
1865 ph10 836 bit = first_char ^ oc;
1866 ph10 664 if (ispowerof2(bit))
1867     {
/* The two cases differ in a single bit: force that bit on in a copy of the
loaded character and do one comparison. */
1868     OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
1869 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
1870 ph10 664 }
1871     else
1872     {
/* Otherwise compare against both cases and OR the two results in TMP2. */
1873 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
1874 ph10 664 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1875     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
1876     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1877     found = JUMP(SLJIT_C_NOT_ZERO);
1878     }
1879     }
1880    
/* No match at this position: step over one code unit. TMP1 still holds the
code unit that was just examined. */
1881 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1882     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1883     if (common->utf)
1884 ph10 664 {
/* UTF-8: bytes below 0xc0 need no further skipping; for the others the
additional length is read from utf8_table4, indexed by (byte - 0xc0). */
1885 zherczeg 736 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
1886 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1887 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1888     }
1889     #endif
1890 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1891     if (common->utf)
1892     {
/* UTF-16: units below 0xd800 need no further skipping. Otherwise TMP1 becomes
1 when (unit & 0xfc00) == 0xd800 and 0 if not; it is shifted left by one and
added to STR_PTR, so one more 16-bit unit is skipped in the former case. */
1893     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
1894     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1895     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1896     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1897     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1898     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1899     }
1900     #endif
1901 ph10 664 JUMPTO(SLJIT_JUMP, start);
/* Both exits (character found, end reached) continue here. */
1902     JUMPHERE(found);
1903     JUMPHERE(leave);
1904    
1905     if (firstline)
1906     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1907     }
1908    
/* Emits code that moves STR_PTR forward to the position following the next
newline, so that matching can start at the beginning of a line. Nothing is
skipped when STR_PTR is at or before the "str" field of the jit_arguments.

  common     compiler state (supplies nltype and newline)
  firstline  TRUE to limit the scan to the value stored at FIRSTLINE_END

When firstline is set, STR_END is saved in POSSESSIVE0, replaced by the value
stored at FIRSTLINE_END for the duration of the scan, and restored afterwards. */
1909     static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
1910     {
1911     DEFINE_COMPILER;
1912     struct sljit_label *loop;
1913     struct sljit_jump *lastchar;
1914     struct sljit_jump *firstchar;
1915     struct sljit_jump *leave;
1916     struct sljit_jump *foundcr = NULL;
1917     struct sljit_jump *notfoundnl;
1918     jump_list *newline = NULL;
1919    
1920     if (firstline)
1921     {
1922     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1923     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1924     }
1925    
/* Fixed newline made of two characters: the first is (newline >> 8) & 0xff,
the second is newline & 0xff. */
1926     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1927     {
1928     lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP2 = arguments->str, TMP1 = arguments->begin. */
1929     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1930     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1931     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
1932     firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1933    
/* Step STR_PTR back by one character, but only when it is at least two
characters past "begin"; the loop below looks at the two characters that
precede STR_PTR after its initial increment. */
1934 ph10 836 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
1935 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
1936     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
1937 ph10 836 #ifdef COMPILE_PCRE16
/* Convert the 0/1 character count into a byte count. */
1938     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1939     #endif
1940 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1941    
/* Advance one character at a time until the two characters just before
STR_PTR form the newline sequence, or the end is reached. */
1942     loop = LABEL();
1943 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1944 ph10 664 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1945 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
1946     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1947 ph10 664 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
1948     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
1949    
1950     JUMPHERE(leave);
1951     JUMPHERE(firstchar);
1952     JUMPHERE(lastchar);
1953    
1954     if (firstline)
1955     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1956     return;
1957     }
1958    
/* All other newline types. Skip the whole scan when STR_PTR <= arguments->str;
otherwise step back one character so the loop starts with the character that
precedes the current position. */
1959     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1960     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1961     firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1962     skip_char_back(common);
1963    
1964     loop = LABEL();
1965     read_char(common);
1966     lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* For the ANY and ANYCRLF types a CR is handled separately below, because it
may be followed by an NL that belongs to the same newline. */
1967     if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1968     foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* NOTE(review): with the last argument FALSE, check_newlinechar presumably
collects the jumps taken when the character is NOT a newline; they are bound
to the loop head here - confirm against check_newlinechar. */
1969     check_newlinechar(common, common->nltype, &newline, FALSE);
1970     set_jumps(newline, loop);
1971    
1972     if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1973     {
1974     leave = JUMP(SLJIT_JUMP);
1975     JUMPHERE(foundcr);
/* After a CR: if there is another character and it is NL, step over it too.
TMP1 becomes 0 or 1 (scaled to bytes in the 16-bit build). */
1976     notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1977 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1978 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1979     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1980 ph10 836 #ifdef COMPILE_PCRE16
1981     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1982     #endif
1983 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1984     JUMPHERE(notfoundnl);
1985     JUMPHERE(leave);
1986     }
1987     JUMPHERE(lastchar);
1988     JUMPHERE(firstchar);
1989    
1990     if (firstline)
1991     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1992     }
1993    
/* Emits a scanning loop that advances STR_PTR until the character at STR_PTR
has its bit set in the bitmap located at start_bits, or until STR_PTR reaches
STR_END.

  common      compiler state
  start_bits  address of the bitmap; the bit for character c is bit (c & 7)
              of the byte at start_bits + (c >> 3)
  firstline   TRUE to limit the scan to the value stored at FIRSTLINE_END

When firstline is set, STR_END is saved in POSSESSIVE0, replaced by the value
stored at FIRSTLINE_END for the duration of the scan, and restored afterwards. */
1994     static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
1995     {
1996     DEFINE_COMPILER;
1997     struct sljit_label *start;
1998     struct sljit_jump *leave;
1999     struct sljit_jump *found;
2000 ph10 836 #ifndef COMPILE_PCRE8
2001     struct sljit_jump *jump;
2002     #endif
2003 ph10 664
2004     if (firstline)
2005     {
2006     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2007     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
2008     }
2009    
2010     start = LABEL();
2011     leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2012 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2013     #ifdef SUPPORT_UTF
/* Keep the unmodified code unit in TMP3: TMP1 is overwritten by the bitmap
test, but the original value is needed later to compute the character length. */
2014     if (common->utf)
2015 zherczeg 736 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2016 ph10 664 #endif
2017 ph10 836 #ifndef COMPILE_PCRE8
/* The bitmap is indexed with values up to 255 only: any larger code unit is
replaced by 255 before the lookup. */
2018     jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2019     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2020     JUMPHERE(jump);
2021     #endif
/* TMP2 = 1 << (c & 7), TMP1 = bitmap byte at index (c >> 3); the character is
a possible starting character when their AND is non-zero. */
2022 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2023     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2024     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2025     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2026     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2027     found = JUMP(SLJIT_C_NOT_ZERO);
2028    
2029 ph10 836 #ifdef SUPPORT_UTF
2030     if (common->utf)
2031 zherczeg 736 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2032     #endif
/* Not a starting character: step over one code unit, plus any additional
units that belong to the same UTF character. */
2033 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2034     #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2035     if (common->utf)
2036 zherczeg 736 {
/* UTF-8: bytes of 0xc0 and above take their additional length from
utf8_table4, indexed by (byte - 0xc0). */
2037     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2038 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2039 zherczeg 736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2040     }
2041 ph10 664 #endif
2042 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2043     if (common->utf)
2044     {
/* UTF-16: when (unit & 0xfc00) == 0xd800, one more 16-bit unit (two bytes)
is skipped. */
2045     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2046     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2047     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2048     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2049     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2050     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2051     }
2052     #endif
2053 ph10 664 JUMPTO(SLJIT_JUMP, start);
2054     JUMPHERE(found);
2055     JUMPHERE(leave);
2056    
2057     if (firstline)
2058     OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2059     }
2060    
/* Emits a forward search for req_char (or, when caseless is set, its other
case) in the remaining subject.

  common        compiler state (supplies the fcc table and utf flag)
  req_char      the character that must occur somewhere in a match
  caseless      TRUE if the other case of req_char is acceptable as well
  has_firstchar TRUE to begin the search one character after STR_PTR

Returns the jump that is taken when the search reaches STR_END without finding
the character; the caller has to bind it.

The search is skipped entirely when STR_PTR + REQ_BYTE_MAX is still below
STR_END, or when STR_PTR is below the position stored at REQ_CHAR_PTR. When the
character is found, its position is stored at REQ_CHAR_PTR. STR_PTR itself is
not modified. */
2061 ph10 836 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2062 ph10 664 {
2063     DEFINE_COMPILER;
2064     struct sljit_label *loop;
2065     struct sljit_jump *toolong;
2066     struct sljit_jump *alreadyfound;
2067     struct sljit_jump *found;
2068     struct sljit_jump *foundoc = NULL;
2069     struct sljit_jump *notfound;
2070 ph10 836 pcre_uchar oc, bit;
2071 ph10 664
/* TMP2 = position recorded by a previous search, TMP1 = STR_PTR + REQ_BYTE_MAX. */
2072 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR);
2073 ph10 664 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2074     toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2075     alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2076    
/* TMP1 is the search cursor from here on. */
2077 ph10 836 if (has_firstchar)
2078     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2079 ph10 664 else
2080     OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2081    
2082     loop = LABEL();
2083     notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2084    
2085 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char; it stays equal to req_char when the match
is caseful or the character has no other case. */
2086     oc = req_char;
2087     if (caseless)
2088     {
2089     oc = TABLE_GET(req_char, common->fcc, req_char);
2090     #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2091     if (req_char > 127 && common->utf)
2092     oc = UCD_OTHERCASE(req_char);
2093     #endif
2094     }
2095     if (req_char == oc)
2096     found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2097 ph10 664 else
2098     {
2099 ph10 836 bit = req_char ^ oc;
2100 ph10 664 if (ispowerof2(bit))
2101     {
/* The two cases differ in a single bit: force it on and compare once. */
2102     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2103 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2104 ph10 664 }
2105     else
2106     {
/* Otherwise test both cases with separate conditional jumps. */
2107 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2108 ph10 664 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2109     }
2110     }
2111 ph10 836 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2112 ph10 664 JUMPTO(SLJIT_JUMP, loop);
2113    
2114     JUMPHERE(found);
2115     if (foundoc)
2116     JUMPHERE(foundoc);
/* Remember where the character was found. */
2117 ph10 836 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, TMP1, 0);
2118 ph10 664 JUMPHERE(alreadyfound);
2119     JUMPHERE(toolong);
2120     return notfound;
2121     }
2122    
/* Emits the shared fast-call routine that processes the frame records found
on the stack, starting at STACK_TOP. TMP1 walks the records and TMP2 holds the
first word of the current record:

  > frame_end (signed)   the word is an offset from SLJIT_LOCALS_REG; the two
                         following words are copied to that location and the
                         cursor moves on by three words
  == frame_end           the routine returns
  == frame_setstrbegin   the following word is stored into OVECTOR(0) and the
                         cursor moves on by two words
  anything else          the cursor moves on by two words

The return address is kept in RETURN_ADDR. */
2123     static void do_revertframes(compiler_common *common)
2124     {
2125     DEFINE_COMPILER;
2126     struct sljit_jump *jump;
2127     struct sljit_label *mainloop;
2128    
2129     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2130 zherczeg 726 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2131 ph10 664
2132     /* Drop frames until we reach STACK_TOP. */
2133     mainloop = LABEL();
2134     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2135     jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Ordinary record: restore two saved words to SLJIT_LOCALS_REG + offset. */
2136     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
2137     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2138     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2139     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2140     JUMPTO(SLJIT_JUMP, mainloop);
2141    
2142     JUMPHERE(jump);
2143     jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2144     /* End of dropping frames. */
2145     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2146    
2147     JUMPHERE(jump);
2148     jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2149 zherczeg 696 /* Set string begin. */
2150 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2151     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2152     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2153     JUMPTO(SLJIT_JUMP, mainloop);
2154    
2155     JUMPHERE(jump);
2156     /* Unknown command. */
2157     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2158     JUMPTO(SLJIT_JUMP, mainloop);
2159     }
2160    
/* Emits the shared fast-call routine that tests for a word boundary at
STR_PTR. It computes a 0/1 "is a word character" value for the character
before STR_PTR (kept in LOCALS1; 0 when STR_PTR is at or before the "begin"
field of the jit_arguments) and for the character at STR_PTR (kept in TMP2; 0
when STR_PTR is at or after STR_END), and finishes with an XOR of the two that
sets the flags. The return address is kept in LOCALS0.

Without UCP, "word character" means the ctype_word bit of the ctypes table
entry, and characters above 255 are never word characters. With UCP it means
underscore, or a character whose getucd result lies in the range
ucp_Ll..ucp_Lu or ucp_Nd..ucp_No. */
2161     static void check_wordboundary(compiler_common *common)
2162     {
2163     DEFINE_COMPILER;
2164     struct sljit_jump *beginend;
2165 ph10 836 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2166 ph10 664 struct sljit_jump *jump;
2167 ph10 670 #endif
2168 ph10 664
/* The table lookup below extracts the word bit with a shift by 4, which is
only valid while ctype_word is 0x10. */
2169 zherczeg 741 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2170 ph10 664
2171     sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2172     /* Get type of the previous char, and put it to LOCALS1. */
2173     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2174     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2175     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2176     beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again; NOTE(review): read_char
presumably advances STR_PTR back to where it was - confirm against read_char. */
2177     skip_char_back(common);
2178     read_char(common);
2179    
2180     /* Testing char type. */
2181     #ifdef SUPPORT_UCP
2182 ph10 836 if (common->use_ucp)
2183 ph10 664 {
/* TMP2 starts as 1 so that an underscore counts as a word character without
consulting the Unicode data. */
2184     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2185     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2186     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range checks on the getucd result: ucp_Ll..ucp_Lu and
ucp_Nd..ucp_No. */
2187     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2188     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2189     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2190     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2191     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2192     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2193     JUMPHERE(jump);
2194     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2195     }
2196     else
2197     #endif
2198     {
2199 ph10 836 #ifndef COMPILE_PCRE8
2200     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2201     #elif defined SUPPORT_UTF
2202 ph10 664 /* Here LOCALS1 has already been zeroed. */
2203     jump = NULL;
2204 ph10 836 if (common->utf)
2205 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2206 ph10 836 #endif /* COMPILE_PCRE8 */
/* LOCALS1 = (ctypes[c] >> 4) & 1, i.e. the ctype_word bit. */
2207 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2208     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2209     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2210     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2211 ph10 836 #ifndef COMPILE_PCRE8
2212     JUMPHERE(jump);
2213     #elif defined SUPPORT_UTF
2214 ph10 664 if (jump != NULL)
2215     JUMPHERE(jump);
2216 ph10 836 #endif /* COMPILE_PCRE8 */
2217 ph10 664 }
2218     JUMPHERE(beginend);
2219    
/* Same classification for the character at STR_PTR; the result stays in TMP2,
which is 0 when there is no such character. */
2220     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2221     beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2222     peek_char(common);
2223    
2224     /* Testing char type. This is a code duplication. */
2225     #ifdef SUPPORT_UCP
2226 ph10 836 if (common->use_ucp)
2227 ph10 664 {
2228     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2229     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2230     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2231     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2232     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2233     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2234     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2235     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2236     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2237     JUMPHERE(jump);
2238     }
2239     else
2240     #endif
2241     {
2242 ph10 836 #ifndef COMPILE_PCRE8
2243     /* TMP2 may be destroyed by peek_char. */
2244 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2245 ph10 836 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2246     #elif defined SUPPORT_UTF
2247     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2248 ph10 664 jump = NULL;
2249 ph10 836 if (common->utf)
2250 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2251     #endif
2252     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2253     OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2254     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2255 ph10 836 #ifndef COMPILE_PCRE8
2256     JUMPHERE(jump);
2257     #elif defined SUPPORT_UTF
2258 ph10 664 if (jump != NULL)
2259     JUMPHERE(jump);
2260 ph10 836 #endif /* COMPILE_PCRE8 */
2261 ph10 664 }
2262     JUMPHERE(beginend);
2263    
/* The XOR result is zero when both characters have the same word-ness; only
the flags are produced (the destination is SLJIT_UNUSED). */
2264     OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2265     sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2266     }
2267    
/* Emits the shared fast-call routine that classifies the character in TMP1 as
"any newline". The characters accepted are 0x0a..0x0d and 0x85; in the 16-bit
build, or in the 8-bit build when common->utf is set, 0x2028 and 0x2029 are
accepted as well. The 0/1 result is accumulated in TMP2 and the final
COND_VALUE also sets the flags. TMP1 is modified. */
2268     static void check_anynewline(compiler_common *common)
2269     {
2270     /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
2271     DEFINE_COMPILER;
2272    
2273     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2274    
/* After subtracting 0x0a, an unsigned "<= 3" test covers 0x0a..0x0d. */
2275     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2276     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2277     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2278     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2279 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2280     #ifdef COMPILE_PCRE8
2281     if (common->utf)
2282 ph10 664 {
2283 ph10 836 #endif
2284 ph10 664 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029 (both relative to 0x0a), so a
single comparison accepts both. */
2285     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2286     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2287 ph10 836 #ifdef COMPILE_PCRE8
2288 ph10 664 }
2289     #endif
2290 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the result of the last comparison emitted above and set the flags. */
2291 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2292     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2293     }
2294    
/* Emits the shared fast-call routine that classifies the character in TMP1 as
horizontal white space. The characters accepted are 0x09, 0x20 and 0xa0; in
the 16-bit build, or in the 8-bit build when common->utf is set, 0x1680,
0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000 are accepted as well. The
0/1 result is accumulated in TMP2 and the final COND_VALUE also sets the
flags. TMP1 is modified on the wide-character path. */
2295     static void check_hspace(compiler_common *common)
2296     {
2297     /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
2298     DEFINE_COMPILER;
2299    
2300     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2301    
2302     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2303     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2304     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2305     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2306     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2307 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2308     #ifdef COMPILE_PCRE8
2309     if (common->utf)
2310 ph10 664 {
2311 ph10 836 #endif
2312 ph10 664 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2313     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2314     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2315     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2316     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is relative to 0x2000: an unsigned "<= 0x0a" test covers
0x2000..0x200a, and the remaining constants are given as offsets from 0x2000. */
2317     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2318     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2319     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2320     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2321     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2322     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2323     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2324     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2325 ph10 836 #ifdef COMPILE_PCRE8
2326 ph10 664 }
2327     #endif
2328 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the result of the last comparison emitted above and set the flags. */
2329 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2330    
2331     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2332     }
2333    
/* Emits the shared fast-call routine that classifies the character in TMP1 as
vertical white space. The characters accepted are 0x0a..0x0d and 0x85; in the
16-bit build, or in the 8-bit build when common->utf is set, 0x2028 and 0x2029
are accepted as well. The 0/1 result is accumulated in TMP2 and the final
COND_VALUE also sets the flags. TMP1 is modified. */
2334     static void check_vspace(compiler_common *common)
2335     {
2336     /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
2337     DEFINE_COMPILER;
2338    
2339     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2340    
/* After subtracting 0x0a, an unsigned "<= 3" test covers 0x0a..0x0d. */
2341     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2342     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2343     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2344     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2345 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2346     #ifdef COMPILE_PCRE8
2347     if (common->utf)
2348 ph10 664 {
2349 ph10 836 #endif
2350 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029 (both relative to 0x0a), so a
single comparison accepts both. */
2351     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2352     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2353 ph10 836 #ifdef COMPILE_PCRE8
2354 ph10 664 }
2355     #endif
2356 ph10 836 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the result of the last comparison emitted above and set the flags. */
2357 ph10 664 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2358    
2359     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2360     }
2361    
/* CHAR1 and CHAR2 borrow the STR_END and STACK_TOP registers as scratch
registers inside the two comparison routines below; each routine saves their
contents on entry and restores them before returning. */
2362     #define CHAR1 STR_END
2363     #define CHAR2 STACK_TOP
2364    
/* Emits the shared fast-call routine that compares two character sequences
exactly. On entry TMP1 points to the first sequence and TMP2 holds a length
that is counted down in steps of IN_UCHARS(1); the second sequence starts at
STR_PTR - TMP2.
NOTE(review): TMP2 is presumably a non-zero multiple of the character size on
entry (the loop body runs before the first zero test) - confirm at call sites.
The loop stops at the first differing pair or when TMP2 reaches zero, so TMP2
is zero on return only if every pair was equal. */
2365     static void do_casefulcmp(compiler_common *common)
2366     {
2367     DEFINE_COMPILER;
2368     struct sljit_jump *jump;
2369     struct sljit_label *label;
2370    
2371     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2372     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers that are reused as CHAR1 and CHAR2. */
2373     OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2374     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers start one character early because the loads below use an
offset of one character. NOTE(review): MOVU_UCHAR is presumably an updating
load that also advances the base register by that offset - confirm against
the macro definition. */
2375 ph10 836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2376     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2377 ph10 664
2378     label = LABEL();
2379 ph10 836 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2380     OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2381 ph10 664 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2382 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2383 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, label);
2384    
2385     JUMPHERE(jump);
/* Undo the initial one-character adjustment of STR_PTR and restore the
borrowed registers. */
2386 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2387 ph10 664 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2388     OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2389     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2390     }
2391    
/* LCC_TABLE borrows the STACK_LIMIT register to hold the address of the
lower-casing table inside do_caselesscmp; its contents are saved in TMP3 on
entry and restored before returning. */
2392     #define LCC_TABLE STACK_LIMIT
2393    
/* Emits the shared fast-call routine that compares two character sequences
after mapping every character through the common->lcc table. On entry TMP1
points to the first sequence and TMP2 holds a length that is counted down in
steps of IN_UCHARS(1); the second sequence starts at STR_PTR - TMP2. In
non-8-bit builds characters above 255 are compared unmapped.
NOTE(review): TMP2 is presumably a non-zero multiple of the character size on
entry (the loop body runs before the first zero test) - confirm at call sites.
The loop stops at the first differing pair or when TMP2 reaches zero, so TMP2
is zero on return only if every pair was equal. */
2394     static void do_caselesscmp(compiler_common *common)
2395     {
2396     DEFINE_COMPILER;
2397     struct sljit_jump *jump;
2398     struct sljit_label *label;
2399    
2400     sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2401     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2402    
/* Save the three borrowed registers, then load the table address. */
2403     OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2404     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2405     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2406     OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers start one character early because the loads below use an
offset of one character. NOTE(review): MOVU_UCHAR is presumably an updating
load that also advances the base register by that offset - confirm against
the macro definition. */
2407 ph10 836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2408     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2409 ph10 664
2410     label = LABEL();
2411 ph10 836 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2412     OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2413     #ifndef COMPILE_PCRE8
/* Skip the table lookup for characters above 255. */
2414     jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2415     #endif
2416 ph10 664 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2417 ph10 836 #ifndef COMPILE_PCRE8
2418     JUMPHERE(jump);
2419     jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2420     #endif
2421 ph10 664 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2422 ph10 836 #ifndef COMPILE_PCRE8
2423     JUMPHERE(jump);
2424     #endif
2425 ph10 664 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2426 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2427 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, label);
2428    
2429     JUMPHERE(jump);
/* Undo the initial one-character adjustment of STR_PTR and restore the
borrowed registers. */
2430 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2431 ph10 664 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2432     OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2433     OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2434     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2435     }
2436    
2437     #undef LCC_TABLE
2438     #undef CHAR1
2439     #undef CHAR2
2440    
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF character sequences. This is an ordinary C
function rather than generated code, because decoding UTF characters and
looking up their other case would be ineffective to do in JIT level.

  src1  start of the first sequence
  args  supplies the second sequence: it starts at args->ptr and must not be
        read at or beyond args->end
  end1  end of the first sequence

Returns the position in the second sequence that follows the compared
characters when every character of the first sequence matched, or NULL when a
pair of characters differs or the second sequence is exhausted first. Two
characters match when they are equal, or when the first one equals
UCD_OTHERCASE of the second one. */
static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
int ref_char, subject_char;
const pcre_uchar *subject = args->ptr;
const pcre_uchar *subject_end = args->end;

for (;;)
  {
  /* The whole first sequence has been consumed: success. */
  if (src1 >= end1)
    return subject;
  /* The second sequence ran out first: failure. */
  if (subject >= subject_end)
    return NULL;

  GETCHARINC(ref_char, src1);
  GETCHARINC(subject_char, subject);

  if (ref_char == subject_char)
    continue;
  if (ref_char != UCD_OTHERCASE(subject_char))
    return NULL;
  }
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
2462 ph10 664
2463 ph10 836 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2464 ph10 664 compare_context* context, jump_list **fallbacks)
2465     {
2466     DEFINE_COMPILER;
2467     unsigned int othercasebit = 0;
2468 ph10 836 pcre_uchar *othercasechar = NULL;
2469     #ifdef SUPPORT_UTF
2470     int utflength;
2471 ph10 664 #endif
2472    
2473     if (caseless && char_has_othercase(common, cc))
2474     {
2475     othercasebit = char_get_othercase_bit(common, cc);
2476     SLJIT_ASSERT(othercasebit);
2477     /* Extracting bit difference info. */
2478 ph10 836 #ifdef COMPILE_PCRE8
2479     othercasechar = cc + (othercasebit >> 8);
2480 ph10 664 othercasebit &= 0xff;
2481 ph10 836 #else
2482     #ifdef COMPILE_PCRE16
2483     othercasechar = cc + (othercasebit >> 9);
2484     if ((othercasebit & 0x100) != 0)
2485     othercasebit = (othercasebit & 0xff) << 8;
2486     else
2487     othercasebit &= 0xff;
2488     #endif
2489     #endif
2490 ph10 664 }
2491    
2492     if (context->sourcereg == -1)
2493     {
2494 ph10 836 #ifdef COMPILE_PCRE8
2495 ph10 664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2496     if (context->length >= 4)
2497     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2498     else if (context->length >= 2)
2499 zherczeg 847 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2500 ph10 664 else
2501     #endif
2502     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2503 ph10 836 #else
2504     #ifdef COMPILE_PCRE16
2505     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2506     if (context->length >= 4)
2507     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2508     else
2509     #endif
2510 zherczeg 847 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2511 ph10 836 #endif
2512     #endif /* COMPILE_PCRE8 */
2513 ph10 664 context->sourcereg = TMP2;
2514     }
2515    
2516 ph10 836 #ifdef SUPPORT_UTF
2517     utflength = 1;
2518     if (common->utf && HAS_EXTRALEN(*cc))
2519     utflength += GET_EXTRALEN(*cc);
2520 ph10 664
2521     do
2522     {
2523     #endif
2524    
2525 ph10 836 context->length -= IN_UCHARS(1);
2526 ph10 664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2527    
2528     /* Unaligned read is supported. */
2529 ph10 836 if (othercasebit != 0 && othercasechar == cc)
2530 ph10 664 {
2531 ph10 836 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2532     context->oc.asuchars[context->ucharptr] = othercasebit;
2533 ph10 664 }
2534     else
2535     {
2536 ph10 836 context->c.asuchars[context->ucharptr] = *cc;
2537     context->oc.asuchars[context->ucharptr] = 0;
2538 ph10 664 }
2539 ph10 836 context->ucharptr++;
2540 ph10 664
2541 ph10 836 #ifdef COMPILE_PCRE8
2542     if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2543     #else
2544     if (context->ucharptr >= 2 || context->length == 0)
2545     #endif
2546 ph10 664 {
2547     if (context->length >= 4)
2548     OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2549 ph10 836 #ifdef COMPILE_PCRE8
2550 ph10 664 else if (context->length >= 2)
2551 zherczeg 847 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2552 ph10 664 else if (context->length >= 1)
2553     OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2554 ph10 836 #else
2555     else if (context->length >= 2)
2556 zherczeg 847 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2557 ph10 836 #endif
2558 ph10 664 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2559    
2560 ph10 836 switch(context->ucharptr)
2561 ph10 664 {
2562 ph10 836 case 4 / sizeof(pcre_uchar):
2563 ph10 664 if (context->oc.asint != 0)
2564     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2565     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2566     break;
2567    
2568 ph10 836 case 2 / sizeof(pcre_uchar):
2569 zherczeg 847 if (context->oc.asushort != 0)
2570     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2571     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2572 ph10 664 break;
2573    
2574 ph10 836 #ifdef COMPILE_PCRE8
2575 ph10 664 case 1:
2576     if (context->oc.asbyte != 0)
2577     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2578     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2579     break;
2580 ph10 836 #endif
2581 ph10 664
2582     default:
2583     SLJIT_ASSERT_STOP();
2584     break;
2585     }
2586 ph10 836 context->ucharptr = 0;
2587 ph10 664 }
2588 ph10 691
2589 ph10 664 #else
2590    
2591     /* Unaligned read is unsupported. */
2592 ph10 836 #ifdef COMPILE_PCRE8
2593 ph10 664 if (context->length > 0)
2594     OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2595 ph10 836 #else
2596     if (context->length > 0)
2597     OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2598     #endif
2599 ph10 664 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2600    
2601 ph10 836 if (othercasebit != 0 && othercasechar == cc)
2602 ph10 664 {
2603     OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2604     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2605     }
2606     else
2607     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2608    
2609     #endif
2610    
2611     cc++;
2612 ph10 836 #ifdef SUPPORT_UTF
2613     utflength--;
2614 ph10 664 }
2615 ph10 836 while (utflength > 0);
2616 ph10 664 #endif
2617    
2618     return cc;
2619     }
2620    
2621 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2622 ph10 664
/* Rebases the value held in typereg so that it is expressed relative to
'value' instead of the current 'typeoffset': emits one SUB or ADD of the
difference (nothing when the offsets already agree) and then records the new
offset in 'typeoffset'. Expects 'typereg' and 'typeoffset' to be in scope at
the point of use. 'value' is evaluated several times, so it must not have side
effects. The body is wrapped in do { } while (0) so that the macro behaves as
a single statement, even under an unbraced if/else. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)
2632    
/* Rebases the character value held in TMP1 so that it is expressed relative
to 'value' instead of the current 'charoffset': emits one SUB or ADD of the
difference (nothing when the offsets already agree) and then records the new
offset in 'charoffset'. Expects 'charoffset' to be in scope at the point of
use. 'value' is evaluated several times, so it must not have side effects. The
body is wrapped in do { } while (0) so that the macro behaves as a single
statement, even under an unbraced if/else. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
2642    
2643 ph10 836 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2644 ph10 664 {
2645     DEFINE_COMPILER;
2646     jump_list *found = NULL;
2647     jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2648     unsigned int c;
2649     int compares;
2650     struct sljit_jump *jump = NULL;
2651 ph10 836 pcre_uchar *ccbegin;
2652 ph10 664 #ifdef SUPPORT_UCP
2653     BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2654     BOOL charsaved = FALSE;
2655 zherczeg 715 int typereg = TMP1, scriptreg = TMP1;
2656     unsigned int typeoffset;
2657 ph10 664 #endif
2658 zherczeg 715 int invertcmp, numberofcmps;
2659     unsigned int charoffset;
2660 ph10 664
2661 ph10 836 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
2662 ph10 664 check_input_end(common, fallbacks);
2663     read_char(common);
2664    
2665     if ((*cc++ & XCL_MAP) != 0)
2666     {
2667     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2668 ph10 836 #ifndef COMPILE_PCRE8
2669     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2670     #elif defined SUPPORT_UTF
2671     if (common->utf)
2672 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2673 ph10 836 #endif
2674 ph10 664
2675     OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2676     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2677     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2678     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2679     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2680     add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2681    
2682 ph10 836 #ifndef COMPILE_PCRE8
2683     JUMPHERE(jump);
2684     #elif defined SUPPORT_UTF
2685     if (common->utf)
2686 ph10 664 JUMPHERE(jump);
2687 ph10 836 #endif
2688 ph10 664 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2689     #ifdef SUPPORT_UCP
2690     charsaved = TRUE;
2691     #endif
2692 ph10 836 cc += 32 / sizeof(pcre_uchar);
2693 ph10 664 }
2694    
2695     /* Scanning the necessary info. */
2696     ccbegin = cc;
2697     compares = 0;
2698     while (*cc != XCL_END)
2699     {
2700     compares++;
2701     if (*cc == XCL_SINGLE)
2702     {
2703     cc += 2;
2704 ph10 836 #ifdef SUPPORT_UTF
2705     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2706 ph10 664 #endif
2707     #ifdef SUPPORT_UCP
2708     needschar = TRUE;
2709     #endif
2710     }
2711     else if (*cc == XCL_RANGE)
2712     {
2713     cc += 2;
2714 ph10 836 #ifdef SUPPORT_UTF
2715     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2716 ph10 664 #endif
2717     cc++;
2718 ph10 836 #ifdef SUPPORT_UTF
2719     if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2720 ph10 664 #endif
2721     #ifdef SUPPORT_UCP
2722     needschar = TRUE;
2723     #endif
2724     }
2725     #ifdef SUPPORT_UCP
2726     else
2727     {
2728     SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2729     cc++;
2730     switch(*cc)
2731     {
2732     case PT_ANY:
2733     break;
2734    
2735     case PT_LAMP:
2736     case PT_GC:
2737     case PT_PC:
2738     case PT_ALNUM:
2739     needstype = TRUE;
2740     break;
2741    
2742     case PT_SC:
2743     needsscript = TRUE;
2744     break;
2745    
2746     case PT_SPACE:
2747     case PT_PXSPACE:
2748     case PT_WORD:
2749     needstype = TRUE;
2750     needschar = TRUE;
2751     break;
2752    
2753     default:
2754     SLJIT_ASSERT_STOP();
2755     break;
2756     }
2757     cc += 2;
2758     }
2759     #endif
2760     }
2761    
2762     #ifdef SUPPORT_UCP
2763     /* Simple register allocation. TMP1 is preferred if possible. */
2764     if (needstype || needsscript)
2765     {
2766     if (needschar && !charsaved)
2767     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2768     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2769     if (needschar)
2770     {
2771     if (needstype)
2772     {
2773     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2774     typereg = RETURN_ADDR;
2775     }
2776    
2777     if (needsscript)
2778     scriptreg = TMP3;
2779     OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2780     }
2781     else if (needstype && needsscript)
2782     scriptreg = TMP3;
2783     /* In all other cases only one of them was specified, and that can goes to TMP1. */
2784    
2785     if (needsscript)
2786     {
2787     if (scriptreg == TMP1)
2788     {
2789 ph10 836 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2790 ph10 664 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
2791     }
2792     else
2793     {
2794     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
2795 ph10 836 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2796 ph10 664 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
2797     }
2798     }
2799     }
2800     #endif
2801    
2802     /* Generating code. */
2803     cc = ccbegin;
2804     charoffset = 0;
2805     numberofcmps = 0;
2806     #ifdef SUPPORT_UCP
2807     typeoffset = 0;
2808     #endif
2809    
2810     while (*cc != XCL_END)
2811     {
2812     compares--;
2813     invertcmp = (compares == 0 && list != fallbacks);
2814     jump = NULL;
2815    
2816     if (*cc == XCL_SINGLE)
2817     {
2818     cc ++;
2819 ph10 836 #ifdef SUPPORT_UTF
2820     if (common->utf)
2821 ph10 664 {
2822     GETCHARINC(c, cc);
2823     }
2824     else
2825     #endif
2826     c = *cc++;
2827    
2828     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2829     {
2830     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2831     COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2832     numberofcmps++;
2833     }
2834     else if (numberofcmps > 0)
2835     {
2836     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2837     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2838     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2839     numberofcmps = 0;
2840     }
2841     else
2842     {
2843     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2844     numberofcmps = 0;
2845     }
2846     }
2847     else if (*cc == XCL_RANGE)
2848     {
2849     cc ++;
2850 ph10 836 #ifdef SUPPORT_UTF
2851     if (common->utf)
2852 ph10 664 {
2853     GETCHARINC(c, cc);
2854     }
2855     else
2856     #endif
2857     c = *cc++;
2858     SET_CHAR_OFFSET(c);
2859 ph10 836 #ifdef SUPPORT_UTF
2860     if (common->utf)
2861 ph10 664 {
2862     GETCHARINC(c, cc);
2863     }
2864     else
2865     #endif
2866     c = *cc++;
2867     if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2868     {
2869     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2870     COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2871     numberofcmps++;
2872     }
2873     else if (numberofcmps > 0)
2874     {
2875     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2876     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2877     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2878     numberofcmps = 0;
2879     }
2880     else
2881     {
2882     jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2883     numberofcmps = 0;
2884     }
2885     }
2886     #ifdef SUPPORT_UCP
2887     else
2888     {
2889     if (*cc == XCL_NOTPROP)
2890     invertcmp ^= 0x1;
2891     cc++;
2892     switch(*cc)
2893     {
2894     case PT_ANY:
2895     if (list != fallbacks)
2896     {
2897     if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
2898     continue;
2899     }
2900     else if (cc[-1] == XCL_NOTPROP)
2901     continue;
2902     jump = JUMP(SLJIT_JUMP);
2903     break;
2904    
2905     case PT_LAMP:
2906     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
2907     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2908     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
2909     COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2910     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
2911     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2912     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2913     break;
2914    
2915     case PT_GC:
2916 ph10 836 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
2917 ph10 664 SET_TYPE_OFFSET(c);
2918 ph10 836 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
2919 ph10 664 break;
2920    
2921     case PT_PC:
2922     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
2923     break;
2924    
2925     case PT_SC:
2926     jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
2927     break;
2928    
2929     case PT_SPACE:
2930     case PT_PXSPACE:
2931     if (*cc == PT_SPACE)
2932     {
2933     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2934     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
2935     }
2936     SET_CHAR_OFFSET(9);
2937     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
2938     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2939     if (*cc == PT_SPACE)
2940     JUMPHERE(jump);
2941    
2942     SET_TYPE_OFFSET(ucp_Zl);
2943     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
2944     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2945     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2946     break;
2947    
2948     case PT_WORD:
2949     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
2950     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2951     /* ... fall through */
2952    
2953     case PT_ALNUM:
2954     SET_TYPE_OFFSET(ucp_Ll);
2955     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2956     COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2957     SET_TYPE_OFFSET(ucp_Nd);
2958     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2959     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2960     jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2961     break;
2962     }
2963     cc += 2;
2964     }
2965     #endif
2966    
2967     if (jump != NULL)
2968     add_jump(compiler, compares > 0 ? list : fallbacks, jump);
2969     }
2970    
2971     if (found != NULL)
2972     set_jumps(found, LABEL());
2973     }
2974    
2975     #undef SET_TYPE_OFFSET
2976     #undef SET_CHAR_OFFSET
2977    
2978     #endif
2979    
2980 ph10 836 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
2981 ph10 664 {
2982     DEFINE_COMPILER;
2983     int length;
2984     unsigned int c, oc, bit;
2985     compare_context context;
2986     struct sljit_jump *jump[4];
2987 ph10 836 #ifdef SUPPORT_UTF
2988 ph10 670 struct sljit_label *label;
2989 ph10 664 #ifdef SUPPORT_UCP
2990 ph10 836 pcre_uchar propdata[5];
2991 ph10 664 #endif
2992     #endif
2993    
2994     switch(type)
2995     {
2996     case OP_SOD:
2997     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2998     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2999     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3000     return cc;
3001    
3002     case OP_SOM:
3003     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3004     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3005     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3006     return cc;
3007    
3008     case OP_NOT_WORD_BOUNDARY:
3009     case OP_WORD_BOUNDARY:
3010     add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3011     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3012     return cc;
3013    
3014     case OP_NOT_DIGIT:
3015     case OP_DIGIT:
3016     check_input_end(common, fallbacks);
3017     read_char8_type(common);
3018     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3019     add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3020     return cc;
3021    
3022     case OP_NOT_WHITESPACE:
3023     case OP_WHITESPACE:
3024     check_input_end(common, fallbacks);
3025     read_char8_type(common);
3026     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3027     add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3028     return cc;
3029    
3030     case OP_NOT_WORDCHAR:
3031     case OP_WORDCHAR:
3032     check_input_end(common, fallbacks);
3033     read_char8_type(common);
3034     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3035     add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3036     return cc;
3037    
3038     case OP_ANY:
3039     check_input_end(common, fallbacks);
3040     read_char(common);
3041     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3042     {
3043     jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3044     jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3045 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3046 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3047     JUMPHERE(jump[1]);
3048     JUMPHERE(jump[0]);
3049     }
3050     else
3051     check_newlinechar(common, common->nltype, fallbacks, TRUE);
3052     return cc;
3053    
3054     case OP_ALLANY:
3055     check_input_end(common, fallbacks);
3056 ph10 836 #ifdef SUPPORT_UTF
3057     if (common->utf)
3058 ph10 664 {
3059 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3060     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3061     #ifdef COMPILE_PCRE8
3062 zherczeg 736 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3063 ph10 836 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3064 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3065 ph10 836 #else /* COMPILE_PCRE8 */
3066     #ifdef COMPILE_PCRE16
3067     jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3068     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3069     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3070     COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3071     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3072     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3073     #endif /* COMPILE_PCRE16 */
3074     #endif /* COMPILE_PCRE8 */
3075 zherczeg 736 JUMPHERE(jump[0]);
3076 ph10 664 return cc;
3077     }
3078     #endif
3079 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3080 ph10 664 return cc;
3081    
3082 zherczeg 736 case OP_ANYBYTE:
3083     check_input_end(common, fallbacks);
3084 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3085 zherczeg 736 return cc;
3086    
3087 ph10 836 #ifdef SUPPORT_UTF
3088 ph10 664 #ifdef SUPPORT_UCP
3089     case OP_NOTPROP:
3090     case OP_PROP:
3091     propdata[0] = 0;
3092     propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3093     propdata[2] = cc[0];
3094     propdata[3] = cc[1];
3095     propdata[4] = XCL_END;
3096     compile_xclass_hotpath(common, propdata, fallbacks);
3097     return cc + 2;
3098     #endif
3099     #endif
3100    
3101     case OP_ANYNL:
3102     check_input_end(common, fallbacks);
3103     read_char(common);
3104     jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3105     jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3106 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3107 ph10 664 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3108 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3109 ph10 664 jump[3] = JUMP(SLJIT_JUMP);
3110     JUMPHERE(jump[0]);
3111     check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
3112     JUMPHERE(jump[1]);
3113     JUMPHERE(jump[2]);
3114     JUMPHERE(jump[3]);
3115     return cc;
3116    
3117     case OP_NOT_HSPACE:
3118     case OP_HSPACE:
3119     check_input_end(common, fallbacks);
3120     read_char(common);
3121     add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3122     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3123     return cc;
3124    
3125     case OP_NOT_VSPACE:
3126     case OP_VSPACE:
3127     check_input_end(common, fallbacks);
3128     read_char(common);
3129     add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3130     add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3131     return cc;
3132    
3133     #ifdef SUPPORT_UCP
3134     case OP_EXTUNI:
3135     check_input_end(common, fallbacks);
3136     read_char(common);
3137     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3138     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3139     add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3140    
3141     label = LABEL();
3142     jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3143     OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3144     read_char(common);
3145     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3146     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3147     CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3148    
3149     OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3150     JUMPHERE(jump[0]);
3151     return cc;
3152     #endif
3153    
3154     case OP_EODN:
3155     jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3156     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3157     {
3158 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3159     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3160 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3161 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3162 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3163     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3164     }
3165     else if (common->nltype == NLTYPE_FIXED)
3166     {
3167 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3168     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3169 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3170     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3171     }
3172     else
3173     {
3174 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3175 ph10 664 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3176 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3177 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3178     jump[2] = JUMP(SLJIT_C_GREATER);
3179     add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
3180 ph10 836 /* Equal. */
3181     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3182 ph10 664 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3183     add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3184    
3185     JUMPHERE(jump[1]);
3186     if (common->nltype == NLTYPE_ANYCRLF)
3187     {
3188 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3189 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3190     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3191     }
3192     else
3193     {
3194     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3195     read_char(common);
3196     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3197     add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3198     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3199     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3200     }
3201     JUMPHERE(jump[2]);
3202     JUMPHERE(jump[3]);
3203     }
3204     JUMPHERE(jump[0]);
3205     return cc;
3206    
3207     case OP_EOD:
3208     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3209     return cc;
3210    
3211     case OP_CIRC:
3212     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3213     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3214     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3215     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3216     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3217     return cc;
3218    
3219     case OP_CIRCM:
3220     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3221     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3222     jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3223     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3224     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3225     jump[0] = JUMP(SLJIT_JUMP);
3226     JUMPHERE(jump[1]);
3227    
3228 ph10 836 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, STR_PTR, 0, STR_END, 0));
3229 ph10 664 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3230     {
3231 ph10 836 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3232 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3233 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3234     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3235 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3236     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3237     }
3238     else
3239     {
3240     skip_char_back(common);
3241     read_char(common);
3242     check_newlinechar(common, common->nltype, fallbacks, FALSE);
3243     }
3244     JUMPHERE(jump[0]);
3245     return cc;
3246    
3247     case OP_DOLL:
3248     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3249     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3250     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3251    
3252     if (!common->endonly)
3253     compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3254     else
3255     add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3256     return cc;
3257    
3258     case OP_DOLLM:
3259     jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3260     OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3261     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3262     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3263     jump[0] = JUMP(SLJIT_JUMP);
3264     JUMPHERE(jump[1]);
3265    
3266     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3267     {
3268 ph10 836 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3269 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3270 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3271     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3272 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3273     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3274     }
3275     else
3276     {
3277     peek_char(common);
3278     check_newlinechar(common, common->nltype, fallbacks, FALSE);
3279     }
3280     JUMPHERE(jump[0]);
3281     return cc;
3282    
3283     case OP_CHAR:
3284     case OP_CHARI:
3285     length = 1;
3286 ph10 836 #ifdef SUPPORT_UTF
3287     if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3288 ph10 664 #endif
3289     if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
3290     {
3291 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3292 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3293    
3294 ph10 836 context.length = IN_UCHARS(length);
3295 ph10 664 context.sourcereg = -1;
3296     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3297 ph10 836 context.ucharptr = 0;
3298 ph10 664 #endif
3299     return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3300     }
3301 ph10 836 check_input_end(common, fallbacks);
3302 ph10 664 read_char(common);
3303 ph10 836 #ifdef SUPPORT_UTF
3304     if (common->utf)
3305 ph10 664 {
3306     GETCHAR(c, cc);
3307     }
3308     else
3309     #endif
3310     c = *cc;
3311     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3312     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3313     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3314     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3315     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3316     return cc + length;
3317    
3318     case OP_NOT:
3319     case OP_NOTI:
3320 ph10 836 check_input_end(common, fallbacks);
3321 ph10 664 length = 1;
3322 ph10 836 #ifdef SUPPORT_UTF
3323     if (common->utf)
3324 ph10 664 {
3325 ph10 836 #ifdef COMPILE_PCRE8
3326     c = *cc;
3327     if (c < 128)
3328 ph10 664 {
3329     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3330     if (type == OP_NOT || !char_has_othercase(common, cc))
3331     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3332     else
3333     {
3334     /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3335 zherczeg 736 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3336     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3337 ph10 664 }
3338     /* Skip the variable-length character. */
3339 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3340 zherczeg 736 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3341 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3342 zherczeg 736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3343     JUMPHERE(jump[0]);
3344 ph10 836 return cc + 1;
3345 ph10 664 }
3346     else
3347 ph10 836 #endif /* COMPILE_PCRE8 */
3348     {
3349     GETCHARLEN(c, cc, length);
3350 ph10 664 read_char(common);
3351 ph10 836 }
3352 ph10 664 }
3353     else
3354 ph10 836 #endif /* SUPPORT_UTF */
3355 ph10 664 {
3356 ph10 836 read_char(common);
3357 ph10 664 c = *cc;
3358     }
3359    
3360     if (type == OP_NOT || !char_has_othercase(common, cc))
3361     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3362     else
3363     {
3364     oc = char_othercase(common, c);
3365     bit = c ^ oc;
3366     if (ispowerof2(bit))
3367     {
3368     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3369     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3370     }
3371     else
3372     {
3373     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3374     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3375     }
3376     }
3377 ph10 836 return cc + 1;
3378 ph10 664
3379     case OP_CLASS:
3380     case OP_NCLASS:
3381     check_input_end(common, fallbacks);
3382     read_char(common);
3383 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3384 ph10 664 jump[0] = NULL;
3385 ph10 836 #ifdef COMPILE_PCRE8
3386     /* This check only affects 8 bit mode. In other modes, we
3387     always need to compare the value with 255. */
3388     if (common->utf)
3389     #endif /* COMPILE_PCRE8 */
3390 ph10 664 {
3391     jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3392     if (type == OP_CLASS)
3393     {
3394     add_jump(compiler, fallbacks, jump[0]);
3395     jump[0] = NULL;
3396     }
3397     }
3398 ph10 836 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3399 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3400     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3401     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3402     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3403     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3404     add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3405 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3406 ph10 664 if (jump[0] != NULL)
3407     JUMPHERE(jump[0]);
3408 ph10 836 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3409     return cc + 32 / sizeof(pcre_uchar);
3410 ph10 664
3411 ph10 836 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3412 ph10 664 case OP_XCLASS:
3413     compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3414     return cc + GET(cc, 0) - 1;
3415     #endif
3416    
3417     case OP_REVERSE:
3418     length = GET(cc, 0);
3419     SLJIT_ASSERT(length > 0);
3420     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3421 ph10 836 #ifdef SUPPORT_UTF
3422     if (common->utf)
3423 ph10 664 {
3424 ph10 836 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3425 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3426     label = LABEL();
3427 ph10 836 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3428 ph10 664 skip_char_back(common);
3429     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3430     JUMPTO(SLJIT_C_NOT_ZERO, label);
3431     return cc + LINK_SIZE;
3432     }
3433     #endif
3434 ph10 836 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3435     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3436 ph10 664 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3437     return cc + LINK_SIZE;
3438     }
3439     SLJIT_ASSERT_STOP();
3440     return cc;
3441     }
3442    
3443 ph10 836 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3444 ph10 664 {
3445     /* This function consumes at least one input character. */
3446     /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3447     DEFINE_COMPILER;
3448 ph10 836 pcre_uchar *ccbegin = cc;
3449 ph10 664 compare_context context;
3450     int size;
3451    
3452     context.length = 0;
3453     do
3454     {
3455     if (cc >= ccend)
3456     break;
3457    
3458     if (*cc == OP_CHAR)
3459     {
3460     size = 1;
3461 ph10 836 #ifdef SUPPORT_UTF
3462     if (common->utf && HAS_EXTRALEN(cc[1]))
3463     size += GET_EXTRALEN(cc[1]);
3464 ph10 664 #endif
3465     }
3466     else if (*cc == OP_CHARI)
3467     {
3468     size = 1;
3469 ph10 836 #ifdef SUPPORT_UTF
3470     if (common->utf)
3471 ph10 664 {
3472     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3473     size = 0;
3474 ph10 836 else if (HAS_EXTRALEN(cc[1]))
3475     size += GET_EXTRALEN(cc[1]);
3476 ph10 664 }
3477 ph10 691 else
3478 ph10 664 #endif
3479     if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3480     size = 0;
3481     }
3482     else
3483     size = 0;
3484    
3485     cc += 1 + size;
3486 ph10 836 context.length += IN_UCHARS(size);
3487 ph10 664 }
3488     while (size > 0 && context.length <= 128);
3489    
3490     cc = ccbegin;
3491     if (context.length > 0)
3492     {
3493     /* We have a fixed-length byte sequence. */
3494     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3495     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3496    
3497     context.sourcereg = -1;
3498     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3499 ph10 836 context.ucharptr = 0;
3500 ph10 664 #endif
3501     do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3502     return cc;
3503     }
3504    
3505     /* A non-fixed length character will be checked if length == 0. */
3506     return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3507     }
3508    
3509 ph10 836 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3510 ph10 664 {
3511     DEFINE_COMPILER;
3512     int offset = GET2(cc, 1) << 1;
3513    
3514     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3515     if (!common->jscript_compat)
3516     {
3517     if (fallbacks == NULL)
3518     {
3519     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3520     COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3521     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3522     COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3523     return JUMP(SLJIT_C_NOT_ZERO);
3524     }
3525     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3526     }
3527     return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3528     }
3529    
3530     /* Forward declarations: compile_hotpath() and compile_fallbackpath() are
         used by the functions below (see compile_assert_hotpath) before their
         definitions appear. */
3531 ph10 836 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3532 ph10 664 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3533    
         /* PUSH_FALLBACK allocates a record of 'size' bytes with
         sljit_alloc_memory(), clears it, stores 'ccstart' in its cc field and
         links it in as the new parent->top (the previous top is kept in prev).
         It relies on three names being in scope at the point of use:
         'compiler', 'fallback' (receives the new record) and 'parent'.
         If the compiler reports an error after the allocation, the enclosing
         function returns 'error'. */
3534     #define PUSH_FALLBACK(size, ccstart, error) \
3535     do \
3536     { \
3537     fallback = sljit_alloc_memory(compiler, (size)); \
3538     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
3539     return error; \
3540     memset(fallback, 0, size); \
3541     fallback->prev = parent->top; \
3542     fallback->cc = (ccstart); \
3543     parent->top = fallback; \
3544     } \
3545     while (0)
3546    
         /* Same as PUSH_FALLBACK, for enclosing functions that return void: on a
         compiler error it executes a plain 'return;'. */
3547     #define PUSH_FALLBACK_NOVALUE(size, ccstart) \
3548     do \
3549     { \
3550     fallback = sljit_alloc_memory(compiler, (size)); \
3551     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
3552     return; \
3553     memset(fallback, 0, size); \
3554     fallback->prev = parent->top; \
3555     fallback->cc = (ccstart); \
3556     parent->top = fallback; \
3557     } \
3558     while (0)
3559    
         /* Views the local 'fallback' pointer as a pointer to the given, more
         specific record type. */
3560     #define FALLBACK_AS(type) ((type*)fallback)
3561    
3562 ph10 836 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3563 ph10 664 {
3564     DEFINE_COMPILER;
3565     int offset = GET2(cc, 1) << 1;
3566     struct sljit_jump *jump = NULL;
3567    
3568     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3569     if (withchecks && !common->jscript_compat)
3570     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3571    
3572 ph10 836 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3573     if (common->utf && *cc == OP_REFI)
3574 ph10 664 {
3575     SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3576     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3577     if (withchecks)
3578     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3579    
3580     /* Needed to save important temporary registers. */
3581     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3582     OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3583     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
3584 ph10 836 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3585 ph10 664 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3586     add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3587     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3588     }
3589     else
3590 ph10 836 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3591 ph10 664 {
3592     OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3593     if (withchecks)
3594     jump = JUMP(SLJIT_C_ZERO);
3595     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3596    
3597     add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3598     add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3599     add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3600     }
3601    
3602     if (jump != NULL)
3603     {
3604     if (emptyfail)
3605     add_jump(compiler, fallbacks, jump);
3606     else
3607     JUMPHERE(jump);
3608     }
3609 ph10 836 return cc + 1 + IMM2_SIZE;
3610 ph10 664 }
3611    
3612 ph10 836 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3613 ph10 664 {
         /* Emits the hot path for a back reference at cc that is followed by a
         repeat operator (OP_CRSTAR ... OP_CRMINRANGE). An iterator_fallback
         record is pushed onto parent; its hotpath label and topfallbacks list
         are filled in here. Returns the address of the item after the repeat
         operator, or NULL when PUSH_FALLBACK detects a compiler error.

         In this function max == 0 stands for "no upper limit": OP_CRSTAR and
         OP_CRPLUS set it, and the greedy code then loops unconditionally. */
3614     DEFINE_COMPILER;
3615     fallback_common *fallback;
3616 ph10 836 pcre_uchar type;
3617 ph10 664 struct sljit_label *label;
3618     struct sljit_jump *zerolength;
3619     struct sljit_jump *jump = NULL;
3620 ph10 836 pcre_uchar *ccbegin = cc;
3621 ph10 664 int min = 0, max = 0;
3622     BOOL minimize;
3623    
3624     PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3625    
         /* The repeat opcode is stored right after the reference number. The
         test of its lowest bit relies on the minimizing variants being the
         odd-valued opcodes. */
3626 ph10 836 type = cc[1 + IMM2_SIZE];
3627 ph10 664 minimize = (type & 0x1) != 0;
3628     switch(type)
3629     {
3630     case OP_CRSTAR:
3631     case OP_CRMINSTAR:
3632     min = 0;
3633     max = 0;
3634 ph10 836 cc += 1 + IMM2_SIZE + 1;
3635 ph10 664 break;
3636     case OP_CRPLUS:
3637     case OP_CRMINPLUS:
3638     min = 1;
3639     max = 0;
3640 ph10 836 cc += 1 + IMM2_SIZE + 1;
3641 ph10 664 break;
3642     case OP_CRQUERY:
3643     case OP_CRMINQUERY:
3644     min = 0;
3645     max = 1;
3646 ph10 836 cc += 1 + IMM2_SIZE + 1;
3647 ph10 664 break;
3648     case OP_CRRANGE:
3649     case OP_CRMINRANGE:
3650 ph10 836 min = GET2(cc, 1 + IMM2_SIZE + 1);
3651     max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
3652     cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
3653 ph10 664 break;
3654     default:
3655     SLJIT_ASSERT_STOP();
3656     break;
3657     }
3658    
         /* Non-minimizing repeat: match as many times as allowed, pushing
         STR_PTR on the stack along the way. */
3659     if (!minimize)
3660     {
3661     if (min == 0)
3662     {
3663     allocate_stack(common, 2);
3664     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3665     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3666     /* Temporary release of STR_PTR. */
3667     OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3668     zerolength = compile_ref_checks(common, ccbegin, NULL);
3669     /* Restore if not zero length. */
3670     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3671     }
3672     else
3673     {
3674     allocate_stack(common, 1);
3675     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3676     zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3677     }
3678    
         /* An iteration counter is only needed for limits above 1; it is kept
         in the POSSESSIVE0 local. */
3679     if (min > 1 || max > 1)
3680     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
3681    
3682     label = LABEL();
3683     compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
3684    
3685     if (min > 1 || max > 1)
3686     {
3687     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3688     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3689     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
         /* Below min: repeat without pushing anything. */
3690     if (min > 1)
3691     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
3692     if (max > 1)
3693     {
         /* Below max: push STR_PTR and go round again; at max, fall out. */
3694     jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
3695     allocate_stack(common, 1);
3696     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3697     JUMPTO(SLJIT_JUMP, label);
3698     JUMPHERE(jump);
3699     }
3700     }
3701    
3702     if (max == 0)
3703     {
3704     /* Includes min > 1 case as well. */
3705     allocate_stack(common, 1);
3706     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3707     JUMPTO(SLJIT_JUMP, label);
3708     }
3709    
3710     JUMPHERE(zerolength);
3711     FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3712 ph10 677
3713     decrease_call_count(common);
3714 ph10 664 return cc;
3715     }
3716    
         /* Minimizing repeat. Two stack slots are used: STACK(0) receives
         STR_PTR below, and STACK(1) is the iteration counter (it is not
         initialised for OP_CRMINSTAR, which never reads it since min == 0 and
         max == 0 there). */
3717     allocate_stack(common, 2);
3718     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3719     if (type != OP_CRMINSTAR)
3720     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3721    
3722     if (min == 0)
3723     {
         /* Zero iterations are acceptable: store STR_PTR and jump over the
         first match attempt. */
3724     zerolength = compile_ref_checks(common, ccbegin, NULL);
3725     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3726     jump = JUMP(SLJIT_JUMP);
3727     }
3728     else
3729     zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3730    
3731     FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
         /* With an upper limit, give up once the counter has reached max. */
3732     if (max > 0)
3733     add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
3734    
3735     compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
3736     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3737    
3738     if (min > 1)
3739     {
         /* Count the iteration and repeat immediately while below min. */
3740     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3741     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3742     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3743     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
3744     }
3745     else if (max > 0)
3746     OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
3747    
3748     if (jump != NULL)
3749     JUMPHERE(jump);
3750     JUMPHERE(zerolength);
3751 ph10 677
3752     decrease_call_count(common);
3753 ph10 664 return cc;
3754     }
3755    
3756 ph10 836 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3757 ph10 664 {
3758     DEFINE_COMPILER;
3759     fallback_common *fallback;
3760     recurse_entry *entry = common->entries;
3761     recurse_entry *prev = NULL;
3762     int start = GET(cc, 1);
3763    
3764     PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
3765     while (entry != NULL)
3766     {
3767     if (entry->start == start)
3768     break;
3769     prev = entry;
3770     entry = entry->next;
3771     }
3772    
3773     if (entry == NULL)
3774     {
3775     entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
3776     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3777     return NULL;
3778     entry->next = NULL;
3779     entry->entry = NULL;
3780     entry->calls = NULL;
3781     entry->start = start;
3782    
3783     if (prev != NULL)
3784     prev->next = entry;
3785     else
3786     common->entries = entry;
3787     }
3788    
3789     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
3790     allocate_stack(common, 1);
3791     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
3792    
3793     if (entry->entry == NULL)
3794     add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
3795     else
3796     JUMPTO(SLJIT_FAST_CALL, entry->entry);
3797     /* Leave if the match is failed. */
3798     add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
3799     return cc + 1 + LINK_SIZE;
3800     }
3801    
3802 ph10 836 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
3803 ph10 664 {
         /* Emits the code for an assertion whose opcode lies in the range
         OP_ASSERT .. OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or
         OP_BRAMINZERO (neither is permitted when 'conditional' is TRUE).

         Each alternative is compiled with compile_hotpath() and is followed
         directly by its own fallback path. The jump list used when the
         assertion does not hold is fallback->condfailed if 'conditional' is
         TRUE and fallback->common.topfallbacks otherwise.

         common->acceptlabel and common->accept are saved on entry and restored
         on every return path. fallback->framesize and fallback->localptr are
         filled in here.

         Returns cc + 1 + LINK_SIZE, with cc advanced to the item that ends the
         last alternative, or NULL if the compiler reported an error. */
3804     DEFINE_COMPILER;
3805     int framesize;
3806     int localptr;
3807     fallback_common altfallback;
3808 ph10 836 pcre_uchar *ccbegin;
3809     pcre_uchar opcode;
3810     pcre_uchar bra = OP_BRA;
3811 ph10 664 jump_list *tmp = NULL;
3812     jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
3813     jump_list **found;
3814     /* Saving previous accept variables. */
3815     struct sljit_label *save_acceptlabel = common->acceptlabel;
3816     struct sljit_jump *jump;
3817     struct sljit_jump *brajump = NULL;
3818     jump_list *save_accept = common->accept;
3819    
3820     if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
3821     {
3822     SLJIT_ASSERT(!conditional);
3823     bra = *cc;
3824     cc++;
3825     }
3826 ph10 836 localptr = PRIV_DATA(cc);
3827 ph10 664 SLJIT_ASSERT(localptr != 0);
3828     framesize = get_framesize(common, cc, FALSE);
3829     fallback->framesize = framesize;
3830     fallback->localptr = localptr;
3831     opcode = *cc;
3832     SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
         /* 'found' receives the jump taken when an alternative matches. For
         the positive opcodes it is the local list 'tmp' (resolved further
         down); for the negative ones a match goes straight to 'target'. */
3833     found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
3834     ccbegin = cc;
3835     cc += GET(cc, 1);
3836    
3837     if (bra == OP_BRAMINZERO)
3838     {
3839     /* This is a braminzero fallback path. */
3840     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3841     free_stack(common, 1);
3842     brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
3843     }
3844    
         /* Save the state needed to undo the assertion. Without a frame
         (framesize < 0) the current STACK_TOP goes into the localptr slot and
         STR_PTR is pushed; with a frame, STR_PTR and the previous localptr
         value are pushed and init_frame() builds the frame. */
3845     if (framesize < 0)
3846     {
3847     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
3848     allocate_stack(common, 1);
3849     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3850     }
3851     else
3852     {
3853     allocate_stack(common, framesize + 2);
3854     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3855     OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
3856     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
3857     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3858     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3859     init_frame(common, ccbegin, framesize + 1, 2, FALSE);
3860     }
3861    
         /* One iteration per alternative; the loop ends when the item at cc is
         no longer OP_ALT. */
3862     memset(&altfallback, 0, sizeof(fallback_common));
3863     while (1)
3864     {
3865     common->acceptlabel = NULL;
3866     common->accept = NULL;
3867     altfallback.top = NULL;
3868     altfallback.topfallbacks = NULL;
3869    
         /* Second and later alternatives start again from the saved STR_PTR. */
3870     if (*ccbegin == OP_ALT)
3871     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3872    
3873     altfallback.cc = ccbegin;
3874     compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback);
3875     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3876     {
3877     common->acceptlabel = save_acceptlabel;
3878     common->accept = save_accept;
3879     return NULL;
3880     }
3881     common->acceptlabel = LABEL();
3882     if (common->accept != NULL)
3883     set_jumps(common->accept, common->acceptlabel);
3884    
3885 zherczeg 726 /* Reset stack. */
3886 ph10 664 if (framesize < 0)
3887     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3888 zherczeg 726 else {
3889     if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
3890     {
3891     /* We don't need to keep the STR_PTR, only the previous localptr. */
3892     OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3893     }
3894     else
3895     {
3896     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3897     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3898     }
3899     }
3900 ph10 664
3901     if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
3902     {
3903     /* We know that STR_PTR was stored on the top of the stack. */
3904     if (conditional)
3905 zherczeg 726 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3906 ph10 664 else if (bra == OP_BRAZERO)
3907     {
3908     if (framesize < 0)
3909     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3910     else
3911     {
3912     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3913     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
3914     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3915     }
3916     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3917     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3918     }
3919 zherczeg 726 else if (framesize >= 0)
3920 ph10 664 {
3921 zherczeg 726 /* For OP_BRA and OP_BRAMINZERO. */
3922     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3923 ph10 664 }
3924     }
         /* This alternative matched: leave through the 'found' list. */
3925     add_jump(compiler, found, JUMP(SLJIT_JUMP));
3926    
3927     compile_fallbackpath(common, altfallback.top);
3928     if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3929     {
3930     common->acceptlabel = save_acceptlabel;
3931     common->accept = save_accept;
3932     return NULL;
3933     }
3934     set_jumps(altfallback.topfallbacks, LABEL());
3935    
3936     if (*cc != OP_ALT)
3937     break;
3938    
3939     ccbegin = cc;
3940     cc += GET(cc, 1);
3941     }
3942     /* None of them matched. */
3943    
3944     if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
3945     {
3946     /* The assertion failed. */
3947     if (conditional || bra == OP_BRAZERO)
3948     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3949 zherczeg 726
3950 ph10 664 if (framesize < 0)
3951     {
3952     /* The topmost item should be 0. */
3953     if (bra == OP_BRAZERO)
3954     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3955     else
3956     free_stack(common, 1);
3957     }
3958     else
3959     {
3960     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3961     /* The topmost item should be 0. */
3962     if (bra == OP_BRAZERO)
3963     {
3964     free_stack(common, framesize + 1);
3965     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3966     }
3967     else
3968     free_stack(common, framesize + 2);
3969     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3970     }
         /* Leave the failure code. Except for OP_BRAZERO this jump goes to
         'target'; for OP_BRAZERO it is bound to fallback->hotpath below. */
3971     jump = JUMP(SLJIT_JUMP);
3972     if (bra != OP_BRAZERO)
3973     add_jump(compiler, target, jump);
3974    
3975     /* The assertion succeeded. */
3976     set_jumps(tmp, LABEL());
3977     if (framesize < 0)
3978     {
3979     /* We know that STR_PTR was stored on the top of the stack. */
3980     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3981     /* Keep the STR_PTR on the top of the stack. */
3982     if (bra == OP_BRAZERO)
3983     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3984     else if (bra == OP_BRAMINZERO)
3985     {
3986     OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3987     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3988     }
3989     }
3990     else
3991     {
3992 zherczeg 726 if (bra == OP_BRA)
3993 ph10 664 {
3994 zherczeg 726 /* We don't need to keep the STR_PTR, only the previous localptr. */
3995     OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3996     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3997 ph10 664 }
3998 zherczeg 726 else
3999 ph10 664 {
4000 zherczeg 726 /* We don't need to keep the STR_PTR, only the previous localptr. */
4001     OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
4002     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
         /* The top slot receives STR_PTR for OP_BRAZERO and an immediate 0
         otherwise. */
4003     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
4004 ph10 664 }
4005     }
4006    
4007     if (bra == OP_BRAZERO)
4008     {
4009     fallback->hotpath = LABEL();
4010     sljit_set_label(jump, fallback->hotpath);
4011     }
4012     else if (bra == OP_BRAMINZERO)
4013     {
4014     JUMPTO(SLJIT_JUMP, fallback->hotpath);
4015     JUMPHERE(brajump);
4016     if (framesize >= 0)
4017     {
4018     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4019     add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4020     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4021     }
4022     set_jumps(fallback->common.topfallbacks, LABEL());
4023     }
4024     }
4025     else
4026     {
4027     /* The negative assertion succeeded (no alternative matched). */
4028     if (framesize < 0)
4029     {
4030     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4031     if (bra != OP_BRA)
4032     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4033     else
4034     free_stack(common, 1);
4035     }
4036     else
4037     {
4038     OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4039     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4040     /* The topmost item should be 0. */
4041     if (bra != OP_BRA)
4042     {
4043     free_stack(common, framesize + 1);
4044     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4045     }
4046     else
4047     free_stack(common, framesize + 2);
4048     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4049     }
4050    
4051     if (bra == OP_BRAZERO)
4052     fallback->hotpath = LABEL();
4053     else if (bra == OP_BRAMINZERO)
4054     {
4055     JUMPTO(SLJIT_JUMP, fallback->hotpath);
4056     JUMPHERE(brajump);
4057     }
4058    
         /* With a BRAZERO / BRAMINZERO prefix the jumps collected in
         topfallbacks are bound here and the list is emptied. */
4059     if (bra != OP_BRA)
4060     {
4061     SLJIT_ASSERT(found == &fallback->common.topfallbacks);
4062     set_jumps(fallback->common.topfallbacks, LABEL());
4063     fallback->common.topfallbacks = NULL;
4064     }
4065     }
4066    
         /* Restore the accept state saved on entry. */
4067     common->acceptlabel = save_acceptlabel;
4068     common->accept = save_accept;
4069     return cc + 1 + LINK_SIZE;
4070     }
4071    
4072 ph10 836 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
4073 zherczeg 741 {
4074     int condition = FALSE;
4075 ph10 836 pcre_uchar *slotA = name_table;
4076     pcre_uchar *slotB;
4077 zherczeg 741 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4078     sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4079     sljit_w no_capture;
4080     int i;
4081    
4082     locals += OVECTOR_START / sizeof(sljit_w);
4083     no_capture = locals[1];
4084    
4085     for (i = 0; i < name_count; i++)
4086     {
4087     if (GET2(slotA, 0) == refno) break;
4088     slotA += name_entry_size;
4089     }
4090    
4091     if (i < name_count)
4092     {
4093     /* Found a name for the number - there can be only one; duplicate names
4094     for different numbers are allowed, but not vice versa. First scan down
4095     for duplicates. */
4096    
4097     slotB = slotA;
4098     while (slotB > name_table)
4099     {
4100     slotB -= name_entry_size;
4101 ph10 836 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4102 zherczeg 741 {
4103     condition = locals[GET2(slotB, 0) << 1] != no_capture;
4104     if (condition) break;
4105     }
4106     else break;
4107     }
4108    
4109     /* Scan up for duplicates */
4110     if (!condition)
4111     {
4112     slotB = slotA;
4113     for (i++; i < name_count; i++)
4114     {
4115     slotB += name_entry_size;
4116 ph10 836 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4117 zherczeg 741 {
4118     condition = locals[GET2(slotB, 0) << 1] != no_capture;
4119     if (condition) break;
4120     }
4121     else break;
4122     }
4123     }
4124     }
4125     return condition;
4126     }
4127    
4128 ph10 836 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
4129 zherczeg 741 {
4130     int condition = FALSE;
4131 ph10 836 pcre_uchar *slotA = name_table;
4132     pcre_uchar *slotB;
4133 zherczeg 741 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4134     sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4135     sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
4136     int i;
4137    
4138     for (i = 0; i < name_count; i++)
4139     {
4140     if (GET2(slotA, 0) == recno) break;
4141     slotA += name_entry_size;
4142     }
4143    
4144     if (i < name_count)
4145     {
4146     /* Found a name for the number - there can be only one; duplicate
4147     names for different numbers are allowed, but not vice versa. First
4148     scan down for duplicates. */
4149    
4150     slotB = slotA;
4151     while (slotB > name_table)
4152     {
4153     slotB -= name_entry_size;
4154 ph10 836 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4155 zherczeg 741 {
4156     condition = GET2(slotB, 0) == group_num;
4157     if (condition) break;
4158     }
4159     else break;
4160     }
4161    
4162     /* Scan up for duplicates */
4163     if (!condition)
4164     {
4165     slotB = slotA;
4166     for (i++; i < name_count; i++)
4167     {
4168     slotB += name_entry_size;
4169 ph10 836 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4170 zherczeg 741 {
4171     condition = GET2(slotB, 0) == group_num;
4172     if (condition) break;
4173     }
4174     else break;
4175     }
4176     }
4177     }
4178     return condition;
4179     }
4180    
4181 ph10 664 /*
4182     Handling bracketed expressions is probably the most complex part.
4183    
4184     Stack layout naming characters:
4185     S - Push the current STR_PTR
4186     0 - Push a 0 (NULL)
4187     A - Push the current STR_PTR. Needed for restoring the STR_PTR
4188     before the next alternative. Not pushed if there are no alternatives.