/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 924 - (show annotations) (download)
Wed Feb 22 10:23:56 2012 UTC (2 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 225483 byte(s)
OP_NOT now supports any UTF character
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory on the stack. Fast, but limited size. */
69 #define LOCAL_SPACE_SIZE 32768
70
71 #define STACK_GROWTH_RATE 8192
72
73 /* Enable to check that the allocation could destroy temporaries. */
74 #if defined SLJIT_DEBUG && SLJIT_DEBUG
75 #define DESTROY_REGISTERS 1
76 #endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct fallback_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the hot path (expected path), and the other is called
93 the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the hot path.
95
96 Greedy star operator (*) :
97 Hot path: match happens.
98 Fallback path: match failed.
99 Non-greedy star operator (*?) :
100 Hot path: no need to perform a match.
101 Fallback path: match is required.
102
103 The following example shows the code generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A hot path
112 '(' hot path (pushing arguments to the stack)
113 B hot path
114 ')' hot path (pushing arguments to the stack)
115 D hot path
116 return with successful match
117
118 D fallback path
119 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120 B fallback path
121 C expected path
122 jump to D hot path
123 C fallback path
124 A fallback path
125
126 Notice that the order of the fallback code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current fallback code path. The fallback code path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the hot path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the fallback code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the fallback mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *ptr;
156 /* Everything else after. */
157 int offsetcount;
158 int calllimit;
159 pcre_uint8 notbol;
160 pcre_uint8 noteol;
161 pcre_uint8 notempty;
162 pcre_uint8 notempty_atstart;
163 } jit_arguments;
164
165 typedef struct executable_functions {
166 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
167 PUBL(jit_callback) callback;
168 void *userdata;
169 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
170 } executable_functions;
171
/* Singly linked list of sljit jumps. */
typedef struct jump_list
{
  /* The jump stored in this node. */
  struct sljit_jump *jump;
  /* Next node, or NULL at the end of the list. */
  struct jump_list *next;
} jump_list;
176
/* Kinds of out-of-line stubs. */
enum stub_types
{
  stack_alloc
};

/* Singly linked list of pending stubs. */
typedef struct stub_list
{
  /* Kind of the stub. */
  enum stub_types type;
  /* Integer payload of the stub. */
  int data;
  /* Jump which enters the stub. */
  struct sljit_jump *start;
  /* Label where execution continues after the stub. */
  struct sljit_label *leave;
  /* Next stub, or NULL. */
  struct stub_list *next;
} stub_list;
186
187 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
188
189 /* The following structure is the key data type for the recursive
190 code generator. It is allocated by compile_hotpath, and contains
191 the aguments for compile_fallbackpath. Must be the first member
192 of its descendants. */
193 typedef struct fallback_common {
194 /* Concatenation stack. */
195 struct fallback_common *prev;
196 jump_list *nextfallbacks;
197 /* Internal stack (for component operators). */
198 struct fallback_common *top;
199 jump_list *topfallbacks;
200 /* Opcode pointer. */
201 pcre_uchar *cc;
202 } fallback_common;
203
204 typedef struct assert_fallback {
205 fallback_common common;
206 jump_list *condfailed;
207 /* Less than 0 (-1) if a frame is not needed. */
208 int framesize;
209 /* Points to our private memory word on the stack. */
210 int localptr;
211 /* For iterators. */
212 struct sljit_label *hotpath;
213 } assert_fallback;
214
215 typedef struct bracket_fallback {
216 fallback_common common;
217 /* Where to coninue if an alternative is successfully matched. */
218 struct sljit_label *althotpath;
219 /* For rmin and rmax iterators. */
220 struct sljit_label *recursivehotpath;
221 /* For greedy ? operator. */
222 struct sljit_label *zerohotpath;
223 /* Contains the branches of a failed condition. */
224 union {
225 /* Both for OP_COND, OP_SCOND. */
226 jump_list *condfailed;
227 assert_fallback *assert;
228 /* For OP_ONCE. -1 if not needed. */
229 int framesize;
230 } u;
231 /* Points to our private memory word on the stack. */
232 int localptr;
233 } bracket_fallback;
234
235 typedef struct bracketpos_fallback {
236 fallback_common common;
237 /* Points to our private memory word on the stack. */
238 int localptr;
239 /* Reverting stack is needed. */
240 int framesize;
241 /* Allocated stack size. */
242 int stacksize;
243 } bracketpos_fallback;
244
245 typedef struct braminzero_fallback {
246 fallback_common common;
247 struct sljit_label *hotpath;
248 } braminzero_fallback;
249
250 typedef struct iterator_fallback {
251 fallback_common common;
252 /* Next iteration. */
253 struct sljit_label *hotpath;
254 } iterator_fallback;
255
256 typedef struct recurse_entry {
257 struct recurse_entry *next;
258 /* Contains the function entry. */
259 struct sljit_label *entry;
260 /* Collects the calls until the function is not created. */
261 jump_list *calls;
262 /* Points to the starting opcode. */
263 int start;
264 } recurse_entry;
265
266 typedef struct recurse_fallback {
267 fallback_common common;
268 } recurse_fallback;
269
270 typedef struct compiler_common {
271 struct sljit_compiler *compiler;
272 pcre_uchar *start;
273
274 /* Local stack area size and variable pointers. */
275 int localsize;
276 int *localptrs;
277 int cbraptr;
278 /* OVector starting point. Must be divisible by 2. */
279 int ovector_start;
280 /* Last known position of the requested byte. */
281 int req_char_ptr;
282 /* Head of the last recursion. */
283 int recursive_head;
284 /* First inspected character for partial matching. */
285 int start_used_ptr;
286 /* Starting pointer for partial soft matches. */
287 int hit_start;
288 /* End pointer of the first line. */
289 int first_line_end;
290
291 /* Other */
292 const pcre_uint8 *fcc;
293 sljit_w lcc;
294 int mode;
295 int nltype;
296 int newline;
297 int bsr_nltype;
298 int endonly;
299 sljit_w ctypes;
300 sljit_uw name_table;
301 sljit_w name_count;
302 sljit_w name_entry_size;
303
304 /* Labels and jump lists. */
305 struct sljit_label *partialmatchlabel;
306 struct sljit_label *acceptlabel;
307 stub_list *stubs;
308 recurse_entry *entries;
309 recurse_entry *currententry;
310 jump_list *partialmatch;
311 jump_list *accept;
312 jump_list *calllimit;
313 jump_list *stackalloc;
314 jump_list *revertframes;
315 jump_list *wordboundary;
316 jump_list *anynewline;
317 jump_list *hspace;
318 jump_list *vspace;
319 jump_list *casefulcmp;
320 jump_list *caselesscmp;
321 BOOL jscript_compat;
322 #ifdef SUPPORT_UTF
323 BOOL utf;
324 #ifdef SUPPORT_UCP
325 BOOL use_ucp;
326 #endif
327 jump_list *utfreadchar;
328 #ifdef COMPILE_PCRE8
329 jump_list *utfreadtype8;
330 #endif
331 #endif /* SUPPORT_UTF */
332 #ifdef SUPPORT_UCP
333 jump_list *getucd;
334 #endif
335 } compiler_common;
336
/* Context used by byte_sequence_compare. The character buffers are only
   present when SLJIT_UNALIGNED is enabled. */

typedef struct compare_context
{
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  /* The same storage viewed as an int, a short, or an array of
     character units. */
  union
  {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } c;
  /* Same layout as the union above. */
  union
  {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } oc;
#endif
} compare_context;
370
/* Special values stored in saved stack frames. */
enum
{
  /* Written as the last word of a frame. */
  frame_end = 0,
  /* Marks an entry which holds the saved OVECTOR(0) value. */
  frame_setstrbegin = -1
};
375
376 /* Undefine sljit macros. */
377 #undef CMP
378
/* Byte offset of stack element i: STACK(0) is -sizeof(sljit_w), and every
   following element lies one machine word lower. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
381
/* Symbolic names for the sljit registers used in this file. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define TMP2          SLJIT_TEMPORARY_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
392
/* Layout of the locals area. */
/* Two locals which are free for use by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_w))
#define LOCALS1          (1 * sizeof(sljit_w))
/* Two locals for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_w))
#define POSSESSIVE1      (3 * sizeof(sljit_w))
/* Maximum number of recursions. */
#define CALL_LIMIT       (4 * sizeof(sljit_w))
/* The output vector is kept on the stack and holds pointers to characters.
   Its data is split into two groups: the first group holds the start / end
   character pointers, the second group holds the start pointers used while
   the end of the capturing group has not been reached yet. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))
/* The localptrs entry which belongs to the opcode at cc. */
#define PRIV_DATA(cc)    (common->localptrs[(cc) - common->start])
410
411 #ifdef COMPILE_PCRE8
412 #define MOV_UCHAR SLJIT_MOV_UB
413 #define MOVU_UCHAR SLJIT_MOVU_UB
414 #else
415 #ifdef COMPILE_PCRE16
416 #define MOV_UCHAR SLJIT_MOV_UH
417 #define MOVU_UCHAR SLJIT_MOVU_UH
418 #else
419 #error Unsupported compiling mode
420 #endif
421 #endif
422
/* Shortcuts. Every macro except DEFINE_COMPILER expects a local variable
   called 'compiler', which DEFINE_COMPILER declares from common->compiler. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler
/* Emit a one / two operand instruction. */
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
/* Emit a label at the current position. */
#define LABEL() sljit_emit_label(compiler)
/* Emit a jump; JUMPTO also binds it to an existing label. */
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
/* Emit a compare-and-jump; CMPTO also binds it to an existing label. */
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define COND_VALUE(op, dst, dstw, type) sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
444
445 static pcre_uchar* bracketend(pcre_uchar* cc)
446 {
447 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
448 do cc += GET(cc, 1); while (*cc == OP_ALT);
449 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
450 cc += 1 + LINK_SIZE;
451 return cc;
452 }
453
454 /* Functions which might need modification whenever a new opcode is supported:
455 next_opcode
456 get_localspace
457 set_localptrs
458 get_framesize
459 init_frame
460 get_localsize
461 copy_locals
462 compile_hotpath
463 compile_fallbackpath
464 */
465
466 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
467 {
468 SLJIT_UNUSED_ARG(common);
469 switch(*cc)
470 {
471 case OP_SOD:
472 case OP_SOM:
473 case OP_SET_SOM:
474 case OP_NOT_WORD_BOUNDARY:
475 case OP_WORD_BOUNDARY:
476 case OP_NOT_DIGIT:
477 case OP_DIGIT:
478 case OP_NOT_WHITESPACE:
479 case OP_WHITESPACE:
480 case OP_NOT_WORDCHAR:
481 case OP_WORDCHAR:
482 case OP_ANY:
483 case OP_ALLANY:
484 case OP_ANYNL:
485 case OP_NOT_HSPACE:
486 case OP_HSPACE:
487 case OP_NOT_VSPACE:
488 case OP_VSPACE:
489 case OP_EXTUNI:
490 case OP_EODN:
491 case OP_EOD:
492 case OP_CIRC:
493 case OP_CIRCM:
494 case OP_DOLL:
495 case OP_DOLLM:
496 case OP_TYPESTAR:
497 case OP_TYPEMINSTAR:
498 case OP_TYPEPLUS:
499 case OP_TYPEMINPLUS:
500 case OP_TYPEQUERY:
501 case OP_TYPEMINQUERY:
502 case OP_TYPEPOSSTAR:
503 case OP_TYPEPOSPLUS:
504 case OP_TYPEPOSQUERY:
505 case OP_CRSTAR:
506 case OP_CRMINSTAR:
507 case OP_CRPLUS:
508 case OP_CRMINPLUS:
509 case OP_CRQUERY:
510 case OP_CRMINQUERY:
511 case OP_DEF:
512 case OP_BRAZERO:
513 case OP_BRAMINZERO:
514 case OP_BRAPOSZERO:
515 case OP_FAIL:
516 case OP_ACCEPT:
517 case OP_ASSERT_ACCEPT:
518 case OP_SKIPZERO:
519 return cc + 1;
520
521 case OP_ANYBYTE:
522 #ifdef SUPPORT_UTF
523 if (common->utf) return NULL;
524 #endif
525 return cc + 1;
526
527 case OP_CHAR:
528 case OP_CHARI:
529 case OP_NOT:
530 case OP_NOTI:
531 case OP_STAR:
532 case OP_MINSTAR:
533 case OP_PLUS:
534 case OP_MINPLUS:
535 case OP_QUERY:
536 case OP_MINQUERY:
537 case OP_POSSTAR:
538 case OP_POSPLUS:
539 case OP_POSQUERY:
540 case OP_STARI:
541 case OP_MINSTARI:
542 case OP_PLUSI:
543 case OP_MINPLUSI:
544 case OP_QUERYI:
545 case OP_MINQUERYI:
546 case OP_POSSTARI:
547 case OP_POSPLUSI:
548 case OP_POSQUERYI:
549 case OP_NOTSTAR:
550 case OP_NOTMINSTAR:
551 case OP_NOTPLUS:
552 case OP_NOTMINPLUS:
553 case OP_NOTQUERY:
554 case OP_NOTMINQUERY:
555 case OP_NOTPOSSTAR:
556 case OP_NOTPOSPLUS:
557 case OP_NOTPOSQUERY:
558 case OP_NOTSTARI:
559 case OP_NOTMINSTARI:
560 case OP_NOTPLUSI:
561 case OP_NOTMINPLUSI:
562 case OP_NOTQUERYI:
563 case OP_NOTMINQUERYI:
564 case OP_NOTPOSSTARI:
565 case OP_NOTPOSPLUSI:
566 case OP_NOTPOSQUERYI:
567 cc += 2;
568 #ifdef SUPPORT_UTF
569 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
570 #endif
571 return cc;
572
573 case OP_UPTO:
574 case OP_MINUPTO:
575 case OP_EXACT:
576 case OP_POSUPTO:
577 case OP_UPTOI:
578 case OP_MINUPTOI:
579 case OP_EXACTI:
580 case OP_POSUPTOI:
581 case OP_NOTUPTO:
582 case OP_NOTMINUPTO:
583 case OP_NOTEXACT:
584 case OP_NOTPOSUPTO:
585 case OP_NOTUPTOI:
586 case OP_NOTMINUPTOI:
587 case OP_NOTEXACTI:
588 case OP_NOTPOSUPTOI:
589 cc += 2 + IMM2_SIZE;
590 #ifdef SUPPORT_UTF
591 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
592 #endif
593 return cc;
594
595 case OP_NOTPROP:
596 case OP_PROP:
597 return cc + 1 + 2;
598
599 case OP_TYPEUPTO:
600 case OP_TYPEMINUPTO:
601 case OP_TYPEEXACT:
602 case OP_TYPEPOSUPTO:
603 case OP_REF:
604 case OP_REFI:
605 case OP_CREF:
606 case OP_NCREF:
607 case OP_RREF:
608 case OP_NRREF:
609 case OP_CLOSE:
610 cc += 1 + IMM2_SIZE;
611 return cc;
612
613 case OP_CRRANGE:
614 case OP_CRMINRANGE:
615 return cc + 1 + 2 * IMM2_SIZE;
616
617 case OP_CLASS:
618 case OP_NCLASS:
619 return cc + 1 + 32 / sizeof(pcre_uchar);
620
621 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
622 case OP_XCLASS:
623 return cc + GET(cc, 1);
624 #endif
625
626 case OP_RECURSE:
627 case OP_ASSERT:
628 case OP_ASSERT_NOT:
629 case OP_ASSERTBACK:
630 case OP_ASSERTBACK_NOT:
631 case OP_REVERSE:
632 case OP_ONCE:
633 case OP_ONCE_NC:
634 case OP_BRA:
635 case OP_BRAPOS:
636 case OP_COND:
637 case OP_SBRA:
638 case OP_SBRAPOS:
639 case OP_SCOND:
640 case OP_ALT:
641 case OP_KET:
642 case OP_KETRMAX:
643 case OP_KETRMIN:
644 case OP_KETRPOS:
645 return cc + 1 + LINK_SIZE;
646
647 case OP_CBRA:
648 case OP_CBRAPOS:
649 case OP_SCBRA:
650 case OP_SCBRAPOS:
651 return cc + 1 + LINK_SIZE + IMM2_SIZE;
652
653 default:
654 return NULL;
655 }
656 }
657
658 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
659 {
660 int localspace = 0;
661 pcre_uchar *alternative;
662 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
663 while (cc < ccend)
664 {
665 switch(*cc)
666 {
667 case OP_ASSERT:
668 case OP_ASSERT_NOT:
669 case OP_ASSERTBACK:
670 case OP_ASSERTBACK_NOT:
671 case OP_ONCE:
672 case OP_ONCE_NC:
673 case OP_BRAPOS:
674 case OP_SBRA:
675 case OP_SBRAPOS:
676 case OP_SCOND:
677 localspace += sizeof(sljit_w);
678 cc += 1 + LINK_SIZE;
679 break;
680
681 case OP_CBRAPOS:
682 case OP_SCBRAPOS:
683 localspace += sizeof(sljit_w);
684 cc += 1 + LINK_SIZE + IMM2_SIZE;
685 break;
686
687 case OP_COND:
688 /* Might be a hidden SCOND. */
689 alternative = cc + GET(cc, 1);
690 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
691 localspace += sizeof(sljit_w);
692 cc += 1 + LINK_SIZE;
693 break;
694
695 case OP_RECURSE:
696 /* Set its value only once. */
697 if (common->recursive_head == 0)
698 {
699 common->recursive_head = common->ovector_start;
700 common->ovector_start += sizeof(sljit_w);
701 }
702 cc += 1 + LINK_SIZE;
703 break;
704
705 default:
706 cc = next_opcode(common, cc);
707 if (cc == NULL)
708 return -1;
709 break;
710 }
711 }
712 return localspace;
713 }
714
715 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
716 {
717 pcre_uchar *cc = common->start;
718 pcre_uchar *alternative;
719 while (cc < ccend)
720 {
721 switch(*cc)
722 {
723 case OP_ASSERT:
724 case OP_ASSERT_NOT:
725 case OP_ASSERTBACK:
726 case OP_ASSERTBACK_NOT:
727 case OP_ONCE:
728 case OP_ONCE_NC:
729 case OP_BRAPOS:
730 case OP_SBRA:
731 case OP_SBRAPOS:
732 case OP_SCOND:
733 common->localptrs[cc - common->start] = localptr;
734 localptr += sizeof(sljit_w);
735 cc += 1 + LINK_SIZE;
736 break;
737
738 case OP_CBRAPOS:
739 case OP_SCBRAPOS:
740 common->localptrs[cc - common->start] = localptr;
741 localptr += sizeof(sljit_w);
742 cc += 1 + LINK_SIZE + IMM2_SIZE;
743 break;
744
745 case OP_COND:
746 /* Might be a hidden SCOND. */
747 alternative = cc + GET(cc, 1);
748 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
749 {
750 common->localptrs[cc - common->start] = localptr;
751 localptr += sizeof(sljit_w);
752 }
753 cc += 1 + LINK_SIZE;
754 break;
755
756 default:
757 cc = next_opcode(common, cc);
758 SLJIT_ASSERT(cc != NULL);
759 break;
760 }
761 }
762 }
763
764 /* Returns with -1 if no need for frame. */
765 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
766 {
767 pcre_uchar *ccend = bracketend(cc);
768 int length = 0;
769 BOOL possessive = FALSE;
770 BOOL setsom_found = FALSE;
771
772 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
773 {
774 length = 3;
775 possessive = TRUE;
776 }
777
778 cc = next_opcode(common, cc);
779 SLJIT_ASSERT(cc != NULL);
780 while (cc < ccend)
781 switch(*cc)
782 {
783 case OP_SET_SOM:
784 case OP_RECURSE:
785 if (!setsom_found)
786 {
787 length += 2;
788 setsom_found = TRUE;
789 }
790 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
791 break;
792
793 case OP_CBRA:
794 case OP_CBRAPOS:
795 case OP_SCBRA:
796 case OP_SCBRAPOS:
797 length += 3;
798 cc += 1 + LINK_SIZE + IMM2_SIZE;
799 break;
800
801 default:
802 cc = next_opcode(common, cc);
803 SLJIT_ASSERT(cc != NULL);
804 break;
805 }
806
807 /* Possessive quantifiers can use a special case. */
808 if (SLJIT_UNLIKELY(possessive) && length == 3)
809 return -1;
810
811 if (length > 0)
812 return length + 1;
813 return -1;
814 }
815
816 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
817 {
818 DEFINE_COMPILER;
819 pcre_uchar *ccend = bracketend(cc);
820 BOOL setsom_found = FALSE;
821 int offset;
822
823 /* >= 1 + shortest item size (2) */
824 SLJIT_UNUSED_ARG(stacktop);
825 SLJIT_ASSERT(stackpos >= stacktop + 2);
826
827 stackpos = STACK(stackpos);
828 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
829 cc = next_opcode(common, cc);
830 SLJIT_ASSERT(cc != NULL);
831 while (cc < ccend)
832 switch(*cc)
833 {
834 case OP_SET_SOM:
835 case OP_RECURSE:
836 if (!setsom_found)
837 {
838 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
839 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
840 stackpos += (int)sizeof(sljit_w);
841 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
842 stackpos += (int)sizeof(sljit_w);
843 setsom_found = TRUE;
844 }
845 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
846 break;
847
848 case OP_CBRA:
849 case OP_CBRAPOS:
850 case OP_SCBRA:
851 case OP_SCBRAPOS:
852 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
853 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
854 stackpos += (int)sizeof(sljit_w);
855 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
856 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
857 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
858 stackpos += (int)sizeof(sljit_w);
859 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
860 stackpos += (int)sizeof(sljit_w);
861
862 cc += 1 + LINK_SIZE + IMM2_SIZE;
863 break;
864
865 default:
866 cc = next_opcode(common, cc);
867 SLJIT_ASSERT(cc != NULL);
868 break;
869 }
870
871 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
872 SLJIT_ASSERT(stackpos == STACK(stacktop));
873 }
874
875 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
876 {
877 int localsize = 2;
878 pcre_uchar *alternative;
879 /* Calculate the sum of the local variables. */
880 while (cc < ccend)
881 {
882 switch(*cc)
883 {
884 case OP_ASSERT:
885 case OP_ASSERT_NOT:
886 case OP_ASSERTBACK:
887 case OP_ASSERTBACK_NOT:
888 case OP_ONCE:
889 case OP_ONCE_NC:
890 case OP_BRAPOS:
891 case OP_SBRA:
892 case OP_SBRAPOS:
893 case OP_SCOND:
894 localsize++;
895 cc += 1 + LINK_SIZE;
896 break;
897
898 case OP_CBRA:
899 case OP_SCBRA:
900 localsize++;
901 cc += 1 + LINK_SIZE + IMM2_SIZE;
902 break;
903
904 case OP_CBRAPOS:
905 case OP_SCBRAPOS:
906 localsize += 2;
907 cc += 1 + LINK_SIZE + IMM2_SIZE;
908 break;
909
910 case OP_COND:
911 /* Might be a hidden SCOND. */
912 alternative = cc + GET(cc, 1);
913 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
914 localsize++;
915 cc += 1 + LINK_SIZE;
916 break;
917
918 default:
919 cc = next_opcode(common, cc);
920 SLJIT_ASSERT(cc != NULL);
921 break;
922 }
923 }
924 SLJIT_ASSERT(cc == ccend);
925 return localsize;
926 }
927
928 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
929 BOOL save, int stackptr, int stacktop)
930 {
931 DEFINE_COMPILER;
932 int srcw[2];
933 int count;
934 BOOL tmp1next = TRUE;
935 BOOL tmp1empty = TRUE;
936 BOOL tmp2empty = TRUE;
937 pcre_uchar *alternative;
938 enum {
939 start,
940 loop,
941 end
942 } status;
943
944 status = save ? start : loop;
945 stackptr = STACK(stackptr - 2);
946 stacktop = STACK(stacktop - 1);
947
948 if (!save)
949 {
950 stackptr += sizeof(sljit_w);
951 if (stackptr < stacktop)
952 {
953 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
954 stackptr += sizeof(sljit_w);
955 tmp1empty = FALSE;
956 }
957 if (stackptr < stacktop)
958 {
959 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
960 stackptr += sizeof(sljit_w);
961 tmp2empty = FALSE;
962 }
963 /* The tmp1next must be TRUE in either way. */
964 }
965
966 while (status != end)
967 {
968 count = 0;
969 switch(status)
970 {
971 case start:
972 SLJIT_ASSERT(save && common->recursive_head != 0);
973 count = 1;
974 srcw[0] = common->recursive_head;
975 status = loop;
976 break;
977
978 case loop:
979 if (cc >= ccend)
980 {
981 status = end;
982 break;
983 }
984
985 switch(*cc)
986 {
987 case OP_ASSERT:
988 case OP_ASSERT_NOT:
989 case OP_ASSERTBACK:
990 case OP_ASSERTBACK_NOT:
991 case OP_ONCE:
992 case OP_ONCE_NC:
993 case OP_BRAPOS:
994 case OP_SBRA:
995 case OP_SBRAPOS:
996 case OP_SCOND:
997 count = 1;
998 srcw[0] = PRIV_DATA(cc);
999 SLJIT_ASSERT(srcw[0] != 0);
1000 cc += 1 + LINK_SIZE;
1001 break;
1002
1003 case OP_CBRA:
1004 case OP_SCBRA:
1005 count = 1;
1006 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1007 cc += 1 + LINK_SIZE + IMM2_SIZE;
1008 break;
1009
1010 case OP_CBRAPOS:
1011 case OP_SCBRAPOS:
1012 count = 2;
1013 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1014 srcw[0] = PRIV_DATA(cc);
1015 SLJIT_ASSERT(srcw[0] != 0);
1016 cc += 1 + LINK_SIZE + IMM2_SIZE;
1017 break;
1018
1019 case OP_COND:
1020 /* Might be a hidden SCOND. */
1021 alternative = cc + GET(cc, 1);
1022 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1023 {
1024 count = 1;
1025 srcw[0] = PRIV_DATA(cc);
1026 SLJIT_ASSERT(srcw[0] != 0);
1027 }
1028 cc += 1 + LINK_SIZE;
1029 break;
1030
1031 default:
1032 cc = next_opcode(common, cc);
1033 SLJIT_ASSERT(cc != NULL);
1034 break;
1035 }
1036 break;
1037
1038 case end:
1039 SLJIT_ASSERT_STOP();
1040 break;
1041 }
1042
1043 while (count > 0)
1044 {
1045 count--;
1046 if (save)
1047 {
1048 if (tmp1next)
1049 {
1050 if (!tmp1empty)
1051 {
1052 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1053 stackptr += sizeof(sljit_w);
1054 }
1055 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1056 tmp1empty = FALSE;
1057 tmp1next = FALSE;
1058 }
1059 else
1060 {
1061 if (!tmp2empty)
1062 {
1063 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1064 stackptr += sizeof(sljit_w);
1065 }
1066 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1067 tmp2empty = FALSE;
1068 tmp1next = TRUE;
1069 }
1070 }
1071 else
1072 {
1073 if (tmp1next)
1074 {
1075 SLJIT_ASSERT(!tmp1empty);
1076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1077 tmp1empty = stackptr >= stacktop;
1078 if (!tmp1empty)
1079 {
1080 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1081 stackptr += sizeof(sljit_w);
1082 }
1083 tmp1next = FALSE;
1084 }
1085 else
1086 {
1087 SLJIT_ASSERT(!tmp2empty);
1088 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1089 tmp2empty = stackptr >= stacktop;
1090 if (!tmp2empty)
1091 {
1092 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1093 stackptr += sizeof(sljit_w);
1094 }
1095 tmp1next = TRUE;
1096 }
1097 }
1098 }
1099 }
1100
1101 if (save)
1102 {
1103 if (tmp1next)
1104 {
1105 if (!tmp1empty)
1106 {
1107 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1108 stackptr += sizeof(sljit_w);
1109 }
1110 if (!tmp2empty)
1111 {
1112 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1113 stackptr += sizeof(sljit_w);
1114 }
1115 }
1116 else
1117 {
1118 if (!tmp2empty)
1119 {
1120 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1121 stackptr += sizeof(sljit_w);
1122 }
1123 if (!tmp1empty)
1124 {
1125 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1126 stackptr += sizeof(sljit_w);
1127 }
1128 }
1129 }
1130 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1131 }
1132
1133 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1134 {
1135 return (value & (value - 1)) == 0;
1136 }
1137
1138 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1139 {
1140 while (list)
1141 {
1142 /* sljit_set_label is clever enough to do nothing
1143 if either the jump or the label is NULL */
1144 sljit_set_label(list->jump, label);
1145 list = list->next;
1146 }
1147 }
1148
1149 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1150 {
1151 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1152 if (list_item)
1153 {
1154 list_item->next = *list;
1155 list_item->jump = jump;
1156 *list = list_item;
1157 }
1158 }
1159
1160 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1161 {
1162 DEFINE_COMPILER;
1163 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1164
1165 if (list_item)
1166 {
1167 list_item->type = type;
1168 list_item->data = data;
1169 list_item->start = start;
1170 list_item->leave = LABEL();
1171 list_item->next = common->stubs;
1172 common->stubs = list_item;
1173 }
1174 }
1175
1176 static void flush_stubs(compiler_common *common)
1177 {
1178 DEFINE_COMPILER;
1179 stub_list* list_item = common->stubs;
1180
1181 while (list_item)
1182 {
1183 JUMPHERE(list_item->start);
1184 switch(list_item->type)
1185 {
1186 case stack_alloc:
1187 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1188 break;
1189 }
1190 JUMPTO(SLJIT_JUMP, list_item->leave);
1191 list_item = list_item->next;
1192 }
1193 common->stubs = NULL;
1194 }
1195
/* Emits code that decrements CALL_COUNT by one and records, in the
common->calllimit jump list, a jump that is taken when the result is zero. */
1196 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1197 {
1198 DEFINE_COMPILER;
1199
1200 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1201 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1202 }
1203
/* Emits code that advances STACK_TOP by size machine words
(size * sizeof(sljit_w) bytes) and registers a stack_alloc stub that is
entered when the new STACK_TOP is greater than STACK_LIMIT. */
1204 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1205 {
1206 /* May destroy all locals and registers except TMP2. */
1207 DEFINE_COMPILER;
1208
1209 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1210 #ifdef DESTROY_REGISTERS
/* Overwrites TMP1, TMP3, RETURN_ADDR, LOCALS0 and LOCALS1 with a
recognizable value; presumably a debugging aid that exposes callers
which wrongly rely on these surviving the allocation. */
1211 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1212 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1213 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1214 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1215 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1216 #endif
/* The stub body itself is emitted later by flush_stubs. */
1217 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1218 }
1219
/* Emits code that moves STACK_TOP back by size machine words
(size * sizeof(sljit_w) bytes). No limit check is emitted. */
1220 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1221 {
1222 DEFINE_COMPILER;
1223 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1224 }
1225
/* Emits code that stores the value "begin - 1 character" into the first
length OVECTOR slots. For short vectors the stores are unrolled; otherwise
a counted loop is emitted. */
1226 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1227 {
1228 DEFINE_COMPILER;
1229 struct sljit_label *loop;
1230 int i;
1231 /* At this point we can freely use all temporary registers. */
1232 /* TMP1 returns with begin - 1. */
1233 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1234 if (length < 8)
1235 {
/* Unrolled: one store per slot. */
1236 for (i = 0; i < length; i++)
1237 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1238 }
1239 else
1240 {
/* Loop: REG2 starts one word before the first slot because the store
below uses SLJIT_MOVU with a one word offset (a store that also advances
the base register, as far as sljit's MOVU is understood here). REG3 counts
the remaining slots. */
1241 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1242 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1243 loop = LABEL();
1244 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1245 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1246 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1247 }
1248 }
1249
/* Emits code that converts the internal OVECTOR (pointers into the
subject) into integer offsets relative to arguments->begin, writes them to
arguments->offsets, and leaves the match return value in SLJIT_RETURN_REG.
topbracket is the highest capturing bracket number considered when the
return value is computed. */
1250 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1251 {
1252 DEFINE_COMPILER;
1253 struct sljit_label *loop;
1254 struct sljit_jump *earlyexit;
1255
1256 /* At this point we can freely use all registers. */
/* Keep the previous OVECTOR(1) value in SAVED_REG3 (it is compared against
below) and store the current STR_PTR as the new OVECTOR(1). */
1257 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1258 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1259
/* REG2 = offsetcount, REG3 = offsets - sizeof(int) (pre-biased for the
MOVU_SI store below), REG1 = begin, SAVED_REG1 = address of OVECTOR. */
1260 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1261 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1262 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1263 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1264 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1265 /* Unlikely, but possible */
1266 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1267 loop = LABEL();
/* SAVED_REG2 = current ovector pointer - begin, i.e. a byte distance. */
1268 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1269 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1270 /* Copy the integer value to the output buffer */
1271 #ifdef COMPILE_PCRE16
/* Halve the byte distance to obtain a count of 16 bit units. */
1272 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1273 #endif
1274 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1275 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1276 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1277 JUMPHERE(earlyexit);
1278
1279 /* Calculate the return value, which is the maximum ovector value. */
1280 if (topbracket > 1)
1281 {
/* Walk the OVECTOR pairs downwards, starting at pair topbracket - 1, while
the first word of the pair equals the value saved in SAVED_REG3; REG2 is
decremented once per inspected pair and ends up as the return value. */
1282 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1283 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1284
1285 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1286 loop = LABEL();
1287 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1288 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1289 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1290 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1291 }
1292 else
1293 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1294 }
1295
/* Emits the exit sequence for a partial match: SLJIT_RETURN_REG is set
to PCRE_ERROR_PARTIAL and, when the caller supplied at least two offset
slots, offsets[0] and offsets[1] receive the start position and the end of
the subject (both relative to arguments->begin). Control then continues at
leave. */
1296 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1297 {
1298 DEFINE_COMPILER;
1299
1300 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1301 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1302
1303 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1304 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1305 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
/* Fewer than two offset slots: nothing can be stored, leave right away. */
1306 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1307
1308 /* Store match begin and end. */
1309 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1310 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
/* The start position comes from start_used_ptr in hard partial mode and
from hit_start otherwise. */
1311 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
/* offsets[1] = STR_END - begin. STR_END is SAVED_REG2 (asserted above),
so this overwrites STR_END. */
1312 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1313 #ifdef COMPILE_PCRE16
1314 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1315 #endif
1316 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1317
/* offsets[0] = start position - begin. */
1318 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1319 #ifdef COMPILE_PCRE16
1320 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1321 #endif
1322 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1323
1324 JUMPTO(SLJIT_JUMP, leave);
1325 }
1326
/* Emits code that lowers the start_used_ptr local to STR_PTR when STR_PTR
is smaller than the stored value. Nothing is emitted outside the two
partial matching modes. */
1327 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1328 {
1329 /* May destroy TMP1. */
1330 DEFINE_COMPILER;
1331 struct sljit_jump *jump;
1332
1333 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1334 {
1335 /* The value of -1 must be kept for start_used_ptr! */
/* Adding 1 turns the -1 marker into 0, so the comparison below skips the
store in that case as well. */
1336 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1337 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1338 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1339 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1340 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1341 JUMPHERE(jump);
1342 }
1343 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1344 {
/* Hard mode compares the stored value directly, without the +1 bias. */
1345 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1346 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1347 JUMPHERE(jump);
1348 }
1349 }
1350
/* Compile-time helper (emits no code): returns non-zero when the
character starting at cc differs from its other case. */
1351 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1352 {
1353 /* Detects if the character has an othercase. */
1354 unsigned int c;
1355
1356 #ifdef SUPPORT_UTF
1357 if (common->utf)
1358 {
1359 GETCHAR(c, cc);
1360 if (c > 127)
1361 {
/* Above 127 only the Unicode property tables know about case; without
SUPPORT_UCP such characters are reported as caseless. */
1362 #ifdef SUPPORT_UCP
1363 return c != UCD_OTHERCASE(c);
1364 #else
1365 return FALSE;
1366 #endif
1367 }
/* Here c <= 127. Builds other than the 8 bit one answer from the fcc
table right away; the 8 bit build falls through to the final return,
which performs the same table lookup. */
1368 #ifndef COMPILE_PCRE8
1369 return common->fcc[c] != c;
1370 #endif
1371 }
1372 else
1373 #endif
1374 c = *cc;
/* The fcc table is only consulted when MAX_255(c) holds. */
1375 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1376 }
1377
/* Compile-time helper (emits no code): maps the character value c to its
other case. */
1378 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1379 {
1380 /* Returns with the othercase. */
1381 #ifdef SUPPORT_UTF
1382 if (common->utf && c > 127)
1383 {
/* In UTF mode characters above 127 are mapped through the Unicode tables
when SUPPORT_UCP is available and returned unchanged otherwise. */
1384 #ifdef SUPPORT_UCP
1385 return UCD_OTHERCASE(c);
1386 #else
1387 return c;
1388 #endif
1389 }
1390 #endif
/* Everything else goes through the fcc table; the third TABLE_GET argument
is presumably the fallback for values outside the table - confirm against
the macro definition. */
1391 return TABLE_GET(c, common->fcc, c);
1392 }
1393
/* Compile-time helper (emits no code). The character at cc must have an
other case (asserted below). Returns 0 when the two cases differ in more
than one bit. Otherwise returns (index << 8) | mask, where mask is the
single differing bit reduced to one byte and index tells which part of the
encoded character carries it. NOTE(review): the exact meaning of index is
defined by the consumers of this value, which are outside this view. */
1394 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1395 {
1396 /* Detects if the character and its othercase has only 1 bit difference. */
1397 unsigned int c, oc, bit;
1398 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1399 int n;
1400 #endif
1401
1402 #ifdef SUPPORT_UTF
1403 if (common->utf)
1404 {
1405 GETCHAR(c, cc);
1406 if (c <= 127)
1407 oc = common->fcc[c];
1408 else
1409 {
1410 #ifdef SUPPORT_UCP
1411 oc = UCD_OTHERCASE(c);
1412 #else
1413 oc = c;
1414 #endif
1415 }
1416 }
1417 else
1418 {
1419 c = *cc;
1420 oc = TABLE_GET(c, common->fcc, c);
1421 }
1422 #else
1423 c = *cc;
1424 oc = TABLE_GET(c, common->fcc, c);
1425 #endif
1426
1427 SLJIT_ASSERT(c != oc);
1428
1429 bit = c ^ oc;
1430 /* Optimized for English alphabet. */
1431 if (c <= 127 && bit == 0x20)
1432 return (0 << 8) | 0x20;
1433
1434 /* Since c != oc, they must have at least 1 bit difference. */
1435 if (!ispowerof2(bit))
1436 return 0;
1437
1438 #ifdef COMPILE_PCRE8
1439
1440 #ifdef SUPPORT_UTF
1441 if (common->utf && c > 127)
1442 {
/* Each UTF-8 trailing byte carries 6 payload bits. Starting from the last
byte (index = number of extra bytes), step one byte towards the front for
every 6 zero bits shifted out of the difference. */
1443 n = GET_EXTRALEN(*cc);
1444 while ((bit & 0x3f) == 0)
1445 {
1446 n--;
1447 bit >>= 6;
1448 }
1449 return (n << 8) | bit;
1450 }
1451 #endif /* SUPPORT_UTF */
1452 return (0 << 8) | bit;
1453
1454 #else /* COMPILE_PCRE8 */
1455
1456 #ifdef COMPILE_PCRE16
1457 #ifdef SUPPORT_UTF
1458 if (common->utf && c > 65535)
1459 {
/* Characters above 0xffff occupy two 16 bit units. A difference at bit 10
or above is shifted down by 10 and handled by the common return below
(index 0 or 1); a difference in the low 10 bits is reported with index
2 or 3. */
1460 if (bit >= (1 << 10))
1461 bit >>= 10;
1462 else
1463 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1464 }
1465 #endif /* SUPPORT_UTF */
1466 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1467 #endif /* COMPILE_PCRE16 */
1468
1469 #endif /* COMPILE_PCRE8 */
1470 }
1471
/* Emits the partial match check used when the end of the subject has been
reached. In JIT_COMPILE (non partial) mode nothing is emitted. With
force == FALSE the action is skipped when start_used_ptr >= STR_PTR; with
force == TRUE it is skipped only in soft mode when start_used_ptr is -1.
The action is: soft mode stores -1 into hit_start, hard mode jumps to the
partial match exit. */
1472 static void check_partial(compiler_common *common, BOOL force)
1473 {
1474 /* Checks whether a partial match has occurred. Does not modify registers. */
1475 DEFINE_COMPILER;
1476 struct sljit_jump *jump = NULL;
1477
1478 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1479
1480 if (common->mode == JIT_COMPILE)
1481 return;
1482
1483 if (!force)
1484 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1485 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1486 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
1487
1488 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1490 else
1491 {
/* Jump straight to the partial match label when it already exists,
otherwise queue the jump so it can be bound later. */
1492 if (common->partialmatchlabel != NULL)
1493 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1494 else
1495 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1496 }
1497
1498 if (jump != NULL)
1499 JUMPHERE(jump);
1500 }
1501
/* Emits an end-of-subject test and returns the jump that is taken when
STR_PTR >= STR_END and matching should simply fail at this point. The
caller has to bind the returned jump. In the partial matching modes the
emitted code additionally records or reports a partial match before that
jump is taken. */
1502 static struct sljit_jump *check_str_end(compiler_common *common)
1503 {
1504 /* Does not affect registers. Usually used in a tight spot. */
1505 DEFINE_COMPILER;
1506 struct sljit_jump *jump;
1507 struct sljit_jump *nohit;
1508 struct sljit_jump *return_value;
1509
1510 if (common->mode == JIT_COMPILE)
1511 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1512
/* Fast path: not at the end yet, skip everything below. */
1513 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1514 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1515 {
/* Soft mode: store -1 into hit_start unless start_used_ptr >= STR_PTR,
then always take the returned jump. */
1516 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1517 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1518 JUMPHERE(nohit);
1519 return_value = JUMP(SLJIT_JUMP);
1520 }
1521 else
1522 {
/* Hard mode: the returned jump is taken only when
start_used_ptr >= STR_PTR; otherwise control goes to the partial match
exit. */
1523 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1524 if (common->partialmatchlabel != NULL)
1525 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1526 else
1527 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1528 }
1529 JUMPHERE(jump);
1530 return return_value;
1531 }
1532
/* Emits an end-of-subject test whose failure jumps are appended to the
fallbacks list. In the partial matching modes a partial match is recorded
(soft mode) or reported (hard mode) unless start_used_ptr >= STR_PTR. */
1533 static void fallback_at_str_end(compiler_common *common, jump_list **fallbacks)
1534 {
1535 DEFINE_COMPILER;
1536 struct sljit_jump *jump;
1537
1538 if (common->mode == JIT_COMPILE)
1539 {
1540 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1541 return;
1542 }
1543
1544 /* Partial matching mode. */
/* Not at the end yet: skip the whole sequence. */
1545 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1546 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
1547 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1548 {
/* Soft mode: store -1 into hit_start, then fall back. */
1549 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
1550 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
1551 }
1552 else
1553 {
/* Hard mode: go to the partial match exit. */
1554 if (common->partialmatchlabel != NULL)
1555 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1556 else
1557 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1558 }
1559 JUMPHERE(jump);
1560 }
1561
/* Emits code that loads the character at STR_PTR into TMP1 and advances
STR_PTR past it. */
1562 static void read_char(compiler_common *common)
1563 {
1564 /* Reads the character into TMP1, updates STR_PTR.
1565 Does not check STR_END. TMP2 Destroyed. */
1566 DEFINE_COMPILER;
1567 #ifdef SUPPORT_UTF
1568 struct sljit_jump *jump;
1569 #endif
1570
1571 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1572 #ifdef SUPPORT_UTF
1573 if (common->utf)
1574 {
/* Code units below 0xc0 (8 bit) or 0xd800 (16 bit) are complete characters
and skip the call to the shared utfreadchar routine. */
1575 #ifdef COMPILE_PCRE8
1576 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1577 #else
1578 #ifdef COMPILE_PCRE16
1579 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1580 #endif
1581 #endif /* COMPILE_PCRE8 */
1582 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1583 JUMPHERE(jump);
1584 }
1585 #endif
/* Step over the first code unit; any extra units were already skipped by
the utfreadchar routine. */
1586 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1587 }
1588
/* Emits code that loads the character at STR_PTR into TMP1 without
consuming it. */
1589 static void peek_char(compiler_common *common)
1590 {
1591 /* Reads the character into TMP1, keeps STR_PTR.
1592 Does not check STR_END. TMP2 Destroyed. */
1593 DEFINE_COMPILER;
1594 #ifdef SUPPORT_UTF
1595 struct sljit_jump *jump;
1596 #endif
1597
1598 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1599 #ifdef SUPPORT_UTF
1600 if (common->utf)
1601 {
1602 #ifdef COMPILE_PCRE8
1603 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1604 #else
1605 #ifdef COMPILE_PCRE16
1606 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1607 #endif
1608 #endif /* COMPILE_PCRE8 */
1609 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
/* Undo the STR_PTR advance made by the utfreadchar routine; this relies on
the routine leaving the advance (length - 1) in TMP2. */
1610 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1611 JUMPHERE(jump);
1612 }
1613 #endif
1614 }
1615
/* Emits code that consumes one character and leaves its ctypes table
entry in TMP1. Characters above 255 yield 0 on the paths visible here (the
UTF-8 multi-byte case is delegated to the utfreadtype8 routine). TMP2 is
used as scratch. */
1616 static void read_char8_type(compiler_common *common)
1617 {
1618 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1619 DEFINE_COMPILER;
1620 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1621 struct sljit_jump *jump;
1622 #endif
1623
1624 #ifdef SUPPORT_UTF
1625 if (common->utf)
1626 {
1627 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1628 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1629 #ifdef COMPILE_PCRE8
1630 /* This can be an extra read in some situations, but hopefully
1631 it is needed in most cases. */
1632 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Lead bytes (>= 0xc0) are handled by the utfreadtype8 routine. */
1633 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1634 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1635 JUMPHERE(jump);
1636 #else
1637 #ifdef COMPILE_PCRE16
/* Type is 0 unless the unit is <= 255, in which case the table is read. */
1638 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1639 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1640 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1641 JUMPHERE(jump);
1642 /* Skip low surrogate if necessary. */
/* (unit & 0xfc00) == 0xd800 identifies a high surrogate; the comparison
result is scaled to a byte count and added to STR_PTR. */
1643 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1644 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1645 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1646 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1647 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1648 #endif
1649 #endif /* COMPILE_PCRE8 */
1650 return;
1651 }
1652 #endif
/* Non UTF mode: every code unit is one character. */
1653 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1654 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1655 #ifdef COMPILE_PCRE16
1656 /* The ctypes array contains only 256 values. */
1657 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1658 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1659 #endif
1660 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1661 #ifdef COMPILE_PCRE16
1662 JUMPHERE(jump);
1663 #endif
1664 }
1665
/* Emits code that moves STR_PTR back to the start of the previous
character. */
1666 static void skip_char_back(compiler_common *common)
1667 {
1668 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1669 DEFINE_COMPILER;
1670 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1671 struct sljit_label *label;
1672
1673 if (common->utf)
1674 {
/* UTF-8: keep stepping back one byte while the byte just stepped over is a
continuation byte, i.e. (byte & 0xc0) == 0x80. */
1675 label = LABEL();
1676 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1677 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1678 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1679 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1680 return;
1681 }
1682 #endif
1683 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1684 if (common->utf)
1685 {
/* UTF-16: step back one unit, then one more if that unit was a low
surrogate, i.e. (unit & 0xfc00) == 0xdc00. */
1686 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1687 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1688 /* Skip low surrogate if necessary. */
1689 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1690 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1691 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1692 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1693 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1694 return;
1695 }
1696 #endif
/* Non UTF mode: a character is exactly one code unit. */
1697 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1698 }
1699
/* Emits a newline test for the character in TMP1 and appends the resulting
conditional jump to fallbacks. The jump is taken when the character is a
newline if jumpiftrue is TRUE, and when it is not a newline otherwise.
nltype selects which newline convention is tested. */
1700 static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1701 {
1702 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1703 DEFINE_COMPILER;
1704
1705 if (nltype == NLTYPE_ANY)
1706 {
/* Delegated to the shared anynewline routine; the jump below tests the
flags it leaves behind. */
1707 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1708 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1709 }
1710 else if (nltype == NLTYPE_ANYCRLF)
1711 {
/* TMP2 = (TMP1 == CR) | (TMP1 == NL), with the flags set by the OR. */
1712 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1713 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1714 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1715 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1716 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1717 }
1718 else
1719 {
/* Fixed single character newline: a plain compare is enough. */
1720 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1721 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1722 }
1723 }
1724
1725 #ifdef SUPPORT_UTF
1726
1727 #ifdef COMPILE_PCRE8
/* Emits the shared UTF-8 decoding routine (entered through a fast call).
On entry STR_PTR still points at the lead byte, which is in TMP1. On return
STR_PTR points at the last byte of the sequence, so the caller's usual
one-unit advance finishes the job. */
1728 static void do_utfreadchar(compiler_common *common)
1729 {
1730 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1731 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1732 DEFINE_COMPILER;
1733 struct sljit_jump *jump;
1734
1735 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1736 /* Searching for the first zero. */
/* Bit 0x20 clear in the lead byte means 110xxxxx, a two byte sequence. */
1737 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1738 jump = JUMP(SLJIT_C_NOT_ZERO);
1739 /* Two byte sequence. */
1740 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1741 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1742 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1743 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1744 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1745 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1746 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1747 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1748 JUMPHERE(jump);
1749
/* Bit 0x10 clear means 1110xxxx, a three byte sequence. */
1750 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1751 jump = JUMP(SLJIT_C_NOT_ZERO);
1752 /* Three byte sequence. */
1753 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1754 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1755 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1756 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1757 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1758 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1759 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1760 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1761 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1762 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1763 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1764 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1765 JUMPHERE(jump);
1766
/* Everything else is decoded as a four byte sequence; longer forms are not
distinguished here. */
1767 /* Four byte sequence. */
1768 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1769 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1770 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1771 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1772 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1773 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1774 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1775 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1776 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1777 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1778 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1779 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1780 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1781 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1782 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1783 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1784 }
1785
/* Emits the shared routine (entered through a fast call) that yields the
ctypes entry of a multi-byte UTF-8 character. The reads at offset 0 below
imply that STR_PTR has already been moved past the lead byte on entry, as
read_char8_type does before calling. */
1786 static void do_utfreadtype8(compiler_common *common)
1787 {
1788 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1789 of the character (>= 0xc0). Return value in TMP1. */
1790 DEFINE_COMPILER;
1791 struct sljit_jump *jump;
1792 struct sljit_jump *compare;
1793
1794 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1795
1796 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1797 jump = JUMP(SLJIT_C_NOT_ZERO);
1798 /* Two byte sequence. */
/* Decode the character value into TMP2, consuming the second byte. */
1799 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1800 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1801 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1802 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1803 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1804 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Only values up to 255 have a ctypes entry; larger ones return 0. */
1805 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1806 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1807 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1808
1809 JUMPHERE(compare);
1810 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1811 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1812 JUMPHERE(jump);
1813
1814 /* We only have types for characters less than 256. */
/* Longer sequences: add the utf8_table4 entry for the lead byte to STR_PTR
(presumably the number of trailing bytes) and return type 0. */
1815 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1816 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1817 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1818 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1819 }
1820
1821 #else /* COMPILE_PCRE8 */
1822
1823 #ifdef COMPILE_PCRE16
1824 static void do_utfreadchar(compiler_common *common)
1825 {
1826 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1827 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1828 DEFINE_COMPILER;
1829 struct sljit_jump *jump;
1830
1831 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1832 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1833 /* Do nothing, only return. */
1834 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1835
1836 JUMPHERE(jump);
1837 /* Combine two 16 bit characters. */
1838 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1839 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1840 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1841 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1842 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1843 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1844 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1845 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1846 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1847 }
1848 #endif /* COMPILE_PCRE16 */
1849
1850 #endif /* COMPILE_PCRE8 */
1851
1852 #endif /* SUPPORT_UTF */
1853
1854 #ifdef SUPPORT_UCP
1855
1856 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1857 #define UCD_BLOCK_MASK 127
1858 #define UCD_BLOCK_SHIFT 7
1859
/* Emits the shared routine (entered through a fast call) that looks a
character up in the two stage Unicode property tables. */
1860 static void do_getucd(compiler_common *common)
1861 {
1862 /* Search the UCD record for the character comes in TMP1.
1863 Returns chartype in TMP1 and UCD offset in TMP2. */
1864 DEFINE_COMPILER;
1865
1866 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1867
1868 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
/* Stage 1: block number = ucd_stage1[c >> UCD_BLOCK_SHIFT]. */
1869 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1870 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
/* Stage 2 index = block number * block size + (c & UCD_BLOCK_MASK). */
1871 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1872 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1873 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* The stage 2 entries are read as 16 bit values; the trailing 1 is taken to
be the index shift of SLJIT_MEM2 (index * 2) - confirm against sljit. */
1874 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1875 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Record lookup: a shift of 3 matches sizeof(ucd_record) == 8 asserted
above; the chartype field of the selected record is loaded. */
1876 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1877 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1878 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1879 }
1880 #endif
1881
/* Emits the prologue of the main matching loop and returns the label that
advances STR_PTR to the next starting position. The emitted code is laid
out as: optional first line search, a jump over the advance code to the
point where this function ends (so execution begins without advancing),
the optional two character newline helper, and finally the advance code
at the returned label. hascrorlf and firstline disable the two character
newline skipping. */
1882 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1883 {
1884 DEFINE_COMPILER;
1885 struct sljit_label *mainloop;
1886 struct sljit_label *newlinelabel = NULL;
1887 struct sljit_jump *start;
1888 struct sljit_jump *end = NULL;
1889 struct sljit_jump *nl = NULL;
1890 #ifdef SUPPORT_UTF
1891 struct sljit_jump *singlechar;
1892 #endif
1893 jump_list *newline = NULL;
1894 BOOL newlinecheck = FALSE;
1895 BOOL readuchar = FALSE;
1896
/* newlinecheck: the advance code must take care not to stop between the
two characters of a newline sequence. */
1897 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1898 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1899 newlinecheck = TRUE;
1900
1901 if (firstline)
1902 {
1903 /* Search for the end of the first line. */
1904 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is parked in LOCALS0 during the search and restored afterwards;
first_line_end defaults to STR_END when no newline is found. */
1905 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1906 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
1907
1908 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1909 {
/* Two character fixed newline: the high byte of common->newline is the
first character and the low byte the second. */
1910 mainloop = LABEL();
1911 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1912 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1913 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1914 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1915 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
1916 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1917 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1918 }
1919 else
1920 {
1921 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1922 mainloop = LABEL();
1923 /* Continual stores does not cause data dependency. */
1924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
1925 read_char(common);
1926 check_newlinechar(common, common->nltype, &newline, TRUE);
1927 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
1928 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
1929 set_jumps(newline, LABEL());
1930 }
1931
1932 JUMPHERE(end);
1933 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1934 }
1935
/* Skips the advance code emitted below; bound at the end of this function. */
1936 start = JUMP(SLJIT_JUMP);
1937
1938 if (newlinecheck)
1939 {
/* Reached from the advance code when the current character equals the
first newline character: step over it and, when the next character equals
the second newline character, step over that one as well. */
1940 newlinelabel = LABEL();
1941 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1942 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1943 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1944 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1945 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1946 #ifdef COMPILE_PCRE16
1947 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1948 #endif
1949 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1950 nl = JUMP(SLJIT_JUMP);
1951 }
1952
/* This label is the function's return value. */
1953 mainloop = LABEL();
1954
1955 /* Increasing the STR_PTR here requires one less jump in the most common case. */
1956 #ifdef SUPPORT_UTF
1957 if (common->utf) readuchar = TRUE;
1958 #endif
1959 if (newlinecheck) readuchar = TRUE;
1960
1961 if (readuchar)
1962 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1963
1964 if (newlinecheck)
1965 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1966
1967 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1968 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1969 if (common->utf)
1970 {
/* UTF-8: for lead bytes (>= 0xc0) add the utf8_table4 entry as well, so
the whole multi-byte character is skipped. */
1971 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1972 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1973 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1974 JUMPHERE(singlechar);
1975 }
1976 #endif
1977 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1978 if (common->utf)
1979 {
/* UTF-16: skip one more unit after a high surrogate. */
1980 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1981 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1982 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1983 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1984 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1985 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1986 JUMPHERE(singlechar);
1987 }
1988 #endif
1989 JUMPHERE(start);
1990
1991 if (newlinecheck)
1992 {
1993 JUMPHERE(end);
1994 JUMPHERE(nl);
1995 }
1996
1997 return mainloop;
1998 }
1999
/* Emits a loop that advances STR_PTR until the code unit at STR_PTR equals
first_char (or its other case when caseless is TRUE) or STR_END is reached.
With firstline set, the search is limited to first_line_end; STR_END is
parked in the POSSESSIVE0 local meanwhile and restored at the end. TMP1 and
TMP2 are used as scratch. */
2000 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2001 {
2002 DEFINE_COMPILER;
2003 struct sljit_label *start;
2004 struct sljit_jump *leave;
2005 struct sljit_jump *found;
2006 pcre_uchar oc, bit;
2007
2008 if (firstline)
2009 {
2010 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2011 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2012 }
2013
2014 start = LABEL();
2015 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2016 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2017
/* oc is the other case of first_char, or first_char itself when the search
is case sensitive or the character has no other case. */
2018 oc = first_char;
2019 if (caseless)
2020 {
2021 oc = TABLE_GET(first_char, common->fcc, first_char);
2022 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2023 if (first_char > 127 && common->utf)
2024 oc = UCD_OTHERCASE(first_char);
2025 #endif
2026 }
2027 if (first_char == oc)
2028 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2029 else
2030 {
2031 bit = first_char ^ oc;
2032 if (ispowerof2(bit))
2033 {
/* The cases differ in a single bit: force that bit on and compare once. */
2034 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2035 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2036 }
2037 else
2038 {
/* General case: TMP2 = (TMP1 == first_char) | (TMP1 == oc). */
2039 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2040 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2041 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2042 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2043 found = JUMP(SLJIT_C_NOT_ZERO);
2044 }
2045 }
2046
/* No match here: advance by one character and try again. */
2047 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2048 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2049 if (common->utf)
2050 {
2051 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2052 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2053 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2054 }
2055 #endif
2056 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2057 if (common->utf)
2058 {
2059 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2060 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2061 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2062 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2063 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2064 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2065 }
2066 #endif
2067 JUMPTO(SLJIT_JUMP, start);
2068 JUMPHERE(found);
2069 JUMPHERE(leave);
2070
2071 if (firstline)
2072 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2073 }
2074
/* Emits code that moves STR_PTR forward to a position that immediately
follows a newline, or gives up when STR_END is reached. Nothing is scanned
when STR_PTR is at or before the pointer loaded from jit_arguments.str.
When firstline is set, STR_END is parked in the POSSESSIVE0 local and replaced
by the value stored at common->first_line_end, then restored on every exit
path. Two code paths are generated: a dedicated loop for a fixed newline with
common->newline > 255 (treated as a two-unit sequence: bits 8-15 first, bits
0-7 second), and a generic loop built on read_char() and check_newlinechar().
TMP1 and TMP2 are used as scratch. */
2075 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2076 {
2077 DEFINE_COMPILER;
2078 struct sljit_label *loop;
2079 struct sljit_jump *lastchar;
2080 struct sljit_jump *firstchar;
2081 struct sljit_jump *leave;
2082 struct sljit_jump *foundcr = NULL;
2083 struct sljit_jump *notfoundnl;
2084 jump_list *newline = NULL;
2085
2086 if (firstline)
2087 {
2088 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2089 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2090 }
2091
2092 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2093 {
/* Two-unit fixed newline. */
2094 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2095 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2096 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2097 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2098 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2099
/* Step STR_PTR back by one unit, but only if at least two units lie between
jit_arguments.begin and STR_PTR: TMP2 becomes 1 or 0 and is scaled to bytes in
the 16-bit build. The loop below starts by adding one unit again and then
inspects the two units before STR_PTR. */
2100 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2101 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2102 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2103 #ifdef COMPILE_PCRE16
2104 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2105 #endif
2106 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2107
2108 loop = LABEL();
2109 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2110 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2111 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2112 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
/* Keep looping until the two preceding units equal the newline pair. */
2113 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2114 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2115
2116 JUMPHERE(leave);
2117 JUMPHERE(firstchar);
2118 JUMPHERE(lastchar);
2119
2120 if (firstline)
2121 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2122 return;
2123 }
2124
/* Generic path. NOTE(review): skip_char_back() presumably moves STR_PTR back
by one character so that the first read_char() examines the character before
the starting position - confirm against its definition. */
2125 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2126 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2127 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2128 skip_char_back(common);
2129
2130 loop = LABEL();
2131 read_char(common);
2132 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2133 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2134 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in the newline list are routed back to the loop head.
NOTE(review): with FALSE as the last argument check_newlinechar() presumably
records the "not a newline" jumps - confirm. */
2135 check_newlinechar(common, common->nltype, &newline, FALSE);
2136 set_jumps(newline, loop);
2137
2138 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2139 {
2140 leave = JUMP(SLJIT_JUMP);
2141 JUMPHERE(foundcr);
/* A CR was read: if the next unit is NL, step over it as well (TMP1 is 1 or
0, scaled to bytes in the 16-bit build). */
2142 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2143 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2144 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2145 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2146 #ifdef COMPILE_PCRE16
2147 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2148 #endif
2149 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2150 JUMPHERE(notfoundnl);
2151 JUMPHERE(leave);
2152 }
2153 JUMPHERE(lastchar);
2154 JUMPHERE(firstchar);
2155
2156 if (firstline)
2157 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2158 }
2159
/* Emits a scan loop that advances STR_PTR until the code unit under it has
its bit set in the bitmap located at address start_bits, or until STR_PTR
reaches STR_END. The bitmap is indexed as byte start_bits[c >> 3], bit
(c & 7). In builds other than the 8-bit one, values of 255 or more are all
mapped to index 255 before the lookup. On a hit the loop is left before
STR_PTR is advanced. When firstline is set, STR_END is parked in the
POSSESSIVE0 local and replaced by the value stored at common->first_line_end,
then restored. TMP1 and TMP2 are scratch; TMP3 is also used in UTF mode. */
2160 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2161 {
2162 DEFINE_COMPILER;
2163 struct sljit_label *start;
2164 struct sljit_jump *leave;
2165 struct sljit_jump *found;
2166 #ifndef COMPILE_PCRE8
2167 struct sljit_jump *jump;
2168 #endif
2169
2170 if (firstline)
2171 {
2172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2173 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2174 }
2175
2176 start = LABEL();
2177 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2178 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2179 #ifdef SUPPORT_UTF
/* The bitmap test destroys TMP1; keep a copy for the length computation. */
2180 if (common->utf)
2181 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2182 #endif
2183 #ifndef COMPILE_PCRE8
/* Clamp wide code units to 255 so the lookup stays inside the bitmap. */
2184 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2185 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2186 JUMPHERE(jump);
2187 #endif
/* TMP2 = 1 << (c & 7), TMP1 = start_bits[c >> 3]; a non-zero AND is a hit. */
2188 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2189 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2190 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2191 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2192 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2193 found = JUMP(SLJIT_C_NOT_ZERO);
2194
2195 #ifdef SUPPORT_UTF
2196 if (common->utf)
2197 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2198 #endif
/* No hit: step over the current character and try again. */
2199 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2200 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2201 if (common->utf)
2202 {
/* First bytes of 0xc0 and above skip utf8_table4[byte - 0xc0] more bytes. */
2203 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2204 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2205 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2206 }
2207 #endif
2208 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2209 if (common->utf)
2210 {
/* If (unit & 0xfc00) == 0xd800, skip one more 16-bit unit (2 bytes). */
2211 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2212 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2213 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2214 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2215 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2216 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2217 }
2218 #endif
2219 JUMPTO(SLJIT_JUMP, start);
2220 JUMPHERE(found);
2221 JUMPHERE(leave);
2222
2223 if (firstline)
2224 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2225 }
2226
/* Emits code that looks for req_char (or, when caseless is set, its
other-case form) in the subject ahead of STR_PTR, and returns the jump that is
taken when the search reaches STR_END without finding it. The caller must
bind that jump to its own target.

The position of the last hit is cached in the local at common->req_char_ptr.
The search is skipped entirely when STR_PTR + REQ_BYTE_MAX < STR_END, or when
STR_PTR is still below the cached position. The scan starts one code unit
after STR_PTR when has_firstchar is set, otherwise at STR_PTR. STR_PTR itself
is not modified; TMP1 is the scan pointer and TMP2 holds the unit read. */
2227 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2228 {
2229 DEFINE_COMPILER;
2230 struct sljit_label *loop;
2231 struct sljit_jump *toolong;
2232 struct sljit_jump *alreadyfound;
2233 struct sljit_jump *found;
2234 struct sljit_jump *foundoc = NULL;
2235 struct sljit_jump *notfound;
2236 pcre_uchar oc, bit;
2237
2238 SLJIT_ASSERT(common->req_char_ptr != 0);
2239 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2240 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2241 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2242 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2243
2244 if (has_firstchar)
2245 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2246 else
2247 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2248
2249 loop = LABEL();
2250 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2251
2252 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc stays equal to req_char unless a different other-case form is found. */
2253 oc = req_char;
2254 if (caseless)
2255 {
2256 oc = TABLE_GET(req_char, common->fcc, req_char);
2257 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2258 if (req_char > 127 && common->utf)
2259 oc = UCD_OTHERCASE(req_char);
2260 #endif
2261 }
2262 if (req_char == oc)
2263 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2264 else
2265 {
2266 bit = req_char ^ oc;
2267 if (ispowerof2(bit))
2268 {
/* Single differing bit: force it on in the unit read and compare once. */
2269 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2270 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2271 }
2272 else
2273 {
/* Otherwise compare against each form separately. */
2274 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2275 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2276 }
2277 }
2278 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2279 JUMPTO(SLJIT_JUMP, loop);
2280
2281 JUMPHERE(found);
2282 if (foundoc)
2283 JUMPHERE(foundoc);
/* Remember where the character was found so later calls can skip the scan. */
2284 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2285 JUMPHERE(alreadyfound);
2286 JUMPHERE(toolong);
2287 return notfound;
2288 }
2289
/* Emits a fast-call subroutine that replays the records found at STACK_TOP.
TMP1 walks the records (STACK_TOP itself is not changed) and TMP2 holds the
first word of the current record:
  - a value greater than frame_end (signed compare) is used as an offset from
    SLJIT_LOCALS_REG; the next two words of the record are copied to that
    local and the one after it, and the walk advances by three words;
  - frame_end returns to the caller;
  - frame_setstrbegin stores the record's second word into OVECTOR(0) and
    advances by two words;
  - any other value is skipped by advancing two words. */
2290 static void do_revertframes(compiler_common *common)
2291 {
2292 DEFINE_COMPILER;
2293 struct sljit_jump *jump;
2294 struct sljit_label *mainloop;
2295
2296 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2297 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2298
2299 /* Drop frames until we reach STACK_TOP. */
2300 mainloop = LABEL();
2301 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2302 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Ordinary record: restore two consecutive locals from the saved words. */
2303 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
2304 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2305 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2306 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2307 JUMPTO(SLJIT_JUMP, mainloop);
2308
2309 JUMPHERE(jump);
2310 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2311 /* End of dropping frames. */
2312 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2313
2314 JUMPHERE(jump);
2315 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2316 /* Set string begin. */
2317 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2318 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2319 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2320 JUMPTO(SLJIT_JUMP, mainloop);
2321
2322 JUMPHERE(jump);
2323 /* Unknown command. */
2324 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2325 JUMPTO(SLJIT_JUMP, mainloop);
2326 }
2327
/* Emits a fast-call subroutine that tests for a word boundary at STR_PTR.
The return address is kept in the LOCALS0 local. A 0/1 "is a word character"
value is computed for the character before STR_PTR (stored in LOCALS1; left
at 0 when STR_PTR is at or before jit_arguments.begin) and for the character
at STR_PTR (kept in TMP2; left at 0 when the end-of-subject jump from
check_str_end() is taken). The final XOR of the two values sets the flags the
caller tests; TMP2 ends up 1 when the two sides differ and 0 otherwise.

With common->use_ucp the classification is: underscore, or a Unicode type in
the range ucp_Ll..ucp_Lu, or in the range ucp_Nd..ucp_No. Otherwise it is bit
0x10 (ctype_word) of the common->ctypes table entry, with code units above
255 counted as non-word where such units can occur.
NOTE(review): the getucd fast call presumably returns the character type in
TMP1 - confirm against its definition. */
2328 static void check_wordboundary(compiler_common *common)
2329 {
2330 DEFINE_COMPILER;
2331 struct sljit_jump *skipread;
2332 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2333 struct sljit_jump *jump;
2334 #endif
2335
2336 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2337
2338 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2339 /* Get type of the previous char, and put it to LOCALS1. */
2340 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2341 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2342 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2343 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
2344 skip_char_back(common);
2345 check_start_used_ptr(common);
2346 read_char(common);
2347
2348 /* Testing char type. */
2349 #ifdef SUPPORT_UCP
2350 if (common->use_ucp)
2351 {
/* TMP2 starts at 1 so that an underscore skips the type lookup as "word". */
2352 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2353 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2354 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Unsigned range checks: subtract the lower bound, then one compare against
the range width covers both ends. */
2355 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2356 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2357 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2358 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2359 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2360 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2361 JUMPHERE(jump);
2362 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2363 }
2364 else
2365 #endif
2366 {
2367 #ifndef COMPILE_PCRE8
2368 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2369 #elif defined SUPPORT_UTF
2370 /* Here LOCALS1 has already been zeroed. */
2371 jump = NULL;
2372 if (common->utf)
2373 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2374 #endif /* COMPILE_PCRE8 */
/* Extract the ctype_word bit (0x10) of ctypes[TMP1] as 0 or 1. */
2375 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2376 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2377 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2378 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2379 #ifndef COMPILE_PCRE8
2380 JUMPHERE(jump);
2381 #elif defined SUPPORT_UTF
2382 if (jump != NULL)
2383 JUMPHERE(jump);
2384 #endif /* COMPILE_PCRE8 */
2385 }
2386 JUMPHERE(skipread);
2387
/* Now classify the character at STR_PTR into TMP2 (default 0). */
2388 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2389 skipread = check_str_end(common);
2390 peek_char(common);
2391
2392 /* Testing char type. This is a code duplication. */
2393 #ifdef SUPPORT_UCP
2394 if (common->use_ucp)
2395 {
2396 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2397 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2398 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2399 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2400 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2401 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2402 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2403 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2404 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2405 JUMPHERE(jump);
2406 }
2407 else
2408 #endif
2409 {
2410 #ifndef COMPILE_PCRE8
2411 /* TMP2 may be destroyed by peek_char. */
2412 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2413 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2414 #elif defined SUPPORT_UTF
2415 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2416 jump = NULL;
2417 if (common->utf)
2418 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2419 #endif
2420 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2421 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2422 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2423 #ifndef COMPILE_PCRE8
2424 JUMPHERE(jump);
2425 #elif defined SUPPORT_UTF
2426 if (jump != NULL)
2427 JUMPHERE(jump);
2428 #endif /* COMPILE_PCRE8 */
2429 }
2430 JUMPHERE(skipread);
2431
/* Flags come from (current XOR previous); the result is discarded. */
2432 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2433 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2434 }
2435
/* Emits a fast-call subroutine that classifies the character in TMP1 as a
newline or not. The result is accumulated in TMP2 and the flags are set by the
final OR, so TMP2 is non-zero on a match. Characters accepted: 0x0a..0x0d and
0x85 always; additionally 0x2028 and 0x2029 in 16-bit builds, or in 8-bit
builds when common->utf is set. */
2436 static void check_anynewline(compiler_common *common)
2437 {
2438 /* Check whether TMP1 contains a newline character. TMP1 is modified and TMP2 is destroyed. */
2439 DEFINE_COMPILER;
2440
2441 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2442
/* Rebase to 0x0a so that one unsigned compare covers 0x0a..0x0d. */
2443 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2444 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2445 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2446 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2447 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2448 #ifdef COMPILE_PCRE8
2449 if (common->utf)
2450 {
2451 #endif
2452 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the low bit folds 0x2028 onto 0x2029, so one compare tests both. */
2453 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2454 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2455 #ifdef COMPILE_PCRE8
2456 }
2457 #endif
2458 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the last pending comparison and leaves the final flags. */
2459 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2460 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2461 }
2462
/* Emits a fast-call subroutine that classifies the character in TMP1 as
horizontal whitespace or not. The result is accumulated in TMP2 and the flags
are set by the final OR, so TMP2 is non-zero on a match. Characters accepted:
0x09, 0x20 and 0xa0 always; additionally 0x1680, 0x180e, 0x2000..0x200A,
0x202f, 0x205f and 0x3000 in 16-bit builds, or in 8-bit builds when
common->utf is set. */
2463 static void check_hspace(compiler_common *common)
2464 {
2465 /* Check whether TMP1 contains a horizontal whitespace character. TMP2 is destroyed; TMP1 is modified when the wide characters are tested. */
2466 DEFINE_COMPILER;
2467
2468 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2469
2470 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2471 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2472 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2473 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2474 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2475 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2476 #ifdef COMPILE_PCRE8
2477 if (common->utf)
2478 {
2479 #endif
2480 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2481 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2482 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2483 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2484 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase to 0x2000: one unsigned compare covers 0x2000..0x200A, and the
remaining constants are expressed relative to 0x2000. */
2485 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2486 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2487 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2488 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2489 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2490 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2491 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2492 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2493 #ifdef COMPILE_PCRE8
2494 }
2495 #endif
2496 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the last pending comparison and leaves the final flags. */
2497 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2498
2499 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2500 }
2501
/* Emits a fast-call subroutine that classifies the character in TMP1 as
vertical whitespace or not. The result is accumulated in TMP2 and the flags
are set by the final OR, so TMP2 is non-zero on a match. Characters accepted:
0x0a..0x0d and 0x85 always; additionally 0x2028 and 0x2029 in 16-bit builds,
or in 8-bit builds when common->utf is set. */
2502 static void check_vspace(compiler_common *common)
2503 {
2504 /* Check whether TMP1 contains a vertical whitespace character. TMP1 is modified and TMP2 is destroyed. */
2505 DEFINE_COMPILER;
2506
2507 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2508
/* Rebase to 0x0a so that one unsigned compare covers 0x0a..0x0d. */
2509 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2510 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2511 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2512 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2513 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2514 #ifdef COMPILE_PCRE8
2515 if (common->utf)
2516 {
2517 #endif
2518 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the low bit folds 0x2028 onto 0x2029, so one compare tests both. */
2519 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2520 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2521 #ifdef COMPILE_PCRE8
2522 }
2523 #endif
2524 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the last pending comparison and leaves the final flags. */
2525 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2526
2527 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2528 }
2529
/* Scratch aliases used by do_casefulcmp() and do_caselesscmp(): the registers
normally holding STR_END and STACK_TOP carry the two code units being
compared. Both helpers save the original register values on entry and restore
them before returning. */
2530 #define CHAR1 STR_END
2531 #define CHAR2 STACK_TOP
2532
/* Emits a fast-call subroutine that compares two code-unit sequences exactly.
On entry TMP1 addresses the first sequence, TMP2 is the length in bytes (it is
decremented by IN_UCHARS(1) per unit) and STR_PTR addresses the end of the
second sequence; STR_PTR is first moved back by TMP2 to its start. The loop
stops at the first differing unit or when TMP2 reaches zero, so for a
non-zero entry length TMP2 is zero on exit only if every unit matched.
CHAR1 and CHAR2 (aliases of STR_END and STACK_TOP) are saved in TMP3 and the
LOCALS0 local and restored before returning.
NOTE(review): MOVU_UCHAR is presumably a load with pointer pre-update, which
is why both pointers are biased by one unit before the loop and STR_PTR is
corrected by one unit afterwards - confirm against the macro definition. */
2533 static void do_casefulcmp(compiler_common *common)
2534 {
2535 DEFINE_COMPILER;
2536 struct sljit_jump *jump;
2537 struct sljit_label *label;
2538
2539 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2540 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Free the two scratch registers. */
2541 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2542 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
2543 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2544 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2545
2546 label = LABEL();
2547 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2548 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2549 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2550 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2551 JUMPTO(SLJIT_C_NOT_ZERO, label);
2552
2553 JUMPHERE(jump);
2554 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Restore the borrowed registers. */
2555 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2556 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2557 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2558 }
2559
/* do_caselesscmp() borrows the STACK_LIMIT register to hold the address of
common->lcc; the original value is saved in TMP3 and restored on exit. */
2560 #define LCC_TABLE STACK_LIMIT
2561
/* Emits a fast-call subroutine that compares two code-unit sequences after
mapping each unit through the common->lcc table. On entry TMP1 addresses the
first sequence, TMP2 is the length in bytes (it is decremented by
IN_UCHARS(1) per unit) and STR_PTR addresses the end of the second sequence;
STR_PTR is first moved back by TMP2 to its start. In builds other than the
8-bit one, units above 255 bypass the table and are compared as they are.
The loop stops at the first differing pair or when TMP2 reaches zero, so for
a non-zero entry length TMP2 is zero on exit only if every unit matched.
LCC_TABLE, CHAR1 and CHAR2 are borrowed registers: their values are saved in
TMP3, LOCALS0 and LOCALS1 and restored before returning. */
2562 static void do_caselesscmp(compiler_common *common)
2563 {
2564 DEFINE_COMPILER;
2565 struct sljit_jump *jump;
2566 struct sljit_label *label;
2567
2568 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2569 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2570
/* Free the three borrowed registers and load the table address. */
2571 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2572 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2573 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2574 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
2575 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2576 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2577
2578 label = LABEL();
2579 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2580 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2581 #ifndef COMPILE_PCRE8
2582 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2583 #endif
/* CHAR1 = lcc[CHAR1] */
2584 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2585 #ifndef COMPILE_PCRE8
2586 JUMPHERE(jump);
2587 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2588 #endif
/* CHAR2 = lcc[CHAR2] */
2589 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2590 #ifndef COMPILE_PCRE8
2591 JUMPHERE(jump);
2592 #endif
2593 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2594 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2595 JUMPTO(SLJIT_C_NOT_ZERO, label);
2596
2597 JUMPHERE(jump);
2598 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Restore the borrowed registers. */
2599 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2600 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2601 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2602 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2603 }
2604
2605 #undef LCC_TABLE
2606 #undef CHAR1
2607 #undef CHAR2
2608
2609 #if defined SUPPORT_UTF && defined SUPPORT_UCP
2610
/* Ordinary C helper (SLJIT_CALL convention) that compares the characters in
[src1, end1) with the characters starting at args->ptr, treating c1 and c2 as
equal when c1 == c2 or c1 == UCD_OTHERCASE(c2).

Returns:
  - NULL when a pair of characters does not match;
  - (pcre_uchar*)1 when args->end is reached before [src1, end1) has been
    consumed;
  - otherwise the position reached in the args->ptr sequence, i.e. just past
    the characters that were compared. */
2611 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
2612 {
2613 /* This function would be inefficient to implement at the JIT level. */
2614 int c1, c2;
2615 const pcre_uchar *src2 = args->ptr;
2616 const pcre_uchar *end2 = args->end;
2617
2618 while (src1 < end1)
2619 {
2620 if (src2 >= end2)
2621 return (pcre_uchar*)1;
/* GETCHARINC reads one character and advances its pointer argument. */
2622 GETCHARINC(c1, src1);
2623 GETCHARINC(c2, src2);
2624 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
2625 }
2626 return src2;
2627 }
2628
2629 #endif /* SUPPORT_UTF && SUPPORT_UCP */
2630
/* Emits the comparison code for one literal character at cc (one code unit,
or 1 + GET_EXTRALEN(*cc) units in UTF mode) against the subject, and returns
cc advanced past that character. Mismatch jumps are appended to *fallbacks.

context carries state across consecutive calls for the same literal run:
  - length:    bytes of the run not yet consumed; the corresponding subject
               data is addressed as STR_PTR - length;
  - sourcereg: -1 on the first call, afterwards TMP1 or TMP2. Loads and
               compares alternate between the two registers so that the next
               chunk is loaded before the previous one is compared;
  - ucharptr, c, oc (unaligned-read builds only): units gathered for the
               current chunk, with c holding the expected values and oc the
               bits to be ORed into the subject before comparing.

For a caseless character that has another case, othercasebit describes the
single differing bit: in the 8-bit build the bits above the low 8 select the
code unit within the character and the low 8 bits are the mask; in the 16-bit
build bits from 9 up select the code unit, bit 0x100 says the mask belongs to
the high byte, and the low 8 bits are the mask. */
2631 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2632 compare_context* context, jump_list **fallbacks)
2633 {
2634 DEFINE_COMPILER;
2635 unsigned int othercasebit = 0;
2636 pcre_uchar *othercasechar = NULL;
2637 #ifdef SUPPORT_UTF
2638 int utflength;
2639 #endif
2640
2641 if (caseless && char_has_othercase(common, cc))
2642 {
2643 othercasebit = char_get_othercase_bit(common, cc);
2644 SLJIT_ASSERT(othercasebit);
2645 /* Extracting bit difference info. */
2646 #ifdef COMPILE_PCRE8
2647 othercasechar = cc + (othercasebit >> 8);
2648 othercasebit &= 0xff;
2649 #else
2650 #ifdef COMPILE_PCRE16
2651 othercasechar = cc + (othercasebit >> 9);
2652 if ((othercasebit & 0x100) != 0)
2653 othercasebit = (othercasebit & 0xff) << 8;
2654 else
2655 othercasebit &= 0xff;
2656 #endif
2657 #endif
2658 }
2659
2660 if (context->sourcereg == -1)
2661 {
/* First call for this run: prime TMP1 with the widest load the remaining
length allows, and make TMP2 the target of the next load. */
2662 #ifdef COMPILE_PCRE8
2663 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2664 if (context->length >= 4)
2665 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2666 else if (context->length >= 2)
2667 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2668 else
2669 #endif
2670 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2671 #else
2672 #ifdef COMPILE_PCRE16
2673 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2674 if (context->length >= 4)
2675 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2676 else
2677 #endif
2678 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2679 #endif
2680 #endif /* COMPILE_PCRE8 */
2681 context->sourcereg = TMP2;
2682 }
2683
2684 #ifdef SUPPORT_UTF
/* In UTF mode one character may span several code units; handle them all. */
2685 utflength = 1;
2686 if (common->utf && HAS_EXTRALEN(*cc))
2687 utflength += GET_EXTRALEN(*cc);
2688
2689 do
2690 {
2691 #endif
2692
2693 context->length -= IN_UCHARS(1);
2694 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2695
2696 /* Unaligned read is supported. */
/* Record the expected unit (with the case bit forced on, if this is the unit
that carries it) and the matching OR mask. */
2697 if (othercasebit != 0 && othercasechar == cc)
2698 {
2699 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2700 context->oc.asuchars[context->ucharptr] = othercasebit;
2701 }
2702 else
2703 {
2704 context->c.asuchars[context->ucharptr] = *cc;
2705 context->oc.asuchars[context->ucharptr] = 0;
2706 }
2707 context->ucharptr++;
2708
/* Flush when the chunk is full, when the run is exhausted, or (8-bit build)
when two units are gathered and only one more remains. */
2709 #ifdef COMPILE_PCRE8
2710 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2711 #else
2712 if (context->ucharptr >= 2 || context->length == 0)
2713 #endif
2714 {
/* Load the following chunk into the idle register first ... */
2715 if (context->length >= 4)
2716 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2717 #ifdef COMPILE_PCRE8
2718 else if (context->length >= 2)
2719 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2720 else if (context->length >= 1)
2721 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2722 #else
2723 else if (context->length >= 2)
2724 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2725 #endif
/* ... then switch to the register that holds the chunk loaded earlier. */
2726 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2727
/* The case labels are chunk sizes expressed in code units. */
2728 switch(context->ucharptr)
2729 {
2730 case 4 / sizeof(pcre_uchar):
2731 if (context->oc.asint != 0)
2732 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2733 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2734 break;
2735
2736 case 2 / sizeof(pcre_uchar):
2737 if (context->oc.asushort != 0)
2738 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2739 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2740 break;
2741
2742 #ifdef COMPILE_PCRE8
2743 case 1:
2744 if (context->oc.asbyte != 0)
2745 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2746 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2747 break;
2748 #endif
2749
2750 default:
2751 SLJIT_ASSERT_STOP();
2752 break;
2753 }
2754 context->ucharptr = 0;
2755 }
2756
2757 #else
2758
2759 /* Unaligned read is unsupported. */
/* One unit at a time: load the next unit into the idle register, then compare
the unit loaded earlier. */
2760 #ifdef COMPILE_PCRE8
2761 if (context->length > 0)
2762 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2763 #else
2764 if (context->length > 0)
2765 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2766 #endif
2767 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2768
2769 if (othercasebit != 0 && othercasechar == cc)
2770 {
2771 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2772 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2773 }
2774 else
2775 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2776
2777 #endif
2778
2779 cc++;
2780 #ifdef SUPPORT_UTF
2781 utflength--;
2782 }
2783 while (utflength > 0);
2784 #endif
2785
2786 return cc;
2787 }
2788
2789 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2790
/* SET_TYPE_OFFSET(value): rebases the register named by the local typereg so
that it holds (original value - value). The bias currently applied is tracked
in the local typeoffset; only the difference is emitted as a single SUB or
ADD, and nothing is emitted when the bias is unchanged. The macro expects
DEFINE_COMPILER plus the locals typereg and typeoffset in scope. It expands to
an if statement followed by a separate assignment, so it must be used as a
full statement inside a braced block, never as the unbraced body of an if,
else or loop. */
2791 #define SET_TYPE_OFFSET(value) \
2792 if ((value) != typeoffset) \
2793 { \
2794 if ((value) > typeoffset) \
2795 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
2796 else \
2797 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
2798 } \
2799 typeoffset = (value);
2800
/* SET_CHAR_OFFSET(value): same scheme for the character held in TMP1, with
the applied bias tracked in the local charoffset. The same usage restriction
applies: use it only as a full statement inside a braced block. */
2801 #define SET_CHAR_OFFSET(value) \
2802 if ((value) != charoffset) \
2803 { \
2804 if ((value) > charoffset) \
2805 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
2806 else \
2807 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
2808 } \
2809 charoffset = (value);
2810
2811 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2812 {
2813 DEFINE_COMPILER;
2814 jump_list *found = NULL;
2815 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2816 unsigned int c;
2817 int compares;
2818 struct sljit_jump *jump = NULL;
2819 pcre_uchar *ccbegin;
2820 #ifdef SUPPORT_UCP
2821 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2822 BOOL charsaved = FALSE;
2823 int typereg = TMP1, scriptreg = TMP1;
2824 unsigned int typeoffset;
2825 #endif
2826 int invertcmp, numberofcmps;
2827 unsigned int charoffset;
2828
2829 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
2830 fallback_at_str_end(common, fallbacks);
2831 read_char(common);
2832
2833 if ((*cc++ & XCL_MAP) != 0)
2834 {
2835 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2836 #ifndef COMPILE_PCRE8
2837 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2838 #elif defined SUPPORT_UTF
2839 if (common->utf)
2840 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2841 #endif
2842
2843 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2844 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2845 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2846 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2847 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2848 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2849
2850 #ifndef COMPILE_PCRE8
2851 JUMPHERE(jump);
2852 #elif defined SUPPORT_UTF
2853 if (common->utf)
2854 JUMPHERE(jump);
2855 #endif
2856 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2857 #ifdef SUPPORT_UCP
2858 charsaved = TRUE;
2859 #endif
2860 cc += 32 / sizeof(pcre_uchar);
2861 }
2862
2863 /* Scanning the necessary info. */
2864 ccbegin = cc;
2865 compares = 0;
2866 while (*cc != XCL_END)
2867 {
2868 compares++;
2869 if (*cc == XCL_SINGLE)
2870 {
2871 cc += 2;
2872 #ifdef SUPPORT_UTF
2873 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2874 #endif
2875 #ifdef SUPPORT_UCP
2876 needschar = TRUE;
2877 #endif
2878 }
2879 else if (*cc == XCL_RANGE)
2880 {
2881 cc += 2;
2882 #ifdef SUPPORT_UTF
2883 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2884 #endif
2885 cc++;
2886 #ifdef SUPPORT_UTF
2887 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2888 #endif
2889 #ifdef SUPPORT_UCP
2890 needschar = TRUE;
2891 #endif
2892 }
2893 #ifdef SUPPORT_UCP
2894 else
2895 {
2896 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2897 cc++;
2898 switch(*cc)
2899 {
2900 case PT_ANY:
2901 break;
2902
2903 case PT_LAMP:
2904 case PT_GC:
2905 case PT_PC:
2906 case PT_ALNUM:
2907 needstype = TRUE;
2908 break;
2909
2910 case PT_SC:
2911 needsscript = TRUE;
2912 break;
2913
2914 case PT_SPACE:
2915 case PT_PXSPACE:
2916 case PT_WORD:
2917 needstype = TRUE;
2918 needschar = TRUE;
2919 break;
2920
2921 default:
2922 SLJIT_ASSERT_STOP();
2923 break;
2924 }
2925 cc += 2;
2926 }
2927 #endif
2928 }
2929
2930 #ifdef SUPPORT_UCP
2931 /* Simple register allocation. TMP1 is preferred if possible. */
2932 if (needstype || needsscript)
2933 {
2934 if (needschar && !charsaved)
2935 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2936 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2937 if (needschar)
2938 {
2939 if (needstype)
2940 {
2941 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2942 typereg = RETURN_ADDR;
2943 }
2944
2945 if (needsscript)
2946 scriptreg = TMP3;
2947 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2948 }
2949 else if (needstype && needsscript)
2950 scriptreg = TMP3;
2951 /* In all other cases only one of them was specified, and that can goes to TMP1. */
2952
2953 if (needsscript)
2954 {
2955 if (scriptreg == TMP1)
2956 {
2957 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2958 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
2959 }
2960 else
2961 {
2962 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
2963 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2964 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
2965 }
2966 }
2967 }
2968 #endif
2969
2970 /* Generating code. */
2971 cc = ccbegin;
2972 charoffset = 0;
2973 numberofcmps = 0;
2974 #ifdef SUPPORT_UCP
2975 typeoffset = 0;
2976 #endif
2977
2978 while (*cc != XCL_END)
2979 {
2980 compares--;
2981 invertcmp = (compares == 0 && list != fallbacks);
2982 jump = NULL;
2983
2984 if (*cc == XCL_SINGLE)
2985 {
2986 cc ++;
2987 #ifdef SUPPORT_UTF
2988 if (common->utf)
2989 {
2990 GETCHARINC(c, cc);
2991 }
2992 else
2993 #endif
2994 c = *cc++;
2995
2996 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2997 {
2998 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2999 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3000 numberofcmps++;
3001 }
3002 else if (numberofcmps > 0)
3003 {
3004 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3005 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3006 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3007 numberofcmps = 0;
3008 }
3009 else
3010 {
3011 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3012 numberofcmps = 0;
3013 }
3014 }
3015 else if (*cc == XCL_RANGE)
3016 {
3017 cc ++;
3018 #ifdef SUPPORT_UTF
3019 if (common->utf)
3020 {
3021 GETCHARINC(c, cc);
3022 }
3023 else
3024 #endif
3025 c = *cc++;
3026 SET_CHAR_OFFSET(c);
3027 #ifdef SUPPORT_UTF
3028 if (common->utf)
3029 {
3030 GETCHARINC(c, cc);
3031 }
3032 else
3033 #endif
3034 c = *cc++;
3035 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3036 {
3037 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3038 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3039 numberofcmps++;
3040 }
3041 else if (numberofcmps > 0)
3042 {
3043 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3044 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3045 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3046 numberofcmps = 0;
3047 }
3048 else
3049 {
3050 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3051 numberofcmps = 0;
3052 }
3053 }
3054 #ifdef SUPPORT_UCP
3055 else
3056 {
3057 if (*cc == XCL_NOTPROP)
3058 invertcmp ^= 0x1;
3059 cc++;
3060 switch(*cc)
3061 {
3062 case PT_ANY:
3063 if (list != fallbacks)
3064 {
3065 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3066 continue;
3067 }
3068 else if (cc[-1] == XCL_NOTPROP)
3069 continue;
3070 jump = JUMP(SLJIT_JUMP);
3071 break;
3072
3073 case PT_LAMP:
3074 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3075 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3076 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3077 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3078 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3079 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3080 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3081 break;
3082
3083 case PT_GC:
3084 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3085 SET_TYPE_OFFSET(c);
3086 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3087 break;
3088
3089 case PT_PC:
3090 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3091 break;
3092
3093 case PT_SC:
3094 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3095 break;
3096
3097 case PT_SPACE:
3098 case PT_PXSPACE:
3099 if (*cc == PT_SPACE)
3100 {
3101 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3102 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3103 }
3104 SET_CHAR_OFFSET(9);
3105 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3106 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3107 if (*cc == PT_SPACE)
3108 JUMPHERE(jump);
3109
3110 SET_TYPE_OFFSET(ucp_Zl);
3111 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3112 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3113 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3114 break;
3115
3116 case PT_WORD:
3117 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3118 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3119 /* ... fall through */
3120
3121 case PT_ALNUM:
3122 SET_TYPE_OFFSET(ucp_Ll);
3123 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3124 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3125 SET_TYPE_OFFSET(ucp_Nd);
3126 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3127 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3128 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3129 break;
3130 }
3131 cc += 2;
3132 }
3133 #endif
3134
3135 if (jump != NULL)
3136 add_jump(compiler, compares > 0 ? list : fallbacks, jump);
3137 }
3138
3139 if (found != NULL)
3140 set_jumps(found, LABEL());
3141 }
3142
3143 #undef SET_TYPE_OFFSET
3144 #undef SET_CHAR_OFFSET
3145
3146 #endif
3147
/* Emits the "hot path" machine code for one single-item opcode.

   common    - shared compiler state (mode, utf flag, newline settings and the
               jump lists of the shared helper routines).
   type      - the opcode to compile.
   cc        - points at the operand data that follows the opcode.
   fallbacks - jump list that receives every jump emitted for a failed match.

   Returns the pattern pointer after the operands consumed by this opcode
   (plain cc for opcodes that have no operand). An opcode that is not handled
   by the switch reaches SLJIT_ASSERT_STOP(). */
3148 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
3149 {
3150 DEFINE_COMPILER;
3151 int length;
3152 unsigned int c, oc, bit;
3153 compare_context context;
3154 struct sljit_jump *jump[4];
3155 #ifdef SUPPORT_UTF
3156 struct sljit_label *label;
3157 #ifdef SUPPORT_UCP
3158 pcre_uchar propdata[5];
3159 #endif
3160 #endif
3161
3162 switch(type)
3163 {
/* Fails unless STR_PTR equals the "begin" field of jit_arguments. */
3164 case OP_SOD:
3165 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3166 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3167 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3168 return cc;
3169
/* Fails unless STR_PTR equals the "str" field of jit_arguments. */
3170 case OP_SOM:
3171 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3172 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3173 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3174 return cc;
3175
/* Calls the shared wordboundary routine and then picks the failing condition
   from the opcode: a non-zero result fails OP_NOT_WORD_BOUNDARY, a zero
   result fails OP_WORD_BOUNDARY. */
3176 case OP_NOT_WORD_BOUNDARY:
3177 case OP_WORD_BOUNDARY:
3178 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3179 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3180 return cc;
3181
/* The next three groups share one shape: read_char8_type() leaves a value in
   TMP1 whose ctype_* bit is tested. The positive opcode fails when the bit
   is clear, the negated opcode fails when it is set. */
3182 case OP_NOT_DIGIT:
3183 case OP_DIGIT:
3184 fallback_at_str_end(common, fallbacks);
3185 read_char8_type(common);
3186 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3187 add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3188 return cc;
3189
3190 case OP_NOT_WHITESPACE:
3191 case OP_WHITESPACE:
3192 fallback_at_str_end(common, fallbacks);
3193 read_char8_type(common);
3194 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3195 add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3196 return cc;
3197
3198 case OP_NOT_WORDCHAR:
3199 case OP_WORDCHAR:
3200 fallback_at_str_end(common, fallbacks);
3201 read_char8_type(common);
3202 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3203 add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3204 return cc;
3205
/* Reads one character and fails if it starts a newline. */
3206 case OP_ANY:
3207 fallback_at_str_end(common, fallbacks);
3208 read_char(common);
3209 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3210 {
/* Fixed two-unit newline: the character just read is compared with the high
   byte of common->newline. Only when it is equal and the subject has not
   ended is the following unit loaded (STR_PTR is not advanced), and the match
   fails if that unit equals the low byte. */
3211 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3212 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3213 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3214 else
3215 jump[1] = check_str_end(common);
3216
3217 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3218 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
/* NOTE(review): the NULL guard suggests check_str_end() may return NULL;
   confirm against its definition. */
3219 if (jump[1] != NULL)
3220 JUMPHERE(jump[1]);
3221 JUMPHERE(jump[0]);
3222 }
3223 else
3224 check_newlinechar(common, common->nltype, fallbacks, TRUE);
3225 return cc;
3226
/* Consumes one whole character without testing its value. */
3227 case OP_ALLANY:
3228 fallback_at_str_end(common, fallbacks);
3229 #ifdef SUPPORT_UTF
3230 if (common->utf)
3231 {
/* UTF mode: load the first code unit, step over it, then step over the
   remaining units of a multi-unit character. */
3232 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3233 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3234 #ifdef COMPILE_PCRE8
/* Units below 0xc0 need no extra step; otherwise the byte loaded from
   utf8_table4[unit - 0xc0] is added to STR_PTR. */
3235 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3236 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3237 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3238 #else /* COMPILE_PCRE8 */
3239 #ifdef COMPILE_PCRE16
/* Units below 0xd800 need no extra step. Otherwise (unit & 0xfc00) == 0xd800
   yields 1, which is shifted left to 2 and added to STR_PTR, so one more
   16-bit unit is skipped; any other unit adds 0. */
3240 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3241 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3242 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3243 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3244 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3245 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3246 #endif /* COMPILE_PCRE16 */
3247 #endif /* COMPILE_PCRE8 */
3248 JUMPHERE(jump[0]);
3249 return cc;
3250 }
3251 #endif
3252 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3253 return cc;
3254
/* Consumes exactly one code unit, regardless of UTF mode. */
3255 case OP_ANYBYTE:
3256 fallback_at_str_end(common, fallbacks);
3257 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3258 return cc;
3259
3260 #ifdef SUPPORT_UTF
3261 #ifdef SUPPORT_UCP
/* A single property test is wrapped into a one-item list terminated by
   XCL_END and handed to compile_xclass_hotpath(). The two operand units
   cc[0] and cc[1] are copied unchanged.
   NOTE(review): propdata[0] = 0 is presumably the flags unit that precedes
   the item list of an extended class; confirm against
   compile_xclass_hotpath(). */
3262 case OP_NOTPROP:
3263 case OP_PROP:
3264 propdata[0] = 0;
3265 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3266 propdata[2] = cc[0];
3267 propdata[3] = cc[1];
3268 propdata[4] = XCL_END;
3269 compile_xclass_hotpath(common, propdata, fallbacks);
3270 return cc + 2;
3271 #endif
3272 #endif
3273
/* A CR is accepted on its own (subject ends after it, or the next unit is not
   NL) or together with a directly following NL, which is then consumed too.
   Any other character is checked by check_newlinechar() using bsr_nltype. */
3274 case OP_ANYNL:
3275 fallback_at_str_end(common, fallbacks);
3276 read_char(common);
3277 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3278 /* We don't need to handle soft partial matching case. */
3279 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3280 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3281 else
3282 jump[1] = check_str_end(common);
3283 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3284 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3285 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3286 jump[3] = JUMP(SLJIT_JUMP);
3287 JUMPHERE(jump[0]);
3288 check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
3289 JUMPHERE(jump[1]);
3290 JUMPHERE(jump[2]);
3291 JUMPHERE(jump[3]);
3292 return cc;
3293
/* Reads a character, calls the shared hspace routine and fails on the
   condition selected by the opcode. */
3294 case OP_NOT_HSPACE:
3295 case OP_HSPACE:
3296 fallback_at_str_end(common, fallbacks);
3297 read_char(common);
3298 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3299 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3300 return cc;
3301
/* Same scheme as above, using the shared vspace routine. */
3302 case OP_NOT_VSPACE:
3303 case OP_VSPACE:
3304 fallback_at_str_end(common, fallbacks);
3305 read_char(common);
3306 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3307 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3308 return cc;
3309
3310 #ifdef SUPPORT_UCP
/* Reads one character and calls the shared getucd routine. The value left in
   TMP1 is rebased by ucp_Mc and the match fails if it falls in the range
   ucp_Mc..ucp_Mn. The loop that follows keeps consuming characters while
   their value is in that range.
   NOTE(review): getucd is assumed to leave the character type in TMP1;
   confirm against its definition. */
3311 case OP_EXTUNI:
3312 fallback_at_str_end(common, fallbacks);
3313 read_char(common);
3314 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3315 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3316 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3317
3318 label = LABEL();
3319 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP3 remembers the position before the character about to be read, so that
   STR_PTR can be restored once a character outside the range is met. */
3320 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3321 read_char(common);
3322 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3323 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3324 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3325
3326 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3327 JUMPHERE(jump[0]);
3328 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
3329 {
3330 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3331 /* Since we successfully read a char above, partial matching must occur. */
3332 check_partial(common, TRUE);
3333 JUMPHERE(jump[0]);
3334 }
3335 return cc;
3336 #endif
3337
3338 case OP_EODN:
3339 /* Requires rather complex checks. */
/* jump[0] skips every newline test when STR_PTR is already at or past
   STR_END; it lands just before the final check_partial() call. */
3340 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3341 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3342 {
/* Fixed two-unit newline: exactly two units must remain (STR_PTR + 2 ==
   STR_END) and they must equal the high and low byte of common->newline. */
3343 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3344 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3345 if (common->mode == JIT_COMPILE)
3346 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3347 else
3348 {
/* Partial matching modes. If the remaining length is not exactly two units,
   TMP2 becomes non-zero when more than two units remain or when the current
   unit differs from the first newline unit, and the match fails at once.
   Otherwise (a lone first newline unit at the end) check_partial() is
   emitted, followed by an unconditional failure. */
3349 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
3350 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3351 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
3352 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3353 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
3354 add_jump(compiler, fallbacks, JUMP(SLJIT_C_NOT_EQUAL));
3355 check_partial(common, TRUE);
3356 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3357 JUMPHERE(jump[1]);
3358 }
3359 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3360 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3361 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3362 }
3363 else if (common->nltype == NLTYPE_FIXED)
3364 {
/* Fixed one-unit newline: exactly one unit must remain and it must equal
   common->newline. */
3365 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3366 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3367 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3368 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3369 }
3370 else
3371 {
/* Remaining newline types. The current unit is CR: succeed if it is the last
   unit (jump[2]), fail if more than two units remain, and with exactly two
   units succeed only when the second one is NL (jump[3]). */
3372 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3373 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3374 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3375 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3376 jump[2] = JUMP(SLJIT_C_GREATER);
3377 add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
3378 /* Equal. */
3379 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3380 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3381 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3382
/* The current unit is not CR. */
3383 JUMPHERE(jump[1]);
3384 if (common->nltype == NLTYPE_ANYCRLF)
3385 {
/* It must be the last unit and it must be NL. */
3386 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3387 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3388 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3389 }
3390 else
3391 {
/* STR_PTR is saved in LOCALS1 while one character is read; that character
   must end exactly at STR_END and the shared anynewline routine must accept
   it. STR_PTR is restored afterwards. */
3392 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3393 read_char(common);
3394 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3395 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3396 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3397 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3398 }
3399 JUMPHERE(jump[2]);
3400 JUMPHERE(jump[3]);
3401 }
3402 JUMPHERE(jump[0]);
3403 check_partial(common, FALSE);
3404 return cc;
3405
/* Fails while any subject data remains (STR_PTR < STR_END). */
3406 case OP_EOD:
3407 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3408 check_partial(common, FALSE);
3409 return cc;
3410
/* Fails when STR_PTR is beyond jit_arguments.begin or when the notbol flag
   of jit_arguments is non-zero. */
3411 case OP_CIRC:
3412 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3413 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3414 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3415 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3416 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3417 return cc;
3418
/* At jit_arguments.begin only the notbol flag can cause a failure. Past it
   (jump[1]) the match fails at the end of the subject, and otherwise requires
   the data directly before STR_PTR to be a newline. */
3419 case OP_CIRCM:
3420 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3421 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3422 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3423 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3424 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3425 jump[0] = JUMP(SLJIT_JUMP);
3426 JUMPHERE(jump[1]);
3427
3428 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3429 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3430 {
/* Two-unit newline: both preceding units must lie inside the subject
   (STR_PTR - 2 >= begin) and equal the two newline bytes. */
3431 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3432 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3433 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3434 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3435 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3436 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3437 }
3438 else
3439 {
/* Step back one character, read it again and test it as a newline. */
3440 skip_char_back(common);
3441 read_char(common);
3442 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3443 }
3444 JUMPHERE(jump[0]);
3445 return cc;
3446
/* Fails when the noteol flag of jit_arguments is non-zero. Otherwise the
   OP_EODN code is emitted, unless common->endonly is set, in which case only
   the real end of the subject is accepted. */
3447 case OP_DOLL:
3448 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3449 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3450 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3451
3452 if (!common->endonly)
3453 compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3454 else
3455 {
3456 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3457 check_partial(common, FALSE);
3458 }
3459 return cc;
3460
/* At the end of the subject only the noteol flag can cause a failure. Before
   the end (jump[1]) the data at STR_PTR must be a newline; STR_PTR itself is
   not advanced. */
3461 case OP_DOLLM:
3462 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3463 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3464 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3465 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3466 check_partial(common, FALSE);
3467 jump[0] = JUMP(SLJIT_JUMP);
3468 JUMPHERE(jump[1]);
3469
3470 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3471 {
/* Two-unit newline: both units must be available and equal the two newline
   bytes. In the partial matching modes a lone first newline unit at the very
   end emits check_partial() before failing. */
3472 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3473 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3474 if (common->mode == JIT_COMPILE)
3475 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3476 else
3477 {
3478 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
3479 /* STR_PTR = STR_END - IN_UCHARS(1) */
3480 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3481 check_partial(common, TRUE);
3482 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3483 JUMPHERE(jump[1]);
3484 }
3485
3486 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3487 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3488 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3489 }
3490 else
3491 {
3492 peek_char(common);
3493 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3494 }
3495 JUMPHERE(jump[0]);
3496 return cc;
3497
/* Matches one literal character, optionally caseless. length is the number
   of code units the pattern character occupies. */
3498 case OP_CHAR:
3499 case OP_CHARI:
3500 length = 1;
3501 #ifdef SUPPORT_UTF
3502 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3503 #endif
/* Fast path, JIT_COMPILE mode only: taken for a caseful character, a
   character without another case, or one for which char_get_othercase_bit()
   is non-zero. STR_PTR is advanced over the whole character first, the end
   of the subject is checked once, and byte_sequence_compare() emits the
   comparison. */
3504 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
3505 {
3506 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3507 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3508
3509 context.length = IN_UCHARS(length);
3510 context.sourcereg = -1;
3511 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3512 context.ucharptr = 0;
3513 #endif
3514 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3515 }
/* Generic path: read one subject character and compare it with the decoded
   pattern character c. */
3516 fallback_at_str_end(common, fallbacks);
3517 read_char(common);
3518 #ifdef SUPPORT_UTF
3519 if (common->utf)
3520 {
3521 GETCHAR(c, cc);
3522 }
3523 else
3524 #endif
3525 c = *cc;
3526 if (type == OP_CHAR || !char_has_othercase(common, cc))
3527 {
3528 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
3529 return cc + length;
3530 }
3531 oc = char_othercase(common, c);
3532 bit = c ^ oc;
/* When the two cases differ in a single bit, that bit is forced on in the
   subject character and one comparison covers both cases. */
3533 if (ispowerof2(bit))
3534 {
3535 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3536 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3537 return cc + length;
3538 }
/* Otherwise TMP2 collects "equals c" OR "equals the other case", and the
   match fails when neither comparison was true. */
3539 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3540 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3541 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3542 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3543 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3544 return cc + length;
3545
/* Matches any one character except the given one (optionally caseless). */
3546 case OP_NOT:
3547 case OP_NOTI:
3548 fallback_at_str_end(common, fallbacks);
3549 length = 1;
3550 #ifdef SUPPORT_UTF
3551 if (common->utf)
3552 {
3553 #ifdef COMPILE_PCRE8
3554 c = *cc;
/* UTF-8 with a pattern character below 128: only the first subject byte is
   compared, then the whole subject character is skipped with the help of
   utf8_table4, so the character is never fully decoded. */
3555 if (c < 128)
3556 {
3557 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3558 if (type == OP_NOT || !char_has_othercase(common, cc))
3559 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3560 else
3561 {
3562 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3563 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3564 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3565 }
3566 /* Skip the variable-length character. */
3567 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3568 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3569 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3570 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3571 JUMPHERE(jump[0]);
3572 return cc + 1;
3573 }
3574 else
3575 #endif /* COMPILE_PCRE8 */
3576 {
/* Other UTF cases: decode the pattern character (GETCHARLEN also updates
   length) and read a full subject character. */
3577 GETCHARLEN(c, cc, length);
3578 read_char(common);
3579 }
3580 }
3581 else
3582 #endif /* SUPPORT_UTF */
3583 {
3584 read_char(common);
3585 c = *cc;
3586 }
3587
/* The comparisons are the inverse of OP_CHAR/OP_CHARI: the match fails when
   the subject character equals c (or its other case). */
3588 if (type == OP_NOT || !char_has_othercase(common, cc))
3589 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3590 else
3591 {
3592 oc = char_othercase(common, c);
3593 bit = c ^ oc;
3594 if (ispowerof2(bit))
3595 {
3596 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3597 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3598 }
3599 else
3600 {
3601 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3602 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3603 }
3604 }
3605 return cc + length;
3606
/* Tests the character against the 32-byte bitmap stored at cc. */
3607 case OP_CLASS:
3608 case OP_NCLASS:
3609 fallback_at_str_end(common, fallbacks);
3610 read_char(common);
3611 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* Characters above 255 have no bit in the map: for OP_CLASS they fail, for
   OP_NCLASS jump[0] skips the bitmap test, so they are accepted. */
3612 jump[0] = NULL;
3613 #ifdef COMPILE_PCRE8
3614 /* This check only affects 8 bit mode. In other modes, we
3615 always need to compare the value with 255. */
3616 if (common->utf)
3617 #endif /* COMPILE_PCRE8 */
3618 {
3619 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3620 if (type == OP_CLASS)
3621 {
3622 add_jump(compiler, fallbacks, jump[0]);
3623 jump[0] = NULL;
3624 }
3625 }
3626 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* TMP2 = 1 << (character & 7) selects the bit, TMP1 = byte at index
   (character >> 3) of the map; the match fails when the bit is clear. */
3627 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3628 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3629 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3630 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3631 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3632 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3633 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3634 if (jump[0] != NULL)
3635 JUMPHERE(jump[0]);
3636 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3637 return cc + 32 / sizeof(pcre_uchar);
3638
3639 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
/* The class data starts after the LINK_SIZE field; the value stored in that
   field (minus the opcode unit already consumed by the caller) gives the
   position of the next opcode. */
3640 case OP_XCLASS:
3641 compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3642 return cc + GET(cc, 0) - 1;
3643 #endif
3644
/* Moves STR_PTR back by the amount stored at cc and fails if that would pass
   jit_arguments.begin. */
3645 case OP_REVERSE:
3646 length = GET(cc, 0);
3647 SLJIT_ASSERT(length > 0);
3648 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3649 #ifdef SUPPORT_UTF
3650 if (common->utf)
3651 {
/* UTF mode: TMP2 counts down from length, with one skip_char_back() per
   iteration; the match fails if STR_PTR reaches begin (TMP3) before the
   counter is exhausted. */
3652 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3653 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3654 label = LABEL();
3655 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3656 skip_char_back(common);
3657 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3658 JUMPTO(SLJIT_C_NOT_ZERO, label);
3659 }
3660 else
3661 #endif
3662 {
/* Fixed-width units: a single subtraction followed by a bounds check. */
3663 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3665 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3666 }
3667 check_start_used_ptr(common);
3668 return cc + LINK_SIZE;
3669 }
/* Every handled opcode returns from inside the switch. */
3670 SLJIT_ASSERT_STOP();
3671 return cc;
3672 }
3673
3674 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3675 {
3676 /* This function consumes at least one input character. */
3677 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3678 DEFINE_COMPILER;
3679 pcre_uchar *ccbegin = cc;
3680 compare_context context;
3681 int size;
3682
3683 context.length = 0;
3684 do
3685 {
3686 if (cc >= ccend)
3687 break;
3688
3689 if (*cc == OP_CHAR)
3690 {
3691 size = 1;
3692 #ifdef SUPPORT_UTF
3693 if (common->utf && HAS_EXTRALEN(cc[1]))
3694 size += GET_EXTRALEN(cc[1]);
3695 #endif
3696 }
3697 else if (*cc == OP_CHARI)
3698 {
3699 size = 1;
3700 #ifdef SUPPORT_UTF
3701 if (common->utf)
3702 {
3703 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3704 size = 0;
3705 else if (HAS_EXTRALEN(cc[1]))
3706 size += GET_EXTRALEN(cc[1]);
3707 }
3708 else
3709 #endif
3710 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3711 size = 0;
3712 }
3713 else
3714 size = 0;
3715
3716 cc += 1 + size;
3717 context.length += IN_UCHARS(size);
3718 }
3719 while (size > 0 && context.length <= 128);
3720
3721 cc = ccbegin;
3722 if (context.length > 0)
3723 {
3724 /* We have a fixed-length byte sequence. */
3725 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3726 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3727
3728 context.sourcereg = -1;
3729 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3730 context.ucharptr = 0;
3731 #endif
3732 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3733 return cc;
3734 }
3735
3736 /* A non-fixed length character will be checked if length == 0. */
3737 return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3738 }
3739
3740 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3741 {
3742 DEFINE_COMPILER;
3743 int offset = GET2(cc, 1) << 1;
3744
3745 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3746 if (!common->jscript_compat)
3747 {
3748 if (fallbacks == NULL)
3749 {
3750 /* OVECTOR(1) contains the "string begin - 1" constant. */
3751 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3752 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3753 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3754 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3755 return JUMP(SLJIT_C_NOT_ZERO);
3756 }
3757 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3758 }
3759 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3760 }
3761
3762 /* Forward declarations of two static functions, so that they can be called before their definitions. */
3763 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3764 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3765
/* Allocates a fallback record of `size` bytes with sljit_alloc_memory(),
clears it, stores `ccstart` in its cc field and links it in as the new top of
parent's fallback list. If the compiler reports an error after the allocation,
the enclosing function returns `error`.

The macro relies on `compiler`, `fallback` and `parent` being in scope at the
point of use. `size` is evaluated twice, so it must be free of side effects;
it is parenthesized at both uses so that any expression can be passed. */
#define PUSH_FALLBACK(size, ccstart, error) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)
3778
/* Variant of the fallback-pushing macro for functions returning void: it
allocates a fallback record of `size` bytes with sljit_alloc_memory(), clears
it, stores `ccstart` in its cc field and links it in as the new top of parent's
fallback list. If the compiler reports an error after the allocation, the
enclosing function simply returns.

The macro relies on `compiler`, `fallback` and `parent` being in scope at the
point of use. `size` is evaluated twice, so it must be free of side effects;
it is parenthesized at both uses so that any expression can be passed. */
#define PUSH_FALLBACK_NOVALUE(size, ccstart) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)
3791
/* Casts the `fallback` variable that is in scope at the point of use to a pointer to the given structure type. */
3792 #define FALLBACK_AS(type) ((type *)fallback)
3793
/* Emits the code that matches a back reference (OP_REF or OP_REFI) whose
   group number is stored at cc + 1.

   fallbacks  - jump list that receives every jump emitted for a failed match.
   withchecks - when TRUE, code is emitted that tests whether the capture
                start equals OVECTOR(1) (skipped if common->jscript_compat is
                set) and whether the captured string is empty.
   emptyfail  - only relevant with withchecks: TRUE makes an empty captured
                string a failure, FALSE lets it skip the comparison.

   Returns the pattern pointer after the opcode and its group number. */
3794 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3795 {
3796 DEFINE_COMPILER;
3797 int offset = GET2(cc, 1) << 1;
3798 struct sljit_jump *jump = NULL;
3799 struct sljit_jump *partial;
3800 struct sljit_jump *nopartial;
3801
/* TMP1 = start of the captured string. */
3802 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3803 /* OVECTOR(1) contains the "string begin - 1" constant. */
3804 if (withchecks && !common->jscript_compat)
3805 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3806
3807 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3808 if (common->utf && *cc == OP_REFI)
3809 {
/* Caseless UTF reference: the comparison is done by the C helper
   do_utf_caselesscmp(). The assertion pins the register assignment that the
   three-argument call below relies on. */
3810 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
/* TMP2 = end of the captured string; equal start and end means it is empty. */
3811 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3812 if (withchecks)
3813 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3814
3815 /* Needed to save important temporary registers. */
/* STACK_TOP is parked in LOCALS0 because its register carries the ARGUMENTS
   pointer during the call; the current STR_PTR is passed through the ptr
   field of jit_arguments. */
3816 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3817 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3818 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
3819 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3820 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are failures: in JIT_COMPILE mode both simply fail,
   while in the partial matching modes a result of 1 emits check_partial()
   before failing. Any other value becomes the new STR_PTR. */
3821 if (common->mode == JIT_COMPILE)
3822 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
3823 else
3824 {
3825 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3826 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3827 check_partial(common, FALSE);
3828 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3829 JUMPHERE(nopartial);
3830 }
3831 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3832 }
3833 else
3834 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3835 {
/* TMP2 = end - start of the captured string; the zero flag set by this
   subtraction identifies an empty string. */
3836 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3837 if (withchecks)
3838 jump = JUMP(SLJIT_C_ZERO);
3839
/* STR_PTR is advanced by the whole length first. In JIT_COMPILE mode running
   past STR_END is a plain failure; in the partial modes the `partial` jump
   leads to the tail comparison further below. */
3840 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3841 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
3842 if (common->mode == JIT_COMPILE)
3843 add_jump(compiler, fallbacks, partial);
3844
/* The shared compare routine is called and the match fails when TMP2 is
   non-zero afterwards. */
3845 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3846 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3847
3848 if (common->mode != JIT_COMPILE)
3849 {
/* Partial matching modes: the reference is longer than the remaining
   subject. TMP2 is reduced to the number of units that are available, those
   are compared (skipped when there are none), and a successful comparison
   ends in check_partial() followed by an unconditional failure. */
3850 nopartial = JUMP(SLJIT_JUMP);
3851 JUMPHERE(partial);
3852 /* TMP2 -= STR_END - STR_PTR */
3853 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
3854 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
3855 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
3856 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
3857 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3858 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3859 JUMPHERE(partial);
3860 check_partial(common, FALSE);
3861 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3862 JUMPHERE(nopartial);
3863 }
3864 }
3865
/* jump is only set with withchecks and is taken for an empty captured
   string: it either fails the match or lands here, after the comparison. */
3866 if (jump != NULL)
3867 {
3868 if (emptyfail)
3869 add_jump(compiler, fallbacks, jump);
3870 else
3871 JUMPHERE(jump);
3872 }
3873 return cc + 1 + IMM2_SIZE;
3874 }
3875
/* Generates the hot path for a back reference (at cc) that is followed by a
repeat opcode (OP_CRSTAR ... OP_CRMINRANGE, read from cc[1 + IMM2_SIZE]).

The repeat opcode supplies min and max; max == 0 is handled below as "no upper
limit" (the loop jumps back unconditionally). Repeat opcodes with the lowest
bit set take the minimizing code path at the end of the function, the others
take the greedy path.

Returns cc advanced past the reference, the repeat opcode and, for the range
forms, its two IMM2_SIZE sized operands. */
3876 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3877 {
3878 DEFINE_COMPILER;
3879 fallback_common *fallback;
3880 pcre_uchar type;
3881 struct sljit_label *label;
3882 struct sljit_jump *zerolength;
3883 struct sljit_jump *jump = NULL;
3884 pcre_uchar *ccbegin = cc;
3885 int min = 0, max = 0;
3886 BOOL minimize;
3887
3888 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3889
/* The repeat opcode follows the reference opcode and its IMM2_SIZE operand. */
3890 type = cc[1 + IMM2_SIZE];
3891 minimize = (type & 0x1) != 0;
3892 switch(type)
3893 {
3894 case OP_CRSTAR:
3895 case OP_CRMINSTAR:
3896 min = 0;
3897 max = 0;
3898 cc += 1 + IMM2_SIZE + 1;
3899 break;
3900 case OP_CRPLUS:
3901 case OP_CRMINPLUS:
3902 min = 1;
3903 max = 0;
3904 cc += 1 + IMM2_SIZE + 1;
3905 break;
3906 case OP_CRQUERY:
3907 case OP_CRMINQUERY:
3908 min = 0;
3909 max = 1;
3910 cc += 1 + IMM2_SIZE + 1;
3911 break;
3912 case OP_CRRANGE:
3913 case OP_CRMINRANGE:
3914 min = GET2(cc, 1 + IMM2_SIZE + 1);
3915 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
3916 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
3917 break;
3918 default:
3919 SLJIT_ASSERT_STOP();
3920 break;
3921 }
3922
/* Greedy repeat. */
3923 if (!minimize)
3924 {
3925 if (min == 0)
3926 {
/* Zero iterations are allowed: push the current STR_PTR and a 0 before the
first attempt; no fallback list is passed to compile_ref_checks here. */
3927 allocate_stack(common, 2);
3928 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3929 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3930 /* Temporary release of STR_PTR. */
3931 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3932 zerolength = compile_ref_checks(common, ccbegin, NULL);
3933 /* Restore if not zero length. */
3934 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3935 }
3936 else
3937 {
/* At least one iteration is required: only a 0 is pushed, and failures of
the checks go to this fallback's topfallbacks list. */
3938 allocate_stack(common, 1);
3939 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3940 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3941 }
3942
/* A counted repeat keeps its iteration count in the POSSESSIVE0 local. */
3943 if (min > 1 || max > 1)
3944 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
3945
/* Top of the loop: match the reference once more. */
3946 label = LABEL();
3947 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
3948
3949 if (min > 1 || max > 1)
3950 {
3951 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3952 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3953 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: iterate again without pushing a position. */
3954 if (min > 1)
3955 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
3956 if (max > 1)
3957 {
/* Below the maximum: push STR_PTR and iterate again; once the count reaches
max the loop is left through 'jump'. */
3958 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
3959 allocate_stack(common, 1);
3960 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3961 JUMPTO(SLJIT_JUMP, label);
3962 JUMPHERE(jump);
3963 }
3964 }
3965
3966 if (max == 0)
3967 {
3968 /* Includes min > 1 case as well. */
3969 allocate_stack(common, 1);
3970 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3971 JUMPTO(SLJIT_JUMP, label);
3972 }
3973
/* The jump returned by compile_ref_checks lands here, as does the label
recorded as this iterator's hotpath. */
3974 JUMPHERE(zerolength);
3975 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3976
3977 decrease_call_count(common);
3978 return cc;
3979 }
3980
/* Minimizing repeat. Two stack slots are used: STACK(0) receives STR_PTR
and STACK(1) is the iteration counter that is compared against max and
incremented below. */
3981 allocate_stack(common, 2);
3982 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3983 if (type != OP_CRMINSTAR)
3984 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3985
3986 if (min == 0)
3987 {
/* With min == 0 the first pass stores STR_PTR and jumps over the matching
code emitted below (to the JUMPHERE(jump) at the end). */
3988 zerolength = compile_ref_checks(common, ccbegin, NULL);
3989 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3990 jump = JUMP(SLJIT_JUMP);
3991 }
3992 else
3993 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3994
/* Code from this label matches one more repetition; when max is limited, it
first fails (topfallbacks) if the counter has already reached max. */
3995 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3996 if (max > 0)
3997 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
3998
3999 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
4000 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4001
4002 if (min > 1)
4003 {
/* Count the iteration and repeat immediately until min is reached. */
4004 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4005 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4006 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4007 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
4008 }
4009 else if (max > 0)
4010 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4011
4012 if (jump != NULL)
4013 JUMPHERE(jump);
4014 JUMPHERE(zerolength);
4015
4016 decrease_call_count(common);
4017 return cc;
4018 }
4019
4020 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4021 {
4022 DEFINE_COMPILER;
4023 fallback_common *fallback;
4024 recurse_entry *entry = common->entries;
4025 recurse_entry *prev = NULL;
4026 int start = GET(cc, 1);
4027
4028 PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
4029 while (entry != NULL)
4030 {
4031 if (entry->start == start)
4032 break;
4033 prev = entry;
4034 entry = entry->next;
4035 }
4036
4037 if (entry == NULL)
4038 {
4039 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4040 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4041 return NULL;
4042 entry->next = NULL;
4043 entry->entry = NULL;
4044 entry->calls = NULL;
4045 entry->start = start;
4046
4047 if (prev != NULL)
4048 prev->next = entry;
4049 else
4050 common->entries = entry;
4051 }
4052
4053 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4054 allocate_stack(common, 1);
4055 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4056
4057 if (entry->entry == NULL)
4058 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4059 else
4060 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4061 /* Leave if the match is failed. */
4062 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4063 return cc + 1 + LINK_SIZE;
4064 }
4065
/* Generates the hot path of an assertion: an opcode in the range OP_ASSERT ..
OP_ASSERTBACK_NOT, optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

common      - compiler state; its acceptlabel and accept members are saved on
              entry and restored before every return
cc          - points to the assertion opcode, or to the OP_BRAZERO /
              OP_BRAMINZERO in front of it
fallback    - the assert_fallback record; framesize and localptr are filled in
              here, and hotpath is set for the OP_BRAZERO form
conditional - when TRUE, failure jumps are collected in fallback->condfailed
              instead of fallback->common.topfallbacks (the zero-prefixed forms
              are not allowed in this case)

Returns the address following the closing opcode of the assertion, or NULL
when the compiler reports an error. */
4066 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
4067 {
4068 DEFINE_COMPILER;
4069 int framesize;
4070 int localptr;
4071 fallback_common altfallback;
4072 pcre_uchar *ccbegin;
4073 pcre_uchar opcode;
4074 pcre_uchar bra = OP_BRA;
4075 jump_list *tmp = NULL;
4076 jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
4077 jump_list **found;
4078 /* Saving previous accept variables. */
4079 struct sljit_label *save_acceptlabel = common->acceptlabel;
4080 struct sljit_jump *jump;
4081 struct sljit_jump *brajump = NULL;
4082 jump_list *save_accept = common->accept;
4083
4084 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4085 {
4086 SLJIT_ASSERT(!conditional);
4087 bra = *cc;
4088 cc++;
4089 }
4090 localptr = PRIV_DATA(cc);
4091 SLJIT_ASSERT(localptr != 0);
4092 framesize = get_framesize(common, cc, FALSE);
4093 fallback->framesize = framesize;
4094 fallback->localptr = localptr;
4095 opcode = *cc;
4096 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* 'found' receives the jump emitted when an alternative matches: positive
assertions collect these in the local list tmp, negative assertions send
them to the failure target. */
4097 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
4098 ccbegin = cc;
4099 cc += GET(cc, 1);
4100
4101 if (bra == OP_BRAMINZERO)
4102 {
4103 /* This is a braminzero fallback path. */
4104 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4105 free_stack(common, 1);
4106 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4107 }
4108
/* Save the state needed after the assertion. Without a frame (framesize < 0)
localptr records the stack top and only STR_PTR is pushed; otherwise
STR_PTR, the previous localptr value and a frame built by init_frame are
pushed, and localptr is pointed into the newly allocated area. */
4109 if (framesize < 0)
4110 {
4111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4112 allocate_stack(common, 1);
4113 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4114 }
4115 else
4116 {
4117 allocate_stack(common, framesize + 2);
4118 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4119 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4121 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4122 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4123 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
4124 }
4125
/* Compile the alternatives one by one. Each of them gets a cleared local
fallback record and a fresh accept label / accept list. */
4126 memset(&altfallback, 0, sizeof(fallback_common));
4127 while (1)
4128 {
4129 common->acceptlabel = NULL;
4130 common->accept = NULL;
4131 altfallback.top = NULL;
4132 altfallback.topfallbacks = NULL;
4133
/* Alternatives after the first one restart from the STR_PTR saved in STACK(0). */
4134 if (*ccbegin == OP_ALT)
4135 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4136
4137 altfallback.cc = ccbegin;
4138 compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback);
4139 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4140 {
4141 common->acceptlabel = save_acceptlabel;
4142 common->accept = save_accept;
4143 return NULL;
4144 }
/* Any jumps left in common->accept by the code above are bound to the end of
this alternative. */
4145 common->acceptlabel = LABEL();
4146 if (common->accept != NULL)
4147 set_jumps(common->accept, common->acceptlabel);
4148
4149 /* Reset stack. */
4150 if (framesize < 0)
4151 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4152 else {
4153 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
4154 {
4155 /* We don't need to keep the STR_PTR, only the previous localptr. */
4156 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4157 }
4158 else
4159 {
4160 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4161 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4162 }
4163 }
4164
/* For the negative forms a matching alternative means the assertion failed;
restore STR_PTR (and localptr where a frame was used) before jumping away. */
4165 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
4166 {
4167 /* We know that STR_PTR was stored on the top of the stack. */
4168 if (conditional)
4169 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4170 else if (bra == OP_BRAZERO)
4171 {
4172 if (framesize < 0)
4173 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4174 else
4175 {
4176 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4177 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
4178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4179 }
4180 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4181 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4182 }
4183 else if (framesize >= 0)
4184 {
4185 /* For OP_BRA and OP_BRAMINZERO. */
4186 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4187 }
4188 }
/* This alternative matched: jump via the list selected in 'found'. */
4189 add_jump(compiler, found, JUMP(SLJIT_JUMP));
4190
/* Fallback code of this alternative. Its top level failures continue right
after it, i.e. at the next alternative or at the code following the loop. */
4191 compile_fallbackpath(common, altfallback.top);
4192 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4193 {
4194 common->acceptlabel = save_acceptlabel;
4195 common->accept = save_accept;
4196 return NULL;
4197 }
4198 set_jumps(altfallback.topfallbacks, LABEL());
4199
4200 if (*cc != OP_ALT)
4201 break;
4202
4203 ccbegin = cc;
4204 cc += GET(cc, 1);
4205 }
4206 /* None of them matched. */
4207
4208 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
4209 {
4210 /* Assert is failed. */
4211 if (conditional || bra == OP_BRAZERO)
4212 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4213
4214 if (framesize < 0)
4215 {
4216 /* The topmost item should be 0. */
4217 if (bra == OP_BRAZERO)
4218 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4219 else
4220 free_stack(common, 1);
4221 }
4222 else
4223 {
4224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4225 /* The topmost item should be 0. */
4226 if (bra == OP_BRAZERO)
4227 {
4228 free_stack(common, framesize + 1);
4229 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4230 }
4231 else
4232 free_stack(common, framesize + 2);
4233 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4234 }
/* Leave the failure code. Except for OP_BRAZERO the jump goes to the failure
target; for OP_BRAZERO it is bound to fallback->hotpath further below. */
4235 jump = JUMP(SLJIT_JUMP);
4236 if (bra != OP_BRAZERO)
4237 add_jump(compiler, target, jump);
4238
4239 /* Assert is successful. */
4240 set_jumps(tmp, LABEL());
4241 if (framesize < 0)
4242 {
4243 /* We know that STR_PTR was stored on the top of the stack. */
4244 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4245 /* Keep the STR_PTR on the top of the stack. */
4246 if (bra == OP_BRAZERO)
4247 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4248 else if (bra == OP_BRAMINZERO)
4249 {
4250 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4251 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4252 }
4253 }
4254 else
4255 {
4256 if (bra == OP_BRA)
4257 {
4258 /* We don't need to keep the STR_PTR, only the previous localptr. */
4259 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4260 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4261 }
4262 else
4263 {
4264 /* We don't need to keep the STR_PTR, only the previous localptr. */
4265 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
4266 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4267 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
4268 }
4269 }
4270
4271 if (bra == OP_BRAZERO)
4272 {
/* Both the success code above and the failure jump continue here. */
4273 fallback->hotpath = LABEL();
4274 sljit_set_label(jump, fallback->hotpath);
4275 }
4276 else if (bra == OP_BRAMINZERO)
4277 {
/* Continue at fallback->hotpath (not assigned in this function for this
form); the brajump emitted at the top and the topfallbacks land after it. */
4278 JUMPTO(SLJIT_JUMP, fallback->hotpath);
4279 JUMPHERE(brajump);
4280 if (framesize >= 0)
4281 {
4282 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4283 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4284 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4285 }
4286 set_jumps(fallback->common.topfallbacks, LABEL());
4287 }
4288 }
4289 else
4290 {
4291 /* AssertNot is successful. */
4292 if (framesize < 0)
4293 {
4294 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4295 if (bra != OP_BRA)
4296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4297 else
4298 free_stack(common, 1);
4299 }
4300 else
4301 {
4302 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4304 /* The topmost item should be 0. */
4305 if (bra != OP_BRA)
4306 {
4307 free_stack(common, framesize + 1);
4308 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4309 }
4310 else
4311 free_stack(common, framesize + 2);
4312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4313 }
4314
4315 if (bra == OP_BRAZERO)
4316 fallback->hotpath = LABEL();
4317 else if (bra == OP_BRAMINZERO)
4318 {
4319 JUMPTO(SLJIT_JUMP, fallback->hotpath);
4320 JUMPHERE(brajump);
4321 }
4322
/* For the zero-prefixed forms the collected failure jumps are bound here and
the list is emptied. */
4323 if (bra != OP_BRA)
4324 {
4325 SLJIT_ASSERT(found == &fallback->common.topfallbacks);
4326 set_jumps(fallback->common.topfallbacks, LABEL());
4327 fallback->common.topfallbacks = NULL;
4328 }
4329 }
4330
/* Restore the accept state saved on entry. */
4331 common->acceptlabel = save_acceptlabel;
4332 common->accept = save_accept;
4333 return cc + 1 + LINK_SIZE;
4334 }
4335
4336 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
4337 {
4338 int condition = FALSE;
4339 pcre_uchar *slotA = name_table;
4340 pcre_uchar *slotB;
4341 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4342 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4343 sljit_w no_capture;
4344 int i;
4345
4346 locals += refno & 0xff;
4347 refno >>= 8;
4348 no_capture = locals[1];
4349
4350 for (i = 0; i < name_count; i++)
4351 {
4352 if (GET2(slotA, 0) == refno) break;
4353 slotA += name_entry_size;
4354 }
4355
4356 if (i < name_count)
4357 {
4358 /* Found a name for the number - there can be only one; duplicate names
4359 for different numbers are allowed, but not vice versa. First scan down
4360 for duplicates. */
4361
4362 slotB = slotA;
4363 while (slotB > name_table)
4364 {
4365 slotB -= name_entry_size;
4366 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4367 {
4368 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4369 if (condition) break;
4370 }
4371 else break;
4372 }
4373
4374 /* Scan up for duplicates */
4375 if (!condition)
4376 {
4377 slotB = slotA;
4378 for (i++; i < name_count; i++)
4379 {
4380 slotB += name_entry_size;
4381 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4382 {
4383 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4384 if (condition) break;
4385 }
4386 else break;
4387 }
4388 }
4389 }
4390 return condition;
4391 }
4392
4393 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
4394 {
4395 int condition = FALSE;
4396 pcre_uchar *slotA = name_table;
4397 pcre_uchar *slotB;
4398 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4399 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4400 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
4401 int i;
4402
4403 for (i = 0; i < name_count; i++)
4404 {
4405 if (GET2(slotA, 0) == recno) break;
4406 slotA += name_entry_size;
4407 }
4408
4409 if (i < name_count)
4410 {
4411 /* Found a name for the number - there can be only one; duplicate
4412 names for different numbers are allowed, but not vice versa. First
4413 scan down for duplicates. */
4414
4415 slotB = slotA;
4416 while (slotB > name_table)
4417 {
4418 slotB -= name_entry_size;
4419 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4420 {
4421 condition = GET2(slotB, 0) == group_num;
4422 if (condition) break;
4423 }
4424 else break;
4425 }
4426
4427 /* Scan up for duplicates */
4428 if (!condition)
4429 {
4430 slotB = slotA;
4431 for (i++; i < name_count; i++)
4432 {
4433 slotB += name_entry_size;
4434 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4435 {
4436 condition = GET2(slotB, 0) == group_num;
4437 if (condition) break;
4438 }
4439 else break;
4440 }
4441 }
4442 }
4443 return condition;
4444 }
4445
4446 /*
4447 Handling bracketed expressions is probably the most complex part.
4448
4449 Stack layout naming characters:
4450 S - Push the current STR_PTR
4451 0 - Push a 0 (NULL)
4452 A - Push the current STR_PTR. Needed for restoring the STR_PTR
4453 before the next alternative. Not pushed if there are no alternatives.
4454 M - Any values pushed by the current alternative. Can be empty, or anything.
4455 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
4456 L - Push the previous local (pointed by localptr) to the stack
4457 () - optional values stored on the stack
4458 ()* - optional, can be stored multiple times
4459
4460 The following list shows the regular expression templates, their PCRE byte codes
4461 and stack layout supported by pcre-sljit.
4462
4463 (?:) OP_BRA | OP_KET A M
4464 () OP_CBRA | OP_KET C M
4465 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
4466 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
4467 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
4468 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
4469 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
4470 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
4471 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
4472 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
4473 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
4474 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
4475 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
4476 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
4477 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
4478 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
4479 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
4480 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
4481 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
4482 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
4483 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
4484 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
4485
4486
4487 Stack layout naming characters:
4488 A - Push the alternative index (starting from 0) on the stack.
4489 Not pushed if there are no alternatives.
4490 M - Any values pushed by the current alternative. Can be empty, or anything.
4491
4492 The next list shows the possible content of a bracket:
4493 (|) OP_*BRA | OP_ALT ... M A
4494 (?()|) OP_*COND | OP_ALT M A
4495 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
4496 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
4497 Or nothing, if trace is unnecessary
4498 */
4499
4500 static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4501 {
4502 DEFINE_COMPILER;
4503 fallback_common *fallback;
4504 pcre_uchar opcode;
4505 int localptr = 0;
4506 int offset = 0;
4507 int stacksize;
4508 pcre_uchar *ccbegin;
4509 pcre_uchar *hotpath;
4510 pcre_uchar bra = OP_BRA;
4511 pcre_uchar ket;
4512 assert_fallback *assert;
4513 BOOL has_alternatives;
4514 struct sljit_jump *jump;
4515 struct sljit_jump *skip;
4516 struct sljit_label *rmaxlabel = NULL;
4517 struct sljit_jump *braminzerojump = NULL;
4518
4519 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4520
4521 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4522 {
4523 bra = *cc;
4524 cc++;
4525 opcode = *cc;
4526 }
4527
4528 opcode = *cc;
4529 ccbegin = cc;
4530 hotpath = ccbegin + 1 + LINK_SIZE;
4531
4532 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4533 {
4534 /* Drop this bracket_fallback. */
4535 parent->top = fallback->prev;
4536 return bracketend(cc);
4537 }
4538
4539 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4540 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4541 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
4542 cc += GET(cc, 1);
4543
4544 has_alternatives = *cc == OP_ALT;
4545 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4546 {
4547 has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE;
4548 if (*hotpath == OP_NRREF)
4549 {
4550 stacksize = GET2(hotpath, 1);
4551 if (common->currententry == NULL || stacksize == RREF_ANY)
4552 has_alternatives = FALSE;
4553 else if (common->currententry->start == 0)
4554 has_alternatives = stacksize != 0;
4555 else
4556 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4557 }
4558 }
4559
4560 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4561 opcode = OP_SCOND;
4562 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4563 opcode = OP_ONCE;
4564
4565 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4566 {
4567 /* Capturing brackets has a pre-allocated space. */
4568 offset = GET2(ccbegin, 1 + LINK_SIZE);
4569 localptr = OVECTOR_PRIV(offset);
4570 offset <<= 1;
4571 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4572 hotpath += IMM2_SIZE;
4573 }
4574 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4575 {
4576 /* Other brackets simply allocate the next entry. */
4577 localptr = PRIV_DATA(ccbegin);
4578 SLJIT_ASSERT(localptr != 0);
4579 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4580 if (opcode == OP_ONCE)
4581 FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE);
4582 }
4583
4584 /* Instructions before the first alternative. */
4585 stacksize = 0;
4586 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4587 stacksize++;
4588 if (bra == OP_BRAZERO)
4589 stacksize++;
4590
4591 if (stacksize > 0)
4592 allocate_stack(common, stacksize);
4593
4594 stacksize = 0;
4595 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4596 {
4597 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4598 stacksize++;
4599 }
4600
4601 if (bra == OP_BRAZERO)
4602 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4603
4604 if (bra == OP_BRAMINZERO)
4605 {
4606 /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */
4607 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4608 if (ket != OP_KETRMIN)
4609 {
4610 free_stack(common, 1);
4611 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4612 }
4613 else
4614 {
4615 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4616 {
4617 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4618 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4619 /* Nothing stored during the first run. */
4620 skip = JUMP(SLJIT_JUMP);
4621 JUMPHERE(jump);
4622 /* Checking zero-length iteration. */
4623 if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4624 {
4625 /* When we come from outside, localptr contains the previous STR_PTR. */
4626 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4627 }
4628 else
4629 {
4630 /* Except when the whole stack frame must be saved. */
4631 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4632 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w));
4633 }
4634 JUMPHERE(skip);
4635 }
4636 else
4637 {
4638 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4639 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4640 JUMPHERE(jump);
4641 }
4642 }
4643 }
4644
4645 if (ket == OP_KETRMIN)
4646 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4647
4648 if (ket == OP_KETRMAX)
4649 {
4650 rmaxlabel = LABEL();
4651 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4652 FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel;
4653 }
4654
4655 /* Handling capturing brackets and alternatives. */
4656 if (opcode == OP_ONCE)
4657 {
4658 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4659 {
4660 /* Neither capturing brackets nor recursions are not found in the block. */
4661 if (ket == OP_KETRMIN)
4662 {
4663 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4664 allocate_stack(common, 2);
4665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4666 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4667 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4668 }
4669 else if (ket == OP_KETRMAX || has_alternatives)
4670 {
4671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4672 allocate_stack(common, 1);
4673 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4674 }
4675 else
4676 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4677 }
4678 else
4679 {
4680 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4681 {
4682 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2);
4683 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4684 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1));
4685 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4686 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4687 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4688 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE);
4689 }
4690 else
4691 {
4692 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1);
4693 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4694 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize));
4695 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4696 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4697 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE);
4698 }
4699 }
4700 }
4701 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4702 {
4703 /* Saving the previous values. */
4704 allocate_stack(common, 3);
4705 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4706 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4707 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4708 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4709 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4710 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4712 }
4713 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4714 {
4715 /* Saving the previous value. */
4716 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4717 allocate_stack(common, 1);
4718 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4719 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4720 }
4721 else if (has_alternatives)
4722 {
4723 /* Pushing the starting string pointer. */
4724 allocate_stack(common, 1);
4725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4726 }
4727
4728 /* Generating code for the first alternative. */
4729 if (opcode == OP_COND || opcode == OP_SCOND)
4730 {
4731 if (*hotpath == OP_CREF)
4732 {
4733 SLJIT_ASSERT(has_alternatives);
4734 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed),
4735 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4736 hotpath += 1 + IMM2_SIZE;
4737 }
4738 else if (*hotpath == OP_NCREF)
4739 {
4740 SLJIT_ASSERT(has_alternatives);
4741 stacksize = GET2(hotpath, 1);
4742 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4743
4744 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4745 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4746 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4747 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
4748 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4749 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4750 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4751 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4752 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4753
4754 JUMPHERE(jump);
4755 hotpath += 1 + IMM2_SIZE;
4756 }
4757 else if (*hotpath == OP_RREF || *hotpath == OP_NRREF)
4758 {
4759 /* Never has other case. */
4760 FALLBACK_AS(bracket_fallback)->u.condfailed = NULL;
4761
4762 stacksize = GET2(hotpath, 1);
4763 if (common->currententry == NULL)
4764 stacksize = 0;
4765 else if (stacksize == RREF_ANY)
4766 stacksize = 1;
4767 else if (common->currententry->start == 0)
4768 stacksize = stacksize == 0;
4769 else
4770 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4771
4772 if (*hotpath == OP_RREF || stacksize || common->currententry == NULL)
4773 {
4774 SLJIT_ASSERT(!has_alternatives);
4775 if (stacksize != 0)
4776 hotpath += 1 + IMM2_SIZE;
4777 else
4778 {
4779 if (*cc == OP_ALT)
4780 {
4781 hotpath = cc + 1 + LINK_SIZE;
4782 cc += GET(cc, 1);
4783 }
4784 else
4785 hotpath = cc;
4786 }
4787 }
4788 else
4789 {
4790 SLJIT_ASSERT(has_alternatives);
4791
4792 stacksize = GET2(hotpath, 1);
4793 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4794 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4795 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4796 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4797 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4798 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4799 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4800 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4801 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4802 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4803 hotpath += 1 + IMM2_SIZE;
4804 }
4805 }
4806 else
4807 {
4808 SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT);
4809 /* Similar code as PUSH_FALLBACK macro. */
4810 assert = sljit_alloc_memory(compiler, sizeof(assert_fallback));
4811 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4812 return NULL;
4813 memset(assert, 0, sizeof(assert_fallback));
4814 assert->common.cc = hotpath;
4815 FALLBACK_AS(bracket_fallback)->u.assert = assert;
4816 hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE);
4817 }
4818 }
4819
4820 compile_hotpath(common, hotpath, cc, fallback);
4821 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4822 return NULL;
4823
4824 if (opcode == OP_ONCE)
4825 {
4826 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4827 {
4828 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4829 /* TMP2 which is set here used by OP_KETRMAX below. */
4830 if (ket == OP_KETRMAX)
4831 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4832 else if (ket == OP_KETRMIN)
4833 {
4834 /* Move the STR_PTR to the localptr. */
4835 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4836 }
4837 }
4838 else
4839 {
4840 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
4841 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w));
4842 if (ket == OP_KETRMAX)
4843 {
4844 /* TMP2 which is set here used by OP_KETRMAX below. */
4845 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4846 }
4847 }
4848 }
4849
4850 stacksize = 0;
4851 if (ket != OP_KET || bra != OP_BRA)
4852 stacksize++;
4853 if (has_alternatives && opcode != OP_ONCE)
4854 stacksize++;
4855
4856 if (stacksize > 0)
4857 allocate_stack(common, stacksize);
4858
4859 stacksize = 0;
4860 if (ket != OP_KET)
4861 {
4862 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4863 stacksize++;
4864 }
4865 else if (bra != OP_BRA)
4866 {
4867 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4868 stacksize++;
4869 }
4870
4871 if (has_alternatives)
4872 {
4873 if (opcode != OP_ONCE)
4874 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4875 if (ket != OP_KETRMAX)
4876 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4877 }
4878
4879 /* Must be after the hotpath label. */
4880 if (offset != 0)
4881 {
4882 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4883 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4884 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
4885 }
4886
4887 if (ket == OP_KETRMAX)
4888 {
4889 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4890 {
4891 if (has_alternatives)
4892 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4893 /* Checking zero-length iteration. */
4894 if (opcode != OP_ONCE)
4895 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
4896 else
4897 /* TMP2 must contain the starting STR_PTR. */
4898 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
4899 }
4900 else
4901 JUMPTO(SLJIT_JUMP, rmaxlabel);
4902 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4903 }
4904
4905 if (bra == OP_BRAZERO)
4906 FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL();
4907
4908 if (bra == OP_BRAMINZERO)
4909 {
4910 /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */
4911 JUMPTO(SLJIT_JUMP, ((braminzero_fallback *)parent)->hotpath);
4912 if (braminzerojump != NULL)
4913 {
4914 JUMPHERE(braminzerojump);
4915 /* We need to release the end pointer to perform the
4916 fallback for the zero-length iteration. When
4917 framesize is < 0, OP_ONCE will do the release itself. */
4918 if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0)
4919 {
4920 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4921 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4922 }
4923 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
4924 free_stack(common, 1);
4925 }
4926 /* Continue to the normal fallback. */
4927 }
4928
4929 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
4930 decrease_call_count(common);
4931
4932 /* Skip the other alternatives. */
4933 while (*cc == OP_ALT)
4934 cc += GET(cc, 1);
4935 cc += 1 + LINK_SIZE;
4936 return cc;
4937 }
4938
/* Emits the hot path for the *POS bracket opcodes handled by the switch
below: OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS and OP_SCBRAPOS, each optionally
preceded by OP_BRAPOSZERO. Every alternative of the bracket is compiled in
turn with compile_hotpath(); a successful alternative jumps back to the
'loop' label, and the fallback code of each alternative is generated
right after it with compile_fallbackpath().

Arguments:
  common   shared JIT compiler state
  cc       points to OP_BRAPOSZERO or to the bracket opcode itself
  parent   not referenced directly in this function; presumably consumed
           by the PUSH_FALLBACK macro - confirm against its definition

Returns:   cc + 1 + LINK_SIZE where cc points to the closing OP_KETRPOS,
           or NULL when sljit_get_compiler_error() reports an error */
4939 static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4940 {
4941 DEFINE_COMPILER;
4942 fallback_common *fallback;
4943 pcre_uchar opcode;
4944 int localptr;
4945 int cbraprivptr = 0;
4946 int framesize;
4947 int stacksize;
4948 int offset = 0;
4949 BOOL zero = FALSE;
4950 pcre_uchar *ccbegin = NULL;
4951 int stack;
4952 struct sljit_label *loop = NULL;
4953 struct jump_list *emptymatch = NULL;
4954
4955 PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL);
/* A leading OP_BRAPOSZERO is remembered in 'zero' and skipped. */
4956 if (*cc == OP_BRAPOSZERO)
4957 {
4958 zero = TRUE;
4959 cc++;
4960 }
4961
4962 opcode = *cc;
4963 localptr = PRIV_DATA(cc);
4964 SLJIT_ASSERT(localptr != 0);
4965 FALLBACK_AS(bracketpos_fallback)->localptr = localptr;
/* Locate the first alternative (ccbegin). For the capturing variants also
fetch the group number, its private slot, and convert 'offset' into an
index of the OVECTOR pair (offset, offset + 1). */
4966 switch(opcode)
4967 {
4968 case OP_BRAPOS:
4969 case OP_SBRAPOS:
4970 ccbegin = cc + 1 + LINK_SIZE;
4971 break;
4972
4973 case OP_CBRAPOS:
4974 case OP_SCBRAPOS:
4975 offset = GET2(cc, 1 + LINK_SIZE);
4976 cbraprivptr = OVECTOR_PRIV(offset);
4977 offset <<= 1;
4978 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
4979 break;
4980
4981 default:
4982 SLJIT_ASSERT_STOP();
4983 break;
4984 }
4985
/* The sign of framesize selects one of two stack layouts below. */
4986 framesize = get_framesize(common, cc, FALSE);
4987 FALLBACK_AS(bracketpos_fallback)->framesize = framesize;
4988 if (framesize < 0)
4989 {
/* Two slots for a capturing bracket (the saved OVECTOR pair), one slot
(STR_PTR) otherwise, plus one extra flag slot when 'zero' is not set. */
4990 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
4991 if (!zero)
4992 stacksize++;
4993 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4994 allocate_stack(common, stacksize);
4995 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4996
4997 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4998 {
4999 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5000 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5001 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5002 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5003 }
5004 else
5005 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5006
/* The flag slot starts as 1. It is set to 0 after an alternative has
matched (inside the loop) and tested again after the loop. */
5007 if (!zero)
5008 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5009 }
5010 else
5011 {
/* framesize + 1 slots, plus the flag slot when 'zero' is not set, plus a
STR_PTR slot for the non-capturing variants. */
5012 stacksize = framesize + 1;
5013 if (!zero)
5014 stacksize++;
5015 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5016 stacksize++;
5017 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
5018 allocate_stack(common, stacksize);
5019
/* TMP1 keeps the previous content of the localptr slot, which is stored
on the stack below; the slot itself receives a new value derived from
STACK_TOP. */
5020 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5021 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5022 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5023 stack = 0;
5024 if (!zero)
5025 {
5026 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5027 stack++;
5028 }
5029 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5030 {
5031 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5032 stack++;
5033 }
5034 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5035 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5036 }
5037
/* Capturing variants keep the current STR_PTR in the group's private slot. */
5038 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5039 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5040
/* Every successful alternative jumps back here. */
5041 loop = LABEL();
/* One pass per alternative; the loop is left through the break at its end
once cc has reached OP_KETRPOS. */
5042 while (*cc != OP_KETRPOS)
5043 {
5044 fallback->top = NULL;
5045 fallback->topfallbacks = NULL;
/* Move cc to the end of the current alternative. */
5046 cc += GET(cc, 1);
5047
5048 compile_hotpath(common, ccbegin, cc, fallback);
5049 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5050 return NULL;
5051
/* Code emitted from here up to the JUMPTO below runs when the alternative
matched: STACK_TOP is recomputed from the localptr slot, the new position
is recorded, and for the OP_SBRAPOS / OP_SCBRAPOS variants an iteration
that did not move STR_PTR leaves the loop through 'emptymatch'. */
5052 if (framesize < 0)
5053 {
5054 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5055
5056 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5057 {
5058 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5059 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5060 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5061 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5062 }
5063 else
5064 {
5065 if (opcode == OP_SBRAPOS)
5066 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5067 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5068 }
5069
5070 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5071 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5072
/* Clear the flag slot: at least one iteration has matched. */
5073 if (!zero)
5074 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5075 }
5076 else
5077 {
5078 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5079 {
5080 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5081 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5082 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5083 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5084 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5085 }
5086 else
5087 {
5088 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5089 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5090 if (opcode == OP_SBRAPOS)
5091 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5092 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5093 }
5094
5095 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5096 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5097
5098 if (!zero)
5099 {
/* NOTE(review): this code sits in the else branch of the 'framesize < 0'
test above and framesize is not assigned inside the loop, so the
'framesize < 0' arm below cannot be taken. */
5100 if (framesize < 0)
5101 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5102 else
5103 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5104 }
5105 }
5106 JUMPTO(SLJIT_JUMP, loop);
5107 flush_stubs(common);
5108
/* The fallback code of this alternative is generated inline; its failure
jumps (topfallbacks) land on the label created below. */
5109 compile_fallbackpath(common, fallback->top);
5110 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5111 return NULL;
5112 set_jumps(fallback->topfallbacks, LABEL());
5113
/* The alternative failed: reload STR_PTR from where the success code above
last stored it before trying the next alternative or leaving the loop. */
5114 if (framesize < 0)
5115 {
5116 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5117 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5118 else
5119 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5120 }
5121 else
5122 {
5123 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5124 {
5125 /* Last alternative. */
5126 if (*cc == OP_KETRPOS)
5127 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5128 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5129 }
5130 else
5131 {
5132 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5133 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5134 }
5135 }
5136
5137 if (*cc == OP_KETRPOS)
5138 break;
/* Skip the OP_ALT header to reach the start of the next alternative. */
5139 ccbegin = cc + 1 + LINK_SIZE;
5140 }
5141
/* When 'zero' is not set, a flag slot that is still non-zero (no
alternative ever cleared it) makes the whole bracket fail through
topfallbacks. */
5142 fallback->topfallbacks = NULL;
5143 if (!zero)
5144 {
5145 if (framesize < 0)
5146 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
5147 else /* TMP2 is set to [localptr] above. */
5148 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
5149 }
5150
5151 /* None of them matched. */
5152 set_jumps(emptymatch, LABEL());
5153 decrease_call_count(common);
5154 return cc + 1 + LINK_SIZE;
5155 }
5156
5157 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
5158 {
5159 int class_len;
5160
5161 *opcode = *cc;
5162 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
5163 {
5164 cc++;
5165 *type = OP_CHAR;
5166 }
5167 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
5168 {
5169 cc++;
5170 *type = OP_CHARI;
5171 *opcode -= OP_STARI - OP_STAR;
5172 }
5173 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
5174 {
5175 cc++;
5176 *type = OP_NOT;
5177 *opcode -= OP_NOTSTAR - OP_STAR;
5178 }
5179 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
5180 {
5181 cc++;
5182 *type = OP_NOTI;
5183 *opcode -= OP_NOTSTARI - OP_STAR;
5184 }
5185 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
5186 {
5187 cc++;
5188 *opcode -= OP_TYPESTAR - OP_STAR;
5189 *type = 0;
5190 }
5191 else
5192 {
5193 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
5194 *type = *opcode;
5195 cc++;
5196 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
5197 *opcode = cc[class_len - 1];
5198 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
5199 {
5200 *opcode -= OP_CRSTAR - OP_STAR;
5201 if (end != NULL)
5202 *end = cc + class_len;
5203 }
5204 else
5205 {
5206 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
5207 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
5208 *arg2 = GET2(cc, class_len);
5209
5210 if (*arg2 == 0)
5211 {
5212 SLJIT_ASSERT(*arg1 != 0);
5213 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
5214 }
5215 if (*arg1 == *arg2)
5216 *opcode = OP_EXACT;
5217
5218 if (end != NULL)
5219 *end = cc + class_len + 2 * IMM2_SIZE;
5220 }
5221 return cc;
5222 }
5223
5224 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
5225 {
5226 *arg1 = GET2(cc, 0);
5227 cc += IMM2_SIZE;
5228 }
5229
5230 if (*type == 0)
5231 {
5232 *type = *cc;
5233 if (end != NULL)
5234 *end = next_opcode(common, cc);
5235 cc++;
5236 return cc;
5237 }
5238
5239 if (end != NULL)
5240 {
5241 *end = cc + 1;
5242 #ifdef SUPPORT_UTF
5243 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
5244 #endif
5245 }
5246 return cc;
5247 }
5248
/* Emits the hot path of a repeated single item (character, negated
character, character type or class). The repeat is first normalised by
get_iterator_parameters(), so the switch below only deals with the
OP_STAR ... OP_POSUPTO family plus OP_CRRANGE / OP_CRMINRANGE. The item
itself is matched by code generated with compile_char1_hotpath().

Arguments:
  common   shared JIT compiler state
  cc       points to the repeat opcode (or the class opcode)
  parent   not referenced directly in this function; presumably consumed
           by the PUSH_FALLBACK macro - confirm against its definition

Returns:   the 'end' pointer computed by get_iterator_parameters() */
5249 static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
5250 {
5251 DEFINE_COMPILER;
5252 fallback_common *fallback;
5253 pcre_uchar opcode;
5254 pcre_uchar type;
5255 int arg1 = -1, arg2 = -1;
5256 pcre_uchar* end;
5257 jump_list *nomatch = NULL;
5258 struct sljit_jump *jump = NULL;
5259 struct sljit_label *label;
5260
5261 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
5262
5263 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
5264
5265 switch(opcode)
5266 {
5267 case OP_STAR:
5268 case OP_PLUS:
5269 case OP_UPTO:
5270 case OP_CRRANGE:
5271 if (type == OP_ANYNL || type == OP_EXTUNI)
5272 {
/* For these two types every position reached is pushed onto the stack,
with a zero entry pushed first. OP_STAR and OP_UPTO additionally save the
starting STR_PTR below that zero entry. */
5273 if (opcode == OP_STAR || opcode == OP_UPTO)
5274 {
5275 allocate_stack(common, 2);
5276 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5277 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5278 }
5279 else
5280 {
5281 allocate_stack(common, 1);
5282 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5283 }
/* The bounded forms count their iterations in the POSSESSIVE0 local. */
5284 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5285 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5286
5287 label = LABEL();
5288 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5289 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5290 {
5291 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5292 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
/* NOTE(review): the jump back to 'label' below is taken before the
incremented counter in TMP1 is written back to POSSESSIVE0. Confirm whether
OP_CRRANGE can reach this branch at all: get_iterator_parameters() only
yields OP_CRRANGE together with a class opcode as 'type'. */
5293 if (opcode == OP_CRRANGE && arg2 > 0)
5294 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
/* Leave the loop once the counter has reached arg1. */
5295 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
5296 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
5297 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5298 }
5299
/* Push the position reached and try another iteration. */
5300 allocate_stack(common, 1);
5301 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5302 JUMPTO(SLJIT_JUMP, label);
5303 if (jump != NULL)
5304 JUMPHERE(jump);
5305 }
5306 else
5307 {
/* All other types use two stack slots: STACK(0) holds the position after
the last successful iteration and STACK(1) a counter which starts at 1. */
5308 allocate_stack(common, 2);
5309 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5311 label = LABEL();
5312 compile_char1_hotpath(common, type, cc, &nomatch);
5313 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5314 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
5315 {
/* No upper limit: count and loop unconditionally. */
5316 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5317 JUMPTO(SLJIT_JUMP, label);
5318 }
5319 else
5320 {
/* Upper limit arg1: loop only while the counter stays below arg1 + 1. */
5321 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5322 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5323 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5324 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5325 }
5326 set_jumps(nomatch, LABEL());
/* Fail when the counter shows fewer iterations than required (the counter
started at 1, hence the limits 2 and arg2 + 1). */
5327 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5328 add_jump(compiler, &fallback->topfallbacks,
5329 CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1));
5330 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5331 }
5332 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5333 break;
5334
5335 case OP_MINSTAR:
5336 case OP_MINPLUS:
/* Only the current position is saved here. OP_MINPLUS jumps straight to
topfallbacks; the matching itself is not generated in this function. */
5337 allocate_stack(common, 1);
5338 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5339 if (opcode == OP_MINPLUS)
5340 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5341 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5342 break;
5343
5344 case OP_MINUPTO:
5345 case OP_CRMINRANGE:
/* Saves the position and a counter starting at 1. OP_CRMINRANGE jumps
straight to topfallbacks. */
5346 allocate_stack(common, 2);
5347 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5348 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5349 if (opcode == OP_CRMINRANGE)
5350 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5351 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5352 break;
5353
5354 case OP_QUERY:
5355 case OP_MINQUERY:
/* The starting position is saved; only OP_QUERY tries to match the item
right away. */
5356 allocate_stack(common, 1);
5357 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5358 if (opcode == OP_QUERY)
5359 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5360 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5361 break;
5362
5363 case OP_EXACT:
/* Counter in POSSESSIVE0, starting at 1; the item is matched until the
incremented counter reaches arg1 + 1. No stack space is used. */
5364 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5365 label = LABEL();
5366 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5367 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5368 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5369 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5370 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5371 break;
5372
5373 case OP_POSSTAR:
5374 case OP_POSPLUS:
5375 case OP_POSUPTO:
/* These forms use no stack: POSSESSIVE1 holds the position after the last
successful iteration and POSSESSIVE0 is a counter / "matched once" marker. */
5376 if (opcode != OP_POSSTAR)
5377 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5378 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5379 label = LABEL();
5380 compile_char1_hotpath(common, type, cc, &nomatch);
5381 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5382 if (opcode != OP_POSUPTO)
5383 {
/* OP_POSPLUS records that at least one iteration matched. */
5384 if (opcode == OP_POSPLUS)
5385 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
5386 JUMPTO(SLJIT_JUMP, label);
5387 }
5388 else
5389 {
5390 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5391 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5392 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5393 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5394 }
5395 set_jumps(nomatch, LABEL());
5396 if (opcode == OP_POSPLUS)
5397 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
/* Continue from the position after the last successful iteration. */
5398 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5399 break;
5400
5401 case OP_POSQUERY:
/* One attempt: POSSESSIVE1 keeps the start position and is overwritten
only if the item matched; STR_PTR is then reloaded from it. */
5402 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5403 compile_char1_hotpath(common, type, cc, &nomatch);
5404 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5405 set_jumps(nomatch, LABEL());
5406 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5407 break;
5408
5409 default:
5410 SLJIT_ASSERT_STOP();
5411 break;
5412 }
5413
5414 decrease_call_count(common);
5415 return end;
5416 }
5417
5418 static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
5419 {
5420 DEFINE_COMPILER;
5421 fallback_common *fallback;
5422
5423 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
5424
5425 if (*cc == OP_FAIL)
5426 {
5427 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5428 return cc + 1;
5429 }
5430
5431 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
5432 {
5433 /* No need to check notempty conditions. */
5434 if (common->acceptlabel == NULL)
5435 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
5436 else
5437 JUMPTO(SLJIT_JUMP, common->acceptlabel);
5438 return cc + 1;
5439 }
5440
5441 if (common->acceptlabel == NULL)
5442 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
5443 else
5444 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
5445 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5446 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
5447 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5448 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
5449 if (common->acceptlabel == NULL)
5450 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5451 else
5452 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
5453 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5454 if (common->acceptlabel == NULL)
5455 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
5456 else
5457 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
5458 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5459 return cc + 1;
5460 }
5461
5462 static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc)
5463 {
5464 DEFINE_COMPILER;
5465 int offset = GET2(cc, 1);
5466
5467 /* Data will be discarded anyway... */
5468 if (common->currententry != NULL)
5469 return cc + 1 + IMM2_SIZE;
5470
5471 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5472 offset <<= 1;
5473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5474 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5475 return cc + 1 + IMM2_SIZE;
5476 }
5477
5478 static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent)
5479 {
5480 DEFINE_COMPILER;
5481 fallback_common *fallback;
5482
5483 while (cc < ccend)
5484 {
5485 switch(*cc)
5486 {
5487 case OP_SOD:
5488 case OP_SOM:
5489 case OP_NOT_WORD_BOUNDARY:
5490 case OP_WORD_BOUNDARY:
5491 case OP_NOT_DIGIT:
5492 case OP_DIGIT:
5493 case OP_NOT_WHITESPACE:
5494 case OP_WHITESPACE:
5495 case OP_NOT_WORDCHAR:
5496 case OP_WORDCHAR:
5497 case OP_ANY:
5498 case OP_ALLANY:
5499 case OP_ANYBYTE:
5500 case OP_NOTPROP:
5501 case OP_PROP:
5502 case OP_ANYNL:
5503 case OP_NOT_HSPACE:
5504 case OP_HSPACE:
5505 case OP_NOT_VSPACE:
5506 case OP_VSPACE:
5507 case OP_EXTUNI:
5508 case OP_EODN:
5509 case OP_EOD:
5510 case OP_CIRC:
5511 case OP_CIRCM:
5512 case OP_DOLL:
5513 case OP_DOLLM:
5514 case OP_NOT:
5515 case OP_NOTI:
5516 case OP_REVERSE:
5517 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5518 break;
5519
5520 case OP_SET_SOM:
5521 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
5522 allocate_stack(common, 1);
5523 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5524 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5525 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5526 cc++;
5527 break;
5528
5529 case OP_CHAR:
5530 case OP_CHARI:
5531 if (common->mode == JIT_COMPILE)
5532 cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5533 else
5534 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5535 break;
5536
5537 case OP_STAR:
5538 case OP_MINSTAR:
5539 case OP_PLUS:
5540 case OP_MINPLUS:
5541 case OP_QUERY:
5542 case OP_MINQUERY:
5543 case OP_UPTO:
5544 case OP_MINUPTO:
5545 case OP_EXACT:
5546 case OP_POSSTAR:
5547 case OP_POSPLUS:
5548 case OP_POSQUERY:
5549 case OP_POSUPTO:
5550 case OP_STARI:
5551 case OP_MINSTARI:
5552 case OP_PLUSI:
5553 case OP_MINPLUSI:
5554 case OP_QUERYI:
5555 case OP_MINQUERYI:
5556 case OP_UPTOI:
5557 case OP_MINUPTOI:
5558 case OP_EXACTI:
5559 case OP_POSSTARI:
5560 case OP_POSPLUSI:
5561 case OP_POSQUERYI:
5562 case OP_POSUPTOI:
5563 case OP_NOTSTAR:
5564 case OP_NOTMINSTAR:
5565 case OP_NOTPLUS:
5566 case OP_NOTMINPLUS:
5567 case OP_NOTQUERY:
5568 case OP_NOTMINQUERY:
5569 case OP_NOTUPTO:
5570 case OP_NOTMINUPTO:
5571 case OP_NOTEXACT:
5572 case OP_NOTPOSSTAR:
5573 case OP_NOTPOSPLUS:
5574 case OP_NOTPOSQUERY:
5575 case OP_NOTPOSUPTO:
5576 case OP_NOTSTARI:
5577 case OP_NOTMINSTARI:
5578 case OP_NOTPLUSI:
5579 case OP_NOTMINPLUSI:
5580 case OP_NOTQUERYI:
5581 case OP_NOTMINQUERYI:
5582 case OP_NOTUPTOI:
5583 case OP_NOTMINUPTOI:
5584 case OP_NOTEXACTI:
5585 case OP_NOTPOSSTARI:
5586 case OP_NOTPOSPLUSI:
5587 case OP_NOTPOSQUERYI:
5588 case OP_NOTPOSUPTOI:
5589 case OP_TYPESTAR:
5590 case OP_TYPEMINSTAR:
5591 case OP_TYPEPLUS:
5592 case OP_TYPEMINPLUS:
5593 case OP_TYPEQUERY:
5594 case OP_TYPEMINQUERY:
5595 case OP_TYPEUPTO:
5596 case OP_TYPEMINUPTO:
5597 case OP_TYPEEXACT:
5598 case OP_TYPEPOSSTAR:
5599 case OP_TYPEPOSPLUS:
5600 case OP_TYPEPOSQUERY:
5601 case OP_TYPEPOSUPTO:
5602 cc = compile_iterator_hotpath(common, cc, parent);
5603 break;
5604
5605 case OP_CLASS:
5606 case OP_NCLASS:
5607 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
5608 cc = compile_iterator_hotpath(common, cc, parent);
5609 else
5610 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5611 break;
5612
5613 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
5614 case OP_XCLASS:
5615 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5616 cc = compile_iterator_hotpath(common, cc, parent);
5617 else
5618 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5619 break;
5620 #endif
5621
5622 case OP_REF:
5623 case OP_REFI:
5624 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
5625 cc = compile_ref_iterator_hotpath(common, cc, parent);
5626 else
5627 cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE);
5628 break;
5629
5630 case OP_RECURSE:
5631 cc = compile_recurse_hotpath(common, cc, parent);
5632 break;
5633
5634 case OP_ASSERT:
5635 case OP_ASSERT_NOT:
5636 case OP_ASSERTBACK:
5637 case OP_ASSERTBACK_NOT:
5638 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5639 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5640 break;
5641
5642 case OP_BRAMINZERO:
5643 PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc);
5644 cc = bracketend(cc + 1);
5645 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5646 {
5647 allocate_stack(common, 1);
5648 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5649 }
5650 else
5651 {
5652 allocate_stack(common, 2);
5653 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5654 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5655 }
5656 FALLBACK_AS(braminzero_fallback)->hotpath = LABEL();
5657 if (cc[1] > OP_ASSERTBACK_NOT)
5658 decrease_call_count(common);
5659 break;
5660
5661 case OP_ONCE:
5662 case OP_ONCE_NC:
5663 case OP_BRA:
5664 case OP_CBRA:
5665 case OP_COND:
5666 case OP_SBRA:
5667 case OP_SCBRA:
5668 case OP_SCOND:
5669 cc = compile_bracket_hotpath(common, cc, parent);
5670 break;
5671
5672 case OP_BRAZERO:
5673 if (cc[1] > OP_ASSERTBACK_NOT)
5674 cc = compile_bracket_hotpath(common, cc, parent);
5675 else
5676 {
5677 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5678 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5679 }
5680 break;
5681
5682 case OP_BRAPOS:
5683 case OP_CBRAPOS:
5684 case OP_SBRAPOS:
5685 case OP_SCBRAPOS:
5686 case OP_BRAPOSZERO:
5687 cc = compile_bracketpos_hotpath(common, cc, parent);
5688 break;
5689
5690 case OP_FAIL:
5691 case OP_ACCEPT:
5692 case OP_ASSERT_ACCEPT:
5693 cc = compile_fail_accept_hotpath(common, cc, parent);
5694 break;
5695
5696 case OP_CLOSE:
5697 cc = compile_close_hotpath(common, cc);
5698 break;
5699
5700 case OP_SKIPZERO:
5701 cc = bracketend(cc + 1);
5702 break;
5703
5704 default:
5705 SLJIT_ASSERT_STOP();
5706 return;
5707 }
5708 if (cc == NULL)
5709 return;
5710 }
5711 SLJIT_ASSERT(cc == ccend);
5712 }
5713
/* Remove the definitions of the helper macros used by the hot path
compilers above. */
5714 #undef PUSH_FALLBACK
5715 #undef PUSH_FALLBACK_NOVALUE
5716 #undef FALLBACK_AS
5717
/* Compiles the fallback path of 'current' and leaves the enclosing function
when the sljit compiler reports an error. The exit is a plain 'return;', so
the macro is only usable in functions returning void, and it relies on
'common' and 'compiler' being in scope at the point of use. */
5718 #define COMPILE_FALLBACKPATH(current) \
5719 do \
5720 { \
5721 compile_fallbackpath(common, (current)); \
5722 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5723 return; \
5724 } \
5725 while (0)
5726
/* Casts the variable named 'current' (taken from the scope where the macro
is used) to a pointer to the given type. */
5727 #define CURRENT_AS(type) ((type *)current)
5728
/* Emits the fallback-path code for an iterator. The opcode, the repeated item
type and the two numeric arguments are decoded from current->cc by
get_iterator_parameters(). Most cases reload STR_PTR from STACK(0), retry
through CURRENT_AS(iterator_fallback)->hotpath while a retry is still possible,
and finally release their stack slots with free_stack().

common   shared compiler state (also provides the sljit compiler).
current  fallback record of the iterator; it is read as an iterator_fallback. */
5729 static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5730 {
5731 DEFINE_COMPILER;
5732 pcre_uchar *cc = current->cc;
5733 pcre_uchar opcode;
5734 pcre_uchar type;
5735 int arg1 = -1, arg2 = -1;
5736 struct sljit_label *label = NULL;
5737 struct sljit_jump *jump = NULL;
5738
5739 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5740
5741 switch(opcode)
5742 {
5743 case OP_STAR:
5744 case OP_PLUS:
5745 case OP_UPTO:
5746 case OP_CRRANGE:
5747 if (type == OP_ANYNL || type == OP_EXTUNI)
5748 {
/* These two types use a single stack slot: the saved value is popped into
STR_PTR and the hot path is retried unless that value is 0. */
5749 set_jumps(current->topfallbacks, LABEL());
5750 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5751 free_stack(common, 1);
5752 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5753 }
5754 else
5755 {
/* arg2 is the limit used in the comparison below: 0 for OP_STAR and OP_UPTO,
1 for OP_PLUS; OP_CRRANGE keeps the value from get_iterator_parameters(). */
5756 if (opcode == OP_STAR || opcode == OP_UPTO)
5757 arg2 = 0;
5758 else if (opcode == OP_PLUS)
5759 arg2 = 1;
/* Give up when the counter in STACK(1) is <= arg2 + 1. Otherwise decrement it,
step the saved position in STACK(0) back with skip_char_back() and retry. */
5760 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1);
5761 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5762 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5763 skip_char_back(common);
5764 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5765 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5766 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5767 set_jumps(current->topfallbacks, LABEL());
5768 JUMPHERE(jump);
5769 free_stack(common, 2);
5770 }
5771 break;
5772
5773 case OP_MINSTAR:
5774 case OP_MINPLUS:
/* Reload the saved position, emit one more compile_char1_hotpath() for the
item, store the new position and retry. Jumps collected by that call land
after the retry jump, where the single stack slot is released. */
5775 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5776 if (opcode == OP_MINPLUS)
5777 {
5778 set_jumps(current->topfallbacks, LABEL());
5779 current->topfallbacks = NULL;
5780 }
5781 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5782 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5783 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5784 set_jumps(current->topfallbacks, LABEL());
5785 free_stack(common, 1);
5786 break;
5787
5788 case OP_MINUPTO:
5789 case OP_CRMINRANGE:
5790 if (opcode == OP_CRMINRANGE)
5791 {
5792 set_jumps(current->topfallbacks, LABEL());
5793 current->topfallbacks = NULL;
5794 label = LABEL();
5795 }
5796 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5797 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5798
/* Store the new position in STACK(0) and the incremented counter in STACK(1);
TMP1 keeps the incremented counter for the comparisons below. */
5799 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5800 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5801 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5802 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5803
/* OP_CRMINRANGE: loop back to "label" while the counter is below arg2 + 1. */
5804 if (opcode == OP_CRMINRANGE)
5805 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5806
/* With arg1 == 0 the OP_CRMINRANGE retry is unconditional; otherwise the hot
path is retried only while the counter is below arg1 + 2. */
5807 if (opcode == OP_CRMINRANGE && arg1 == 0)
5808 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5809 else
5810 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath);
5811
5812 set_jumps(current->topfallbacks, LABEL());
5813 free_stack(common, 2);
5814 break;
5815
5816 case OP_QUERY:
/* Two entry points: falling in from above, and the topfallbacks label. Both
reload STR_PTR from STACK(0) and overwrite the slot with 0; the first retries
only when the loaded value is non-zero, the second retries unconditionally. */
5817 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5818 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5819 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5820 jump = JUMP(SLJIT_JUMP);
5821 set_jumps(current->topfallbacks, LABEL());
5822 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5824 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5825 JUMPHERE(jump);
5826 free_stack(common, 1);
5827 break;
5828
5829 case OP_MINQUERY:
/* A saved value of 0 skips the extra compile_char1_hotpath() attempt and goes
straight to releasing the stack slot. */
5830 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5831 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5832 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5833 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5834 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5835 set_jumps(current->topfallbacks, LABEL());
5836 JUMPHERE(jump);
5837 free_stack(common, 1);
5838 break;
5839
5840 case OP_EXACT:
5841 case OP_POSPLUS:
/* Nothing to restore: only bind the pending jumps to this point. */
5842 set_jumps(current->topfallbacks, LABEL());
5843 break;
5844
5845 case OP_POSSTAR:
5846 case OP_POSQUERY:
5847 case OP_POSUPTO:
/* No code is emitted for these opcodes. */
5848 break;
5849
5850 default:
5851 SLJIT_ASSERT_STOP();
5852 break;
5853 }
5854 }
5855
/* Emits the fallback path for a repeated back reference (called for OP_REF and
OP_REFI by compile_fallbackpath). The unit at cc[1 + IMM2_SIZE] selects the
variant: when its lowest bit is clear one stack slot is used, otherwise two.
NOTE(review): that unit is presumably the repeat opcode following the
reference number, with the low bit marking the minimizing forms - confirm
against the opcode table. */
5856 static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5857 {
5858 DEFINE_COMPILER;
5859 pcre_uchar *cc = current->cc;
5860 pcre_uchar type;
5861
5862 type = cc[1 + IMM2_SIZE];
5863 if ((type & 0x1) == 0)
5864 {
/* Pop the saved value into STR_PTR and retry the hot path unless it is 0. */
5865 set_jumps(current->topfallbacks, LABEL());
5866 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5867 free_stack(common, 1);
5868 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5869 return;
5870 }
5871
/* Here the slots stay allocated while retrying; both are released only when the
saved value is 0 or when a topfallbacks jump arrives. */
5872 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5873 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5874 set_jumps(current->topfallbacks, LABEL());
5875 free_stack(common, 2);
5876 }
5877
/* Emits the fallback path for OP_RECURSE: binds the pending topfallbacks jumps
to this point, pops one value from the stack and writes it to OVECTOR(0). */
5878 static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current)
5879 {
5880 DEFINE_COMPILER;
5881
5882 set_jumps(current->topfallbacks, LABEL());
5883 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5884 free_stack(common, 1);
5885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
5886 }
5887
/* Emits the fallback path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO.
OP_BRAMINZERO must not reach this function (asserted below).

common   shared compiler state.
current  fallback record of the assertion; it is read as an assert_fallback. */
5888 static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current)
5889 {
5890 DEFINE_COMPILER;
5891 pcre_uchar *cc = current->cc;
5892 pcre_uchar bra = OP_BRA;
5893 struct sljit_jump *brajump = NULL;
5894
5895 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
/* Remember an OP_BRAZERO prefix and step over it so that *cc is the assertion
opcode itself. */
5896 if (*cc == OP_BRAZERO)
5897 {
5898 bra = *cc;
5899 cc++;
5900 }
5901
5902 if (bra == OP_BRAZERO)
5903 {
5904 SLJIT_ASSERT(current->topfallbacks == NULL);
5905 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5906 }
5907
/* Negative framesize: nothing is reverted through common->revertframes. Only
the pending jumps are bound and, for OP_BRAZERO, the slot is cleared and the
hot path is retried when the loaded value was non-zero. */
5908 if (CURRENT_AS(assert_fallback)->framesize < 0)
5909 {
5910 set_jumps(current->topfallbacks, LABEL());
5911
5912 if (bra == OP_BRAZERO)
5913 {
5914 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5915 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5916 free_stack(common, 1);
5917 }
5918 return;
5919 }
5920
5921 if (bra == OP_BRAZERO)
5922 {
/* Negative assertions with OP_BRAZERO need no frame handling here. */
5923 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
5924 {
5925 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5926 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5927 free_stack(common, 1);
5928 return;
5929 }
5930 free_stack(common, 1);
/* A loaded value of 0 skips everything up to the JUMPHERE(brajump) at the end. */
5931 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5932 }
5933
5934 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
5935 {
/* Reset STACK_TOP from the assertion's local slot, call the shared
revertframes routine, then reload the local slot from the stack at offset
framesize * sizeof(sljit_w). */
5936 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr);
5937 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5938 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w));
5939
5940 set_jumps(current->topfallbacks, LABEL());
5941 }
5942 else
5943 set_jumps(current->topfallbacks, LABEL());
5944
5945 if (bra == OP_BRAZERO)
5946 {
5947 /* We know there is enough space on the stack, so STACK_TOP is adjusted
directly instead of calling allocate_stack(). */
5948 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5950 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath);
5951 JUMPHERE(brajump);
5952 }
5953 }
5954
/* Emits the fallback path of a bracket group. compile_fallbackpath calls it for
OP_ONCE, OP_ONCE_NC, OP_BRA, OP_CBRA, OP_COND, OP_SBRA, OP_SCBRA and OP_SCOND,
and for OP_BRAZERO when the following opcode is not an assertion;
compile_braminzero_fallbackpath calls it for the OP_BRAMINZERO case.

The emitted code is laid out in this order:
  1. handling of the closing opcode (ket) and of a BRAZERO/BRAMINZERO prefix;
  2. a dispatch on the value popped from the stack, which selects the
     alternative to resume from (not used for OP_ONCE);
  3. the fallback path of the first alternative (current->top);
  4. for every further alternative: its hot path, the code that jumps to
     althotpath after a successful match, and its own fallback path;
  5. restoration of the saved values and the final retry / release code.

The function returns early, leaving the emitted code incomplete, when
sljit_alloc_memory() fails or the sljit compiler is in an error state.

common   shared compiler state.
current  fallback record of the bracket; it is read as a bracket_fallback and
         its top / topfallbacks / nextfallbacks fields are reset and reused
         for each further alternative. */
5955 static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current)
5956 {
5957 DEFINE_COMPILER;
5958 int opcode;
5959 int offset = 0;
5960 int localptr = CURRENT_AS(bracket_fallback)->localptr;
5961 int stacksize;
5962 int count;
5963 pcre_uchar *cc = current->cc;
5964 pcre_uchar *ccbegin;
5965 pcre_uchar *ccprev;
5966 jump_list *jumplist = NULL;
5967 jump_list *jumplistitem = NULL;
5968 pcre_uchar bra = OP_BRA;
5969 pcre_uchar ket;
5970 assert_fallback *assert;
5971 BOOL has_alternatives;
5972 struct sljit_jump *brazero = NULL;
5973 struct sljit_jump *once = NULL;
5974 struct sljit_jump *cond = NULL;
5975 struct sljit_label *rminlabel = NULL;
5976
/* Remember an OP_BRAZERO / OP_BRAMINZERO prefix and step over it. */
5977 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5978 {
5979 bra = *cc;
5980 cc++;
5981 }
5982
/* ket is the opcode found 1 + LINK_SIZE units before the end of the bracket;
cc is moved to the end of the first alternative. */
5983 opcode = *cc;
5984 ccbegin = cc;
5985 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
5986 cc += GET(cc, 1);
5987 has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives is recomputed: true when the
condition is an assertion or when u.condfailed holds jumps. */
5988 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5989 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL;
/* offset is twice the number read with GET2 for capturing brackets, and is used
as the OVECTOR index below; it stays 0 for other groups. */
5990 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5991 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* From here on an OP_COND whose first alternative ends in a repeating ket is
treated as OP_SCOND, and OP_ONCE_NC is treated as OP_ONCE. */
5992 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5993 opcode = OP_SCOND;
5994 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5995 opcode = OP_ONCE;
5996
5997 if (ket == OP_KETRMAX)
5998 {
5999 if (bra != OP_BRAZERO)
6000 free_stack(common, 1);
6001 else
6002 {
/* A popped value of 0 jumps to JUMPHERE(brazero) in the OP_KETRMAX tail. */
6003 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6004 free_stack(common, 1);
6005 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6006 }
6007 }
6008 else if (ket == OP_KETRMIN)
6009 {
6010 if (bra != OP_BRAMINZERO)
6011 {
6012 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6013 if (opcode >= OP_SBRA || opcode == OP_ONCE)
6014 {
6015 /* Checking zero-length iteration: the group is re-entered through
recursivehotpath only when STR_PTR differs from the stored position. */
6016 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0)
6017 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath);
6018 else
6019 {
6020 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6021 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath);
6022 }
6023 if (opcode != OP_ONCE)
6024 free_stack(common, 1);
6025 }
6026 else
6027 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath);
6028 }
/* Target of the conditional jump emitted in the OP_KETRMIN tail. */
6029 rminlabel = LABEL();
6030 }
6031 else if (bra == OP_BRAZERO)
6032 {
/* A non-zero popped value jumps to JUMPHERE(brazero) at the very end. */
6033 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6034 free_stack(common, 1);
6035 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6036 }
6037
6038 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
6039 {
6040 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6041 {
6042 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6043 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6044 }
/* Skip all the alternative code below; bound by JUMPHERE(once) further down. */
6045 once = JUMP(SLJIT_JUMP);
6046 }
6047 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6048 {
6049 if (has_alternatives)
6050 {
6051 /* Always exactly one alternative. */
6052 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6053 free_stack(common, 1);
6054
6055 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6056 if (SLJIT_UNLIKELY(!jumplistitem))
6057 return;
6058 jumplist = jumplistitem;
6059 jumplistitem->next = NULL;
6060 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
6061 }
6062 }
6063 else if (*cc == OP_ALT)
6064 {
6065 /* Build a jump list. Get the last successfully matched branch index. */
6066 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6067 free_stack(common, 1);
6068 count = 1;
/* One conditional jump per OP_ALT, taken when the popped index equals count
(1, 2, ...). The jumps are bound one by one in the alternative loop below. */
6069 do
6070 {
6071 /* Append as the last item. */
6072 if (jumplist != NULL)
6073 {
6074 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
6075 jumplistitem = jumplistitem->next;
6076 }
6077 else
6078 {
6079 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6080 jumplist = jumplistitem;
6081 }
6082
6083 if (SLJIT_UNLIKELY(!jumplistitem))
6084 return;
6085
6086 jumplistitem->next = NULL;
6087 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
6088 cc += GET(cc, 1);
6089 }
6090 while (*cc == OP_ALT);
6091
/* Rewind cc to the end of the first alternative. */
6092 cc = ccbegin + GET(ccbegin, 1);
6093 }
6094
/* Fallback path of the first alternative. */
6095 COMPILE_FALLBACKPATH(current->top);
6096 if (current->topfallbacks)
6097 set_jumps(current->topfallbacks, LABEL());
6098
6099 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6100 {
6101 /* Conditional block always has at most one alternative. */
6102 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
6103 {
6104 SLJIT_ASSERT(has_alternatives);
6105 assert = CURRENT_AS(bracket_fallback)->u.assert;
6106 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
6107 {
6108 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6109 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6110 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6111 }
/* Jump over the alternative code; the condfailed jumps enter it here. */
6112 cond = JUMP(SLJIT_JUMP);
6113 set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL());
6114 }
6115 else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL)
6116 {
6117 SLJIT_ASSERT(has_alternatives);
6118 cond = JUMP(SLJIT_JUMP);
6119 set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL());
6120 }
6121 else
6122 SLJIT_ASSERT(!has_alternatives);
6123 }
6124
6125 if (has_alternatives)
6126 {
6127 count = 1;
6128 do
6129 {
/* The fallback record is reused for each alternative. */
6130 current->top = NULL;
6131 current->topfallbacks = NULL;
6132 current->nextfallbacks = NULL;
6133 if (*cc == OP_ALT)
6134 {
6135 ccprev = cc + 1 + LINK_SIZE;
6136 cc += GET(cc, 1);
/* Reload STR_PTR for the next alternative: from the local slot when one exists
and the group is not OP_ONCE, otherwise from the stack top. */
6137 if (opcode != OP_COND && opcode != OP_SCOND)
6138 {
6139 if (localptr != 0 && opcode != OP_ONCE)
6140 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6141 else
6142 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6143 }
6144 compile_hotpath(common, ccprev, cc, current);
6145 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6146 return;
6147 }
6148
6149 /* Instructions executed after the current alternative is successfully matched. */
6150 /* There is similar code in compile_bracket_hotpath. */
6151 if (opcode == OP_ONCE)
6152 {
6153 if (CURRENT_AS(bracket_fallback)->u.framesize < 0)
6154 {
6155 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6156 /* TMP2 which is set here used by OP_KETRMAX below. */
6157 if (ket == OP_KETRMAX)
6158 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6159 else if (ket == OP_KETRMIN)
6160 {
6161 /* Move the STR_PTR to the localptr. */
6162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
6163 }
6164 }
6165 else
6166 {
6167 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w));
6168 if (ket == OP_KETRMAX)
6169 {
6170 /* TMP2 which is set here used by OP_KETRMAX below. */
6171 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6172 }
6173 }
6174 }
6175
/* Count the slots to allocate: one for the alternative index (not for OP_ONCE)
and one more when the group repeats or carries a BRAZERO/BRAMINZERO prefix. */
6176 stacksize = 0;
6177 if (opcode != OP_ONCE)
6178 stacksize++;
6179 if (ket != OP_KET || bra != OP_BRA)
6180 stacksize++;
6181
6182 if (stacksize > 0) {
6183 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6184 allocate_stack(common, stacksize);
6185 else
6186 {
6187 /* We know we have place at least for one item on the top of the stack. */
6188 SLJIT_ASSERT(stacksize == 1);
6189 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6190 }
6191 }
6192
/* stacksize is reused as the index of the next slot to fill. */
6193 stacksize = 0;
6194 if (ket != OP_KET || bra != OP_BRA)
6195 {
6196 if (ket != OP_KET)
6197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6198 else
6199 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6200 stacksize++;
6201 }
6202
/* Store the 1-based index of this alternative; it is the value the dispatch
jumps built above compare against. */
6203 if (opcode != OP_ONCE)
6204 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
6205
/* Capturing group: write the start (from the local slot) and the end (STR_PTR)
into the OVECTOR pair. */
6206 if (offset != 0)
6207 {
6208 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6210 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
6211 }
6212
6213 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath);
6214
/* Bind the dispatch jump that belongs to this alternative. */
6215 if (opcode != OP_ONCE)
6216 {
6217 SLJIT_ASSERT(jumplist);
6218 JUMPHERE(jumplist->jump);
6219 jumplist = jumplist->next;
6220 }
6221
6222 COMPILE_FALLBACKPATH(current->top);
6223 if (current->topfallbacks)
6224 set_jumps(current->topfallbacks, LABEL());
6225 SLJIT_ASSERT(!current->nextfallbacks);
6226 }
6227 while (*cc == OP_ALT);
6228 SLJIT_ASSERT(!jumplist);
6229
6230 if (cond != NULL)
6231 {
6232 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
6233 assert = CURRENT_AS(bracket_fallback)->u.assert;
6234 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
6235
6236 {
6237 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6238 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6239 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6240 }
6241 JUMPHERE(cond);
6242 }
6243
6244 /* Free the STR_PTR. */
6245 if (localptr == 0)
6246 free_stack(common, 1);
6247 }
6248
6249 if (offset != 0)
6250 {
6251 /* Using both tmp registers is better for instruction scheduling. */
/* Restore the OVECTOR pair and the local slot from the three stacked values. */
6252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6253 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6254 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6255 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6256 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
6257 free_stack(common, 3);
6258 }
6259 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6260 {
6261 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
6262 free_stack(common, 1);
6263 }
6264 else if (opcode == OP_ONCE)
6265 {
6266 cc = ccbegin + GET(ccbegin, 1);
6267 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6268 {
6269 /* Reset head and drop saved frame. */
6270 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
6271 free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize);
6272 }
6273 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
6274 {
6275 /* The STR_PTR must be released. */
6276 free_stack(common, 1);
6277 }
6278
6279 JUMPHERE(once);
6280 /* Restore previous localptr */
6281 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6282 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w));
6283 else if (ket == OP_KETRMIN)
6284 {
6285 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6286 /* See the comment below. */
6287 free_stack(common, 2);
6288 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
6289 }
6290 }
6291
6292 if (ket == OP_KETRMAX)
6293 {
/* A non-zero stacked value re-enters the group through recursivehotpath. */
6294 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6295 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath);
6296 if (bra == OP_BRAZERO)
6297 {
6298 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6299 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
6300 JUMPHERE(brazero);
6301 }
6302 free_stack(common, 1);
6303 }
6304 else if (ket == OP_KETRMIN)
6305 {
6306 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6307
6308 /* OP_ONCE removes everything in case of a fallback, so we don't
6309 need to explicitly release the STR_PTR. The extra release would
6310 badly affect the free_stack(2) above. */
6311 if (opcode != OP_ONCE)
6312 free_stack(common, 1);
6313 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
6314 if (opcode == OP_ONCE)
6315 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
6316 else if (bra == OP_BRAMINZERO)
6317 free_stack(common, 1);
6318 }
6319 else if (bra == OP_BRAZERO)
6320 {
6321 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6322 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
6323 JUMPHERE(brazero);
6324 }
6325 }
6326
/* Emits the fallback path for the bracket opcodes dispatched here by
compile_fallbackpath (OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS, OP_SCBRAPOS and
OP_BRAPOSZERO).

common   shared compiler state.
current  fallback record; it is read as a bracketpos_fallback. */
6327 static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current)
6328 {
6329 DEFINE_COMPILER;
6330 int offset;
6331 struct sljit_jump *jump;
6332
/* Negative framesize: no call to the revertframes routine. For the capturing
forms the OVECTOR pair is restored from STACK(0) and STACK(1) first; then the
pending jumps are bound and "stacksize" slots are released. */
6333 if (CURRENT_AS(bracketpos_fallback)->framesize < 0)
6334 {
6335 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
6336 {
6337 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
6338 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6339 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6340 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6341 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6342 }
6343 set_jumps(current->topfallbacks, LABEL());
6344 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6345 return;
6346 }
6347
/* Reset STACK_TOP from the local slot and call the shared revertframes routine. */
6348 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr);
6349 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6350
6351 if (current->topfallbacks)
6352 {
/* Only the topfallbacks entry releases the stack here; the path that falls in
from above jumps over the free_stack() call. */
6353 jump = JUMP(SLJIT_JUMP);
6354 set_jumps(current->topfallbacks, LABEL());
6355 /* Drop the stack frame. */
6356 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6357 JUMPHERE(jump);
6358 }
/* Reload the local slot from the stack at offset framesize * sizeof(sljit_w). */
6359 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w));
6360 }
6361
/* Emits the fallback path for OP_BRAMINZERO. The hot path of the following
group is generated here, on the fallback side: a bracket gets both its hot path
and its fallback path compiled by hand, while an assertion (opcode at
current->cc[1] not above OP_ASSERTBACK_NOT) gets only compile_assert_hotpath()
with a zero-initialised temporary assert_fallback.

common   shared compiler state.
current  fallback record; its top / topfallbacks / nextfallbacks fields are
         cleared first, and it is read as a braminzero_fallback for "hotpath". */
6362 static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current)
6363 {
6364 assert_fallback fallback;
6365
6366 current->top = NULL;
6367 current->topfallbacks = NULL;
6368 current->nextfallbacks = NULL;
6369 if (current->cc[1] > OP_ASSERTBACK_NOT)
6370 {
6371 /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */
6372 compile_bracket_hotpath(common, current->cc, current);
6373 compile_bracket_fallbackpath(common, current->top);
6374 }
6375 else
6376 {
6377 memset(&fallback, 0, sizeof(fallback));
6378 fallback.common.cc = current->cc;
6379 fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath;
6380 /* Manual call of compile_assert_hotpath. */
6381 compile_assert_hotpath(common, current->cc, &fallback, FALSE);
6382 }
6383 SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks);
6384 }
6385
/* Emits the fallback paths for a chain of fallback records. Starting at
"current" and following the ->prev links until NULL, each record first has its
nextfallbacks jumps (if any) bound to a new label, and is then handed to the
emitter selected by the opcode at current->cc. An opcode without a case label
triggers SLJIT_ASSERT_STOP().

This function does not check the sljit compiler error state itself; callers in
this file check it afterwards (see COMPILE_FALLBACKPATH).

common   shared compiler state.
current  first record to process; may be NULL, in which case nothing is emitted. */
6386 static void compile_fallbackpath(compiler_common *common, struct fallback_common *current)
6387 {
6388 DEFINE_COMPILER;
6389
6390 while (current)
6391 {
6392 if (current->nextfallbacks != NULL)
6393 set_jumps(current->nextfallbacks, LABEL());
6394 switch(*current->cc)
6395 {
6396 case OP_SET_SOM:
/* Pop one value from the stack and write it to OVECTOR(0). */
6397 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6398 free_stack(common, 1);
6399 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
6400 break;
6401
/* All repeat opcodes and the character classes share one emitter. */
6402 case OP_STAR:
6403 case OP_MINSTAR:
6404 case OP_PLUS:
6405 case OP_MINPLUS:
6406 case OP_QUERY:
6407 case OP_MINQUERY:
6408 case OP_UPTO:
6409 case OP_MINUPTO:
6410 case OP_EXACT:
6411 case OP_POSSTAR:
6412 case OP_POSPLUS:
6413 case OP_POSQUERY:
6414 case OP_POSUPTO:
6415 case OP_STARI:
6416 case OP_MINSTARI:
6417 case OP_PLUSI:
6418 case OP_MINPLUSI:
6419 case OP_QUERYI:
6420 case OP_MINQUERYI:
6421 case OP_UPTOI:
6422 case OP_MINUPTOI:
6423 case OP_EXACTI:
6424 case OP_POSSTARI:
6425 case OP_POSPLUSI:
6426 case OP_POSQUERYI:
6427 case OP_POSUPTOI:
6428 case OP_NOTSTAR:
6429 case OP_NOTMINSTAR:
6430 case OP_NOTPLUS:
6431 case OP_NOTMINPLUS:
6432 case OP_NOTQUERY:
6433 case OP_NOTMINQUERY:
6434 case OP_NOTUPTO:
6435 case OP_NOTMINUPTO:
6436 case OP_NOTEXACT:
6437 case OP_NOTPOSSTAR:
6438 case OP_NOTPOSPLUS:
6439 case OP_NOTPOSQUERY:
6440 case OP_NOTPOSUPTO:
6441 case OP_NOTSTARI:
6442 case OP_NOTMINSTARI:
6443 case OP_NOTPLUSI:
6444 case OP_NOTMINPLUSI:
6445 case OP_NOTQUERYI:
6446 case OP_NOTMINQUERYI:
6447 case OP_NOTUPTOI:
6448 case OP_NOTMINUPTOI:
6449 case OP_NOTEXACTI:
6450 case OP_NOTPOSSTARI:
6451 case OP_NOTPOSPLUSI:
6452 case OP_NOTPOSQUERYI:
6453 case OP_NOTPOSUPTOI:
6454 case OP_TYPESTAR:
6455 case OP_TYPEMINSTAR:
6456 case OP_TYPEPLUS:
6457 case OP_TYPEMINPLUS:
6458 case OP_TYPEQUERY:
6459 case OP_TYPEMINQUERY:
6460 case OP_TYPEUPTO:
6461 case OP_TYPEMINUPTO:
6462 case OP_TYPEEXACT:
6463 case OP_TYPEPOSSTAR:
6464 case OP_TYPEPOSPLUS:
6465 case OP_TYPEPOSQUERY:
6466 case OP_TYPEPOSUPTO:
6467 case OP_CLASS:
6468 case OP_NCLASS:
6469 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6470 case OP_XCLASS:
6471 #endif
6472 compile_iterator_fallbackpath(common, current);
6473 break;
6474
6475 case OP_REF:
6476 case OP_REFI:
6477 compile_ref_iterator_fallbackpath(common, current);
6478 break;
6479
6480 case OP_RECURSE:
6481 compile_recurse_fallbackpath(common, current);
6482 break;
6483
6484 case OP_ASSERT:
6485 case OP_ASSERT_NOT:
6486 case OP_ASSERTBACK:
6487 case OP_ASSERTBACK_NOT:
6488 compile_assert_fallbackpath(common, current);
6489 break;
6490
6491 case OP_ONCE:
6492 case OP_ONCE_NC:
6493 case OP_BRA:
6494 case OP_CBRA:
6495 case OP_COND:
6496 case OP_SBRA:
6497 case OP_SCBRA:
6498 case OP_SCOND:
6499 compile_bracket_fallbackpath(common, current);
6500 break;
6501
6502 case OP_BRAZERO:
/* The opcode after OP_BRAZERO decides between the bracket and the assertion
emitter; the test relies on the numeric order of the opcode values. */
6503 if (current->cc[1] > OP_ASSERTBACK_NOT)
6504 compile_bracket_fallbackpath(common, current);
6505 else
6506 compile_assert_fallbackpath(common, current);
6507 break;
6508
6509 case OP_BRAPOS:
6510 case OP_CBRAPOS:
6511 case OP_SBRAPOS:
6512 case OP_SCBRAPOS:
6513 case OP_BRAPOSZERO:
6514 compile_bracketpos_fallbackpath(common, current);
6515 break;
6516
6517 case OP_BRAMINZERO:
6518 compile_braminzero_fallbackpath(common, current);
6519 break;
6520
6521 case OP_FAIL:
6522 case OP_ACCEPT:
6523 case OP_ASSERT_ACCEPT:
/* Nothing to undo: only bind the pending jumps to this point. */
6524 set_jumps(current->topfallbacks, LABEL());
6525 break;
6526
6527 default:
6528 SLJIT_ASSERT_STOP();
6529 break;
6530 }
6531 current = current->prev;
6532 }
6533 }
6534
/* Emits the routine for the recursion entry common->currententry. The bracket
starts at common->start + currententry->start and must be one of OP_BRA,
OP_CBRA, OP_CBRAPOS, OP_SCBRA or OP_SCBRAPOS (asserted).

Emitted code, in order:
  - the entry label is stored in currententry->entry and the pending calls are
    bound to it; a fast-call entry places the return address in TMP2;
  - localsize + framesize + alternativesize stack slots are allocated; the
    return address goes into the last of them, copy_locals() is called with
    TRUE, and STACK_TOP is stored in the recursive_head local;
  - each alternative is compiled with compile_hotpath(), followed by a jump
    onto common->accept and by its fallback path;
  - TMP3 is set to 0 when every alternative has failed and to 1 on the accept
    path; it is copied to TMP1 before the fast return;
  - copy_locals() is called with FALSE, the slots are released and
    recursive_head is restored from the stack.

The function returns early, leaving the emitted code incomplete, when the sljit
compiler reports an error after compile_hotpath() or compile_fallbackpath().
It also resets common->acceptlabel and common->accept. */
6535 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6536 {
6537 DEFINE_COMPILER;
6538 pcre_uchar *cc = common->start + common->currententry->start;
/* Skip the opcode and link; every accepted opcode except OP_BRA carries an
additional IMM2_SIZE field. */
6539 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
6540 pcre_uchar *ccend = bracketend(cc);
6541 int localsize = get_localsize(common, ccbegin, ccend);
6542 int framesize = get_framesize(common, cc, TRUE);
6543 int alternativesize;
6544 BOOL needsframe;
6545 fallback_common altfallback;
6546 struct sljit_jump *jump;
6547
6548 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
/* A negative framesize means that no frame is initialised; it is then counted
as zero slots in the size computations below. */
6549 needsframe = framesize >= 0;
6550 if (!needsframe)
6551 framesize = 0;
/* One extra slot (holding STR_PTR) is reserved when the bracket has more than
one alternative. */
6552 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6553
6554 SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head != 0);
6555 common->currententry->entry = LABEL();
6556 set_jumps(common->currententry->calls, common->currententry->entry);
6557
6558 sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize);
6559 allocate_stack(common, localsize + framesize + alternativesize);
6560 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6561 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
6562 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, STACK_TOP, 0);
6563 if (needsframe)
6564 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE);
6565
6566 if (alternativesize > 0)
6567 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6568
6569 memset(&altfallback, 0, sizeof(fallback_common));
6570 common->acceptlabel = NULL;
6571 common->accept = NULL;
6572 altfallback.cc = ccbegin;
6573 cc += GET(cc, 1);
/* One iteration per alternative; cc always points at the end of the
alternative that starts at altfallback.cc. */
6574 while (1)
6575 {
6576 altfallback.top = NULL;
6577 altfallback.topfallbacks = NULL;
6578
/* Every alternative after the first restarts from the STR_PTR saved in STACK(0). */
6579 if (altfallback.cc != ccbegin)
6580 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6581
6582 compile_hotpath(common, altfallback.cc, cc, &altfallback);
6583 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6584 return;
6585
6586 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6587
6588 compile_fallbackpath(common, altfallback.top);
6589 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6590 return;
6591 set_jumps(altfallback.topfallbacks, LABEL());
6592
6593 if (*cc != OP_ALT)
6594 break;
6595
6596 altfallback.cc = cc + 1 + LINK_SIZE;
6597 cc += GET(cc, 1);
6598 }
6599 /* None of them matched. */
6600 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6601 jump = JUMP(SLJIT_JUMP);
6602
/* Accept path: reset STACK_TOP from recursive_head. With a frame, the shared
revertframes routine is called with STACK_TOP temporarily lowered by the frame
size, and OVECTOR(0) is preserved across that call in TMP3. */
6603 set_jumps(common->accept, LABEL());
6604 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head);
6605 if (needsframe)
6606 {
6607 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6608 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6609 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6610 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6611 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0);
6612 }
6613 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
6614
/* Common exit for both outcomes. */
6615 JUMPHERE(jump);
6616 copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize);
6617 free_stack(common, localsize + framesize + alternativesize);
6618 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w));
6619 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
6620 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->recursive_head, TMP2, 0);
6621 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
6622 }
6623
/* The two fallback-path helper macros are not used past this point. */
6624 #undef COMPILE_FALLBACKPATH
6625 #undef CURRENT_AS
6626
6627 void
6628 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
6629 {
6630 struct sljit_compiler *compiler;
6631 fallback_common rootfallback;
6632 compiler_common common_data;
6633 compiler_common *common = &common_data;
6634 const pcre_uint8 *tables = re->tables;
6635 pcre_study_data *study;
6636 pcre_uchar *ccend;
6637 executable_functions *functions;
6638 void *executable_func;
6639 sljit_uw executable_size;
6640 struct sljit_label *leave;
6641 struct sljit_label *mainloop = NULL;
6642 struct sljit_label *empty_match_found;
6643 struct sljit_label *empty_match_fallback;
6644 struct sljit_jump *jump;
6645 struct sljit_jump *reqbyte_notfound = NULL;
6646 struct sljit_jump *empty_match;
6647
6648 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
6649 study = extra->study_data;
6650
6651 if (!tables)
6652 tables = PRIV(default_tables);
6653
6654 memset(&rootfallback, 0, sizeof(fallback_common));
6655 memset(common, 0, sizeof(compiler_common));
6656 rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
6657
6658 common->start = rootfallback.cc;
6659 common->fcc = tables + fcc_offset;
6660 common->lcc = (sljit_w)(tables + lcc_offset);
6661 common->mode = mode;
6662 common->nltype = NLTYPE_FIXED;
6663 switch(re->options & PCRE_NEWLINE_BITS)
6664 {
6665 case 0:
6666 /* Compile-time default */
6667 switch (NEWLINE)
6668 {
6669 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6670 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6671 default: common->newline = NEWLINE; break;
6672 }
6673 break;
6674 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
6675 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
6676 case PCRE_NEWLINE_CR+
6677 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
6678 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6679 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6680 default: return;
6681 }
6682 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
6683 common->bsr_nltype = NLTYPE_ANYCRLF;
6684 else if ((re->options & PCRE_BSR_UNICODE) != 0)
6685 common->bsr_nltype = NLTYPE_ANY;
6686 else
6687 {
6688 #ifdef BSR_ANYCRLF
6689 common->bsr_nltype = NLTYPE_ANYCRLF;
6690 #else
6691 common->bsr_nltype = NLTYPE_ANY;
6692 #endif
6693 }
6694 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6695 common->ctypes = (sljit_w)(tables + ctypes_offset);
6696 common->name_table = (sljit_w)((pcre_uchar *)re + re->name_table_offset);
6697 common->name_count = re->name_count;
6698 common->name_entry_size = re->name_entry_size;
6699 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6700 #ifdef SUPPORT_UTF
6701 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6702 common->utf = (re->options & PCRE_UTF8) != 0;
6703 #ifdef SUPPORT_UCP
6704 common->use_ucp = (re->options & PCRE_UCP) != 0;
6705 #endif
6706 #endif /* SUPPORT_UTF */
6707 ccend = bracketend(rootfallback.cc);
6708
6709 /* Calculate the local space size on the stack. */
6710 common->ovector_start = CALL_LIMIT + sizeof(sljit_w);
6711
6712 SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
6713 common->localsize = get_localspace(common, rootfallback.cc, ccend);
6714 if (common->localsize < 0)
6715 return;
6716
6717 /* Checking flags and updating ovector_start. */
6718 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0)
6719 {
6720 common->req_char_ptr = common->ovector_start;