/[pcre]/code/branches/pcre16/pcre_jit_compile.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 763 - (show annotations) (download)
Tue Nov 22 21:46:22 2011 UTC (2 years, 10 months ago) by zherczeg
File MIME type: text/plain
File size: 202362 byte(s)
string utilities added
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2008 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (pcre_malloc)(size)
56 #define SLJIT_FREE(ptr) (pcre_free)(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error "Unsupported architecture"
66 #endif
67
68 /* Allocate memory on the stack. Fast, but limited size (in bytes). */
69 #define LOCAL_SPACE_SIZE 32768
70
71 #define STACK_GROWTH_RATE 8192 /* NOTE(review): not used in this part of the file; presumably the growth step of the backtracking stack - confirm. */
72
73 /* Enable to check that the allocation could destroy temporaries.
When set, allocate_stack emits extra code which overwrites TMP1, TMP3,
RETURN_ADDR, LOCALS0 and LOCALS1 with the constant 12345. */
74 #if defined SLJIT_DEBUG && SLJIT_DEBUG
75 #define DESTROY_REGISTERS 1
76 #endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct fallback_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called as the hot path (expected path), and the other is called as
93 the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the hot path.
95
96 Greedy star operator (*) :
97 Hot path: match happens.
98 Fallback path: match failed.
99 Non-greedy star operator (*?) :
100 Hot path: no need to perform a match.
101 Fallback path: match is required.
102
103 The following example shows how the code is generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A hot path
112 '(' hot path (pushing arguments to the stack)
113 B hot path
114 ')' hot path (pushing arguments to the stack)
115 D hot path
116 return with successful match
117
118 D fallback path
119 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120 B fallback path
121 C expected path
122 jump to D hot path
123 C fallback path
124 A fallback path
125
126 Notice that the order of the fallback code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current fallback code path. The fallback code path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the hot path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the fallback code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the fallback mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
148 typedef struct jit_arguments { /* Argument block passed by pointer to a jit_function. */
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 PCRE_SPTR str;
152 PCRE_SPTR begin; /* reset_ovector initialises every ovector slot to begin - 1. */
153 PCRE_SPTR end;
154 int *offsets; /* Read by copy_ovector together with offsetcount. */
155 pcre_uchar *ptr;
156 /* Everything else after. */
157 int offsetcount; /* Loaded as an int (SLJIT_MOV_SI) by copy_ovector. */
158 int calllimit;
159 pcre_uint8 notbol;
160 pcre_uint8 noteol;
161 pcre_uint8 notempty;
162 pcre_uint8 notempty_atstart;
163 } jit_arguments;
164
165 typedef struct executable_function { /* Bundles a code pointer with a callback and its user data. */
166 void *executable_func; /* NOTE(review): presumably the generated machine code entry - confirm. */
167 pcre_jit_callback callback;
168 void *userdata; /* NOTE(review): presumably passed back to callback - confirm against the caller. */
169 } executable_function;
170
171 typedef struct jump_list { /* Singly linked list of pending jumps; add_jump pushes at the head, set_jumps binds all of them to one label. */
172 struct sljit_jump *jump;
173 struct jump_list *next;
174 } jump_list;
175
176 enum stub_types { stack_alloc }; /* Kinds of out-of-line stubs handled by flush_stubs; stack_alloc is registered by allocate_stack. */
177
178 typedef struct stub_list { /* One out-of-line stub, queued by add_stub and emitted by flush_stubs. */
179 enum stub_types type;
180 int data; /* Stored by add_stub; allocate_stack passes 0. */
181 struct sljit_jump *start; /* Jump into the stub; flush_stubs binds it to the stub code. */
182 struct sljit_label *leave; /* Label created when the stub was queued; the stub jumps back here. */
183 struct stub_list *next;
184 } stub_list;
185
186 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args); /* Pointer to a function taking a jit_arguments block and returning int. NOTE(review): presumably the type of the generated matcher - confirm. */
187
188 /* The following structure is the key data type for the recursive
189 code generator. It is allocated by compile_hotpath, and contains
190 the arguments for compile_fallbackpath. Must be the first member
191 of its descendants (the *_fallback structures all begin with it). */
192 typedef struct fallback_common {
193 /* Concatenation stack. */
194 struct fallback_common *prev;
195 jump_list *nextfallbacks;
196 /* Internal stack (for component operators). */
197 struct fallback_common *top;
198 jump_list *topfallbacks;
199 /* Opcode pointer. */
200 pcre_uchar *cc;
201 } fallback_common;
202
203 typedef struct assert_fallback {
204 fallback_common common; /* Must stay the first member (see fallback_common). */
205 jump_list *condfailed;
206 /* Less than 0 (-1) if a frame is not needed. */
207 int framesize;
208 /* Points to our private memory word on the stack. */
209 int localptr;
210 /* For iterators. */
211 struct sljit_label *hotpath;
212 } assert_fallback;
213
214 typedef struct bracket_fallback {
215 fallback_common common; /* Must stay the first member (see fallback_common). */
216 /* Where to continue if an alternative is successfully matched. */
217 struct sljit_label *althotpath;
218 /* For rmin and rmax iterators. */
219 struct sljit_label *recursivehotpath;
220 /* For greedy ? operator. */
221 struct sljit_label *zerohotpath;
222 /* Contains the branches of a failed condition. The members share storage,
so only the one matching the bracket type is meaningful. */
223 union {
224 /* Both for OP_COND, OP_SCOND. */
225 jump_list *condfailed;
226 assert_fallback *assert;
227 /* For OP_ONCE. -1 if not needed. */
228 int framesize;
229 } u;
230 /* Points to our private memory word on the stack. */
231 int localptr;
232 } bracket_fallback;
233
234 typedef struct bracketpos_fallback {
235 fallback_common common; /* Must stay the first member (see fallback_common). */
236 /* Points to our private memory word on the stack. */
237 int localptr;
238 /* Reverting stack is needed. */
239 int framesize;
240 /* Allocated stack size. */
241 int stacksize;
242 } bracketpos_fallback;
243
244 typedef struct braminzero_fallback {
245 fallback_common common; /* Must stay the first member (see fallback_common). */
246 struct sljit_label *hotpath;
247 } braminzero_fallback;
248
249 typedef struct iterator_fallback {
250 fallback_common common; /* Must stay the first member (see fallback_common). */
251 /* Next iteration. */
252 struct sljit_label *hotpath;
253 } iterator_fallback;
254
255 typedef struct recurse_entry { /* One entry of a singly linked list of recursion targets. */
256 struct recurse_entry *next;
257 /* Contains the function entry. */
258 struct sljit_label *entry;
259 /* Collects the calls made before the function is created. */
260 jump_list *calls;
261 /* Points to the starting opcode. */
262 int start;
263 } recurse_entry;
264
265 typedef struct recurse_fallback {
266 fallback_common common; /* Only member; no extra state beyond fallback_common. */
267 } recurse_fallback;
268
269 typedef struct compiler_common { /* State shared by the code generator functions; passed around as 'common'. */
270 struct sljit_compiler *compiler; /* Fetched into a local by DEFINE_COMPILER. */
271 pcre_uchar *start; /* First opcode; PRIV() and set_localptrs index relative to it. */
272 int localsize;
273 int *localptrs; /* Indexed by opcode offset from start; filled in by set_localptrs. */
274 const pcre_uint8 *fcc;
275 sljit_w lcc;
276 int cbraptr; /* Base offset used by OVECTOR_PRIV(). */
277 int nltype;
278 int newline;
279 int bsr_nltype;
280 int endonly;
281 sljit_w ctypes;
282 sljit_uw name_table;
283 sljit_w name_count;
284 sljit_w name_entry_size;
285 struct sljit_label *acceptlabel;
286 stub_list *stubs; /* Stubs queued by add_stub; emitted and cleared by flush_stubs. */
287 recurse_entry *entries;
288 recurse_entry *currententry;
289 jump_list *accept;
290 jump_list *calllimit; /* Jumps added by decrease_call_count, taken when CALL_COUNT reaches zero. */
291 jump_list *stackalloc; /* SLJIT_FAST_CALL jumps added by flush_stubs for stack_alloc stubs. */
292 jump_list *revertframes;
293 jump_list *wordboundary;
294 jump_list *anynewline;
295 jump_list *hspace;
296 jump_list *vspace;
297 jump_list *casefulcmp;
298 jump_list *caselesscmp;
299 BOOL jscript_compat;
300 #ifdef SUPPORT_UTF8
301 BOOL utf8; /* Checked by next_opcode to skip multi-byte characters and to reject OP_ANYBYTE. */
302 #ifdef SUPPORT_UCP
303 BOOL useucp;
304 #endif
305 jump_list *utf8readchar;
306 jump_list *utf8readtype8;
307 #endif
308 #ifdef SUPPORT_UCP
309 jump_list *getucd;
310 #endif
311 } compiler_common;
312
313 /* For byte_sequence_compare. */
314
315 typedef struct compare_context {
316 int length;
317 int sourcereg;
318 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
319 int byteptr;
320 union { /* The same 4 bytes viewed as int, short, single byte or byte array. */
321 int asint;
322 short asshort;
323 sljit_ub asbyte;
324 sljit_ub asbytes[4];
325 } c;
326 union { /* Same layout as 'c'. NOTE(review): presumably the other-case variant of 'c' - confirm in byte_sequence_compare. */
327 int asint;
328 short asshort;
329 sljit_ub asbyte;
330 sljit_ub asbytes[4];
331 } oc;
332 #endif
333 } compare_context;
334
335 enum { /* Marker words written into saved stack frames by init_frame. */
336 frame_end = 0, /* Terminates a frame. */
337 frame_setstrbegin = -1 /* Precedes a saved copy of OVECTOR(0). */
338 };
339
340 /* Used for accessing the elements of the stack: STACK(i) is the byte offset of -(i + 1) words, applied relative to STACK_TOP. */
341 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
342
343 #define TMP1 SLJIT_TEMPORARY_REG1
344 #define TMP2 SLJIT_TEMPORARY_REG3 /* Note: TMP2 is REG3; REG2 is STACK_TOP. */
345 #define TMP3 SLJIT_TEMPORARY_EREG2
346 #define STR_PTR SLJIT_GENERAL_REG1
347 #define STR_END SLJIT_GENERAL_REG2
348 #define STACK_TOP SLJIT_TEMPORARY_REG2
349 #define STACK_LIMIT SLJIT_GENERAL_REG3
350 #define ARGUMENTS SLJIT_GENERAL_EREG1
351 #define CALL_COUNT SLJIT_GENERAL_EREG2
352 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
353
354 /* Locals layout (byte offsets relative to SLJIT_LOCALS_REG). */
355 /* These two locals can be used by the current opcode. */
356 #define LOCALS0 (0 * sizeof(sljit_w))
357 #define LOCALS1 (1 * sizeof(sljit_w))
358 /* Two local variables for possessive quantifiers (char1 cannot use them). */
359 #define POSSESSIVE0 (2 * sizeof(sljit_w))
360 #define POSSESSIVE1 (3 * sizeof(sljit_w))
361 /* Head of the last recursion. */
362 #define RECURSIVE_HEAD (4 * sizeof(sljit_w))
363 /* Max limit of recursions. */
364 #define CALL_LIMIT (5 * sizeof(sljit_w))
365 /* Last known position of the requested byte. */
366 #define REQ_BYTE_PTR (6 * sizeof(sljit_w))
367 /* End pointer of the first line. */
368 #define FIRSTLINE_END (7 * sizeof(sljit_w))
369 /* The output vector is stored on the stack, and contains pointers
370 to characters. The vector data is divided into two groups: the first
371 group contains the start / end character pointers, and the second is
372 the start pointers when the end of the capturing group has not yet been reached. */
373 #define OVECTOR_START (8 * sizeof(sljit_w))
374 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
375 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
376 #define PRIV(cc) (common->localptrs[(cc) - common->start])
377
378 /* Shortcuts. OVECTOR_PRIV and PRIV above require a variable named 'common'
in scope; the macros below require a variable named 'compiler', which
DEFINE_COMPILER declares from common->compiler. */
379 #define DEFINE_COMPILER \
380 struct sljit_compiler *compiler = common->compiler
381 #define OP1(op, dst, dstw, src, srcw) \
382 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
383 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
384 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
385 #define LABEL() \
386 sljit_emit_label(compiler)
387 #define JUMP(type) \
388 sljit_emit_jump(compiler, (type))
389 #define JUMPTO(type, label) \
390 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
391 #define JUMPHERE(jump) \
392 sljit_set_label((jump), sljit_emit_label(compiler))
393 #define CMP(type, src1, src1w, src2, src2w) \
394 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
395 #define CMPTO(type, src1, src1w, src2, src2w, label) \
396 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
397 #define COND_VALUE(op, dst, dstw, type) \
398 sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
399
400 static pcre_uchar* bracketend(pcre_uchar* cc)
401 {
402 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
403 do cc += GET(cc, 1); while (*cc == OP_ALT);
404 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
405 cc += 1 + LINK_SIZE;
406 return cc;
407 }
408
409 /* Functions which might need modification for all new supported opcodes:
410 next_opcode
411 get_localspace
412 set_localptrs
413 get_framesize
414 init_frame
415 get_localsize
416 copy_locals
417 compile_hotpath
418 compile_fallbackpath
419 */
420
/* Returns the address of the opcode which follows the one at 'cc'.
Returns NULL when the opcode is not listed here (default case), or when it
is OP_ANYBYTE while common->utf8 is set; get_localspace reports such a NULL
as -1. Brackets are not skipped as a whole: only the bracket opcode itself
and its fixed-size arguments are stepped over. */
421 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
422 {
423 SLJIT_UNUSED_ARG(common);
424 switch(*cc)
425 {
426 case OP_SOD:
427 case OP_SOM:
428 case OP_SET_SOM:
429 case OP_NOT_WORD_BOUNDARY:
430 case OP_WORD_BOUNDARY:
431 case OP_NOT_DIGIT:
432 case OP_DIGIT:
433 case OP_NOT_WHITESPACE:
434 case OP_WHITESPACE:
435 case OP_NOT_WORDCHAR:
436 case OP_WORDCHAR:
437 case OP_ANY:
438 case OP_ALLANY:
439 case OP_ANYNL:
440 case OP_NOT_HSPACE:
441 case OP_HSPACE:
442 case OP_NOT_VSPACE:
443 case OP_VSPACE:
444 case OP_EXTUNI:
445 case OP_EODN:
446 case OP_EOD:
447 case OP_CIRC:
448 case OP_CIRCM:
449 case OP_DOLL:
450 case OP_DOLLM:
451 case OP_TYPESTAR:
452 case OP_TYPEMINSTAR:
453 case OP_TYPEPLUS:
454 case OP_TYPEMINPLUS:
455 case OP_TYPEQUERY:
456 case OP_TYPEMINQUERY:
457 case OP_TYPEPOSSTAR:
458 case OP_TYPEPOSPLUS:
459 case OP_TYPEPOSQUERY:
460 case OP_CRSTAR:
461 case OP_CRMINSTAR:
462 case OP_CRPLUS:
463 case OP_CRMINPLUS:
464 case OP_CRQUERY:
465 case OP_CRMINQUERY:
466 case OP_DEF:
467 case OP_BRAZERO:
468 case OP_BRAMINZERO:
469 case OP_BRAPOSZERO:
470 case OP_FAIL:
471 case OP_ACCEPT:
472 case OP_ASSERT_ACCEPT:
473 case OP_SKIPZERO:
474 return cc + 1; /* Only the opcode itself is stepped over. */
475
476 case OP_ANYBYTE:
477 #ifdef SUPPORT_UTF8
478 if (common->utf8) return NULL; /* Reported as unsupported in UTF-8 mode. */
479 #endif
480 return cc + 1;
481
482 case OP_CHAR:
483 case OP_CHARI:
484 case OP_NOT:
485 case OP_NOTI:
486
487 case OP_STAR:
488 case OP_MINSTAR:
489 case OP_PLUS:
490 case OP_MINPLUS:
491 case OP_QUERY:
492 case OP_MINQUERY:
493 case OP_POSSTAR:
494 case OP_POSPLUS:
495 case OP_POSQUERY:
496 case OP_STARI:
497 case OP_MINSTARI:
498 case OP_PLUSI:
499 case OP_MINPLUSI:
500 case OP_QUERYI:
501 case OP_MINQUERYI:
502 case OP_POSSTARI:
503 case OP_POSPLUSI:
504 case OP_POSQUERYI:
505 case OP_NOTSTAR:
506 case OP_NOTMINSTAR:
507 case OP_NOTPLUS:
508 case OP_NOTMINPLUS:
509 case OP_NOTQUERY:
510 case OP_NOTMINQUERY:
511 case OP_NOTPOSSTAR:
512 case OP_NOTPOSPLUS:
513 case OP_NOTPOSQUERY:
514 case OP_NOTSTARI:
515 case OP_NOTMINSTARI:
516 case OP_NOTPLUSI:
517 case OP_NOTMINPLUSI:
518 case OP_NOTQUERYI:
519 case OP_NOTMINQUERYI:
520 case OP_NOTPOSSTARI:
521 case OP_NOTPOSPLUSI:
522 case OP_NOTPOSQUERYI:
523 cc += 2; /* Opcode plus one more byte. */
524 #ifdef SUPPORT_UTF8
525 if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f]; /* Byte at cc[-1] >= 0xc0: add the extra length looked up in _pcre_utf8_table4. */
526 #endif
527 return cc;
528
529 case OP_UPTO:
530 case OP_MINUPTO:
531 case OP_EXACT:
532 case OP_POSUPTO:
533 case OP_UPTOI:
534 case OP_MINUPTOI:
535 case OP_EXACTI:
536 case OP_POSUPTOI:
537 case OP_NOTUPTO:
538 case OP_NOTMINUPTO:
539 case OP_NOTEXACT:
540 case OP_NOTPOSUPTO:
541 case OP_NOTUPTOI:
542 case OP_NOTMINUPTOI:
543 case OP_NOTEXACTI:
544 case OP_NOTPOSUPTOI:
545 cc += 4; /* Opcode plus three more bytes; same UTF-8 adjustment as above on the last of them. */
546 #ifdef SUPPORT_UTF8
547 if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
548 #endif
549 return cc;
550
551 case OP_NOTPROP:
552 case OP_PROP:
553 case OP_TYPEUPTO:
554 case OP_TYPEMINUPTO:
555 case OP_TYPEEXACT:
556 case OP_TYPEPOSUPTO:
557 case OP_REF:
558 case OP_REFI:
559 case OP_CREF:
560 case OP_NCREF:
561 case OP_RREF:
562 case OP_NRREF:
563 case OP_CLOSE:
564 cc += 3; /* NOTE(review): for the OP_TYPE* repeats this presumably lands on the repeated type opcode, which is then walked as an opcode of its own - confirm. */
565 return cc;
566
567 case OP_CRRANGE:
568 case OP_CRMINRANGE:
569 return cc + 5;
570
571 case OP_CLASS:
572 case OP_NCLASS:
573 return cc + 33; /* NOTE(review): presumably opcode + a 32 byte bitmap, which assumes pcre_uchar is one byte wide - confirm for 16 bit builds. */
574
575 #ifdef SUPPORT_UTF8
576 case OP_XCLASS:
577 return cc + GET(cc, 1); /* The total length is stored after the opcode. */
578 #endif
579
580 case OP_RECURSE:
581 case OP_ASSERT:
582 case OP_ASSERT_NOT:
583 case OP_ASSERTBACK:
584 case OP_ASSERTBACK_NOT:
585 case OP_REVERSE:
586 case OP_ONCE:
587 case OP_ONCE_NC:
588 case OP_BRA:
589 case OP_BRAPOS:
590 case OP_COND:
591 case OP_SBRA:
592 case OP_SBRAPOS:
593 case OP_SCOND:
594 case OP_ALT:
595 case OP_KET:
596 case OP_KETRMAX:
597 case OP_KETRMIN:
598 case OP_KETRPOS:
599 return cc + 1 + LINK_SIZE; /* Opcode plus its link field; the bracket content is not skipped. */
600
601 case OP_CBRA:
602 case OP_CBRAPOS:
603 case OP_SCBRA:
604 case OP_SCBRAPOS:
605 return cc + 1 + LINK_SIZE + 2; /* As above plus the 2 byte value read with GET2(cc, 1 + LINK_SIZE) elsewhere. */
606
607 default:
608 return NULL; /* Opcode not handled by this walker. */
609 }
610 }
611
612 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
613 {
614 int localspace = 0;
615 pcre_uchar *alternative;
616 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
617 while (cc < ccend)
618 {
619 switch(*cc)
620 {
621 case OP_ASSERT:
622 case OP_ASSERT_NOT:
623 case OP_ASSERTBACK:
624 case OP_ASSERTBACK_NOT:
625 case OP_ONCE:
626 case OP_ONCE_NC:
627 case OP_BRAPOS:
628 case OP_SBRA:
629 case OP_SBRAPOS:
630 case OP_SCOND:
631 localspace += sizeof(sljit_w);
632 cc += 1 + LINK_SIZE;
633 break;
634
635 case OP_CBRAPOS:
636 case OP_SCBRAPOS:
637 localspace += sizeof(sljit_w);
638 cc += 1 + LINK_SIZE + 2;
639 break;
640
641 case OP_COND:
642 /* Might be a hidden SCOND. */
643 alternative = cc + GET(cc, 1);
644 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
645 localspace += sizeof(sljit_w);
646 cc += 1 + LINK_SIZE;
647 break;
648
649 default:
650 cc = next_opcode(common, cc);
651 if (cc == NULL)
652 return -1;
653 break;
654 }
655 }
656 return localspace;
657 }
658
659 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
660 {
661 pcre_uchar *cc = common->start;
662 pcre_uchar *alternative;
663 while (cc < ccend)
664 {
665 switch(*cc)
666 {
667 case OP_ASSERT:
668 case OP_ASSERT_NOT:
669 case OP_ASSERTBACK:
670 case OP_ASSERTBACK_NOT:
671 case OP_ONCE:
672 case OP_ONCE_NC:
673 case OP_BRAPOS:
674 case OP_SBRA:
675 case OP_SBRAPOS:
676 case OP_SCOND:
677 common->localptrs[cc - common->start] = localptr;
678 localptr += sizeof(sljit_w);
679 cc += 1 + LINK_SIZE;
680 break;
681
682 case OP_CBRAPOS:
683 case OP_SCBRAPOS:
684 common->localptrs[cc - common->start] = localptr;
685 localptr += sizeof(sljit_w);
686 cc += 1 + LINK_SIZE + 2;
687 break;
688
689 case OP_COND:
690 /* Might be a hidden SCOND. */
691 alternative = cc + GET(cc, 1);
692 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
693 {
694 common->localptrs[cc - common->start] = localptr;
695 localptr += sizeof(sljit_w);
696 }
697 cc += 1 + LINK_SIZE;
698 break;
699
700 default:
701 cc = next_opcode(common, cc);
702 SLJIT_ASSERT(cc != NULL);
703 break;
704 }
705 }
706 }
707
708 /* Returns with -1 if no need for frame. */
709 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
710 {
711 pcre_uchar *ccend = bracketend(cc);
712 int length = 0;
713 BOOL possessive = FALSE;
714 BOOL setsom_found = FALSE;
715
716 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
717 {
718 length = 3;
719 possessive = TRUE;
720 }
721
722 cc = next_opcode(common, cc);
723 SLJIT_ASSERT(cc != NULL);
724 while (cc < ccend)
725 switch(*cc)
726 {
727 case OP_SET_SOM:
728 case OP_RECURSE:
729 if (!setsom_found)
730 {
731 length += 2;
732 setsom_found = TRUE;
733 }
734 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
735 break;
736
737 case OP_CBRA:
738 case OP_CBRAPOS:
739 case OP_SCBRA:
740 case OP_SCBRAPOS:
741 length += 3;
742 cc += 1 + LINK_SIZE + 2;
743 break;
744
745 default:
746 cc = next_opcode(common, cc);
747 SLJIT_ASSERT(cc != NULL);
748 break;
749 }
750
751 /* Possessive quantifiers can use a special case. */
752 if (SLJIT_UNLIKELY(possessive) && length == 3)
753 return -1;
754
755 if (length > 0)
756 return length + 1;
757 return -1;
758 }
759
/* Emits the code which builds a saved stack frame for the bracket at 'cc'.
Words are written relative to STACK_TOP, starting at slot 'stackpos':
 - once, if an OP_SET_SOM or OP_RECURSE occurs inside the bracket: the
   frame_setstrbegin marker followed by the current OVECTOR(0) value;
 - for each capturing bracket: the OVECTOR offset of its pair followed by
   the two current values of that pair;
 - finally the frame_end marker.
The closing assert requires the frame_end marker to land exactly on
STACK(stacktop). TMP1 and TMP2 are used by the emitted code. */
760 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
761 {
762 DEFINE_COMPILER;
763 pcre_uchar *ccend = bracketend(cc);
764 BOOL setsom_found = FALSE;
765 int offset;
766
767 /* >= 1 + shortest item size (2) */
768 SLJIT_ASSERT(stackpos >= stacktop + 2);
769
770 stackpos = STACK(stackpos); /* From here on stackpos is a byte offset, growing by one word per store. */
771 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
772 cc = next_opcode(common, cc); /* Not skipped for a non-recursive (S)CBRAPOS, so the loop below also saves the bracket's own pair (matches get_framesize). */
773 SLJIT_ASSERT(cc != NULL);
774 while (cc < ccend)
775 switch(*cc)
776 {
777 case OP_SET_SOM:
778 case OP_RECURSE:
779 if (!setsom_found) /* Saved at most once per frame. */
780 {
781 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
782 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
783 stackpos += (int)sizeof(sljit_w);
784 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
785 stackpos += (int)sizeof(sljit_w);
786 setsom_found = TRUE;
787 }
788 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
789 break;
790
791 case OP_CBRA:
792 case OP_CBRAPOS:
793 case OP_SCBRA:
794 case OP_SCBRAPOS:
795 offset = (GET2(cc, 1 + LINK_SIZE)) << 1; /* Two ovector slots per capturing bracket. */
796 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
797 stackpos += (int)sizeof(sljit_w);
798 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
799 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
800 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
801 stackpos += (int)sizeof(sljit_w);
802 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
803 stackpos += (int)sizeof(sljit_w);
804
805 cc += 1 + LINK_SIZE + 2;
806 break;
807
808 default:
809 cc = next_opcode(common, cc);
810 SLJIT_ASSERT(cc != NULL);
811 break;
812 }
813
814 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
815 SLJIT_ASSERT(stackpos == STACK(stacktop));
816 }
817
818 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
819 {
820 int localsize = 2;
821 pcre_uchar *alternative;
822 /* Calculate the sum of the local variables. */
823 while (cc < ccend)
824 {
825 switch(*cc)
826 {
827 case OP_ASSERT:
828 case OP_ASSERT_NOT:
829 case OP_ASSERTBACK:
830 case OP_ASSERTBACK_NOT:
831 case OP_ONCE:
832 case OP_ONCE_NC:
833 case OP_BRAPOS:
834 case OP_SBRA:
835 case OP_SBRAPOS:
836 case OP_SCOND:
837 localsize++;
838 cc += 1 + LINK_SIZE;
839 break;
840
841 case OP_CBRA:
842 case OP_SCBRA:
843 localsize++;
844 cc += 1 + LINK_SIZE + 2;
845 break;
846
847 case OP_CBRAPOS:
848 case OP_SCBRAPOS:
849 localsize += 2;
850 cc += 1 + LINK_SIZE + 2;
851 break;
852
853 case OP_COND:
854 /* Might be a hidden SCOND. */
855 alternative = cc + GET(cc, 1);
856 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
857 localsize++;
858 cc += 1 + LINK_SIZE;
859 break;
860
861 default:
862 cc = next_opcode(common, cc);
863 SLJIT_ASSERT(cc != NULL);
864 break;
865 }
866 }
867 SLJIT_ASSERT(cc == ccend);
868 return localsize;
869 }
870
/* Emits the code which copies the local variables of the opcodes in
[cc, ccend) to the stack (save == TRUE) or back from it (save == FALSE).
The items are: RECURSIVE_HEAD (read only when saving), the private word of
each opcode which has one (PRIV), and the OVECTOR_PRIV word of each
capturing bracket. Values travel through TMP1 and TMP2 alternately
('tmp1next' tells which register is used next), so that a value is loaded
into one register while the previously loaded one is still waiting to be
stored. 'stackptr' and 'stacktop' are stack slot indexes on entry; the
closing assert requires the transfer to end exactly at the converted
'stacktop'. */
871 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
872 BOOL save, int stackptr, int stacktop)
873 {
874 DEFINE_COMPILER;
875 int srcw[2];
876 int count;
877 BOOL tmp1next = TRUE;
878 BOOL tmp1empty = TRUE;
879 BOOL tmp2empty = TRUE;
880 pcre_uchar *alternative;
881 enum {
882 start,
883 loop,
884 end
885 } status;
886
887 status = save ? start : loop; /* Only a save begins with the extra RECURSIVE_HEAD item. */
888 stackptr = STACK(stackptr - 2); /* Both values become byte offsets relative to STACK_TOP. */
889 stacktop = STACK(stacktop - 1);
890
891 if (!save)
892 {
893 stackptr += sizeof(sljit_w); /* Skip the first word, which a save fills with RECURSIVE_HEAD. */
894 if (stackptr < stacktop)
895 {
896 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); /* Preload the first value to restore. */
897 stackptr += sizeof(sljit_w);
898 tmp1empty = FALSE;
899 }
900 if (stackptr < stacktop)
901 {
902 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr); /* Preload the second value to restore. */
903 stackptr += sizeof(sljit_w);
904 tmp2empty = FALSE;
905 }
906 /* The tmp1next must be TRUE in either way. */
907 }
908
909 while (status != end)
910 {
911 count = 0; /* Number of locals (0, 1 or 2) produced by this step, listed in srcw[]. */
912 switch(status)
913 {
914 case start:
915 SLJIT_ASSERT(save);
916 count = 1;
917 srcw[0] = RECURSIVE_HEAD;
918 status = loop;
919 break;
920
921 case loop:
922 if (cc >= ccend)
923 {
924 status = end;
925 break;
926 }
927
928 switch(*cc)
929 {
930 case OP_ASSERT:
931 case OP_ASSERT_NOT:
932 case OP_ASSERTBACK:
933 case OP_ASSERTBACK_NOT:
934 case OP_ONCE:
935 case OP_ONCE_NC:
936 case OP_BRAPOS:
937 case OP_SBRA:
938 case OP_SBRAPOS:
939 case OP_SCOND:
940 count = 1;
941 srcw[0] = PRIV(cc);
942 SLJIT_ASSERT(srcw[0] != 0);
943 cc += 1 + LINK_SIZE;
944 break;
945
946 case OP_CBRA:
947 case OP_SCBRA:
948 count = 1;
949 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
950 cc += 1 + LINK_SIZE + 2;
951 break;
952
953 case OP_CBRAPOS:
954 case OP_SCBRAPOS:
955 count = 2; /* Both the OVECTOR_PRIV word and the private word are copied. */
956 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
957 srcw[0] = PRIV(cc);
958 SLJIT_ASSERT(srcw[0] != 0);
959 cc += 1 + LINK_SIZE + 2;
960 break;
961
962 case OP_COND:
963 /* Might be a hidden SCOND. */
964 alternative = cc + GET(cc, 1);
965 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
966 {
967 count = 1;
968 srcw[0] = PRIV(cc);
969 SLJIT_ASSERT(srcw[0] != 0);
970 }
971 cc += 1 + LINK_SIZE;
972 break;
973
974 default:
975 cc = next_opcode(common, cc);
976 SLJIT_ASSERT(cc != NULL);
977 break;
978 }
979 break;
980
981 case end:
982 SLJIT_ASSERT_STOP();
983 break;
984 }
985
986 while (count > 0) /* srcw[] is consumed from its highest used index downwards. */
987 {
988 count--;
989 if (save)
990 {
991 if (tmp1next)
992 {
993 if (!tmp1empty)
994 {
995 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0); /* Store the value loaded two items ago before reusing TMP1. */
996 stackptr += sizeof(sljit_w);
997 }
998 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
999 tmp1empty = FALSE;
1000 tmp1next = FALSE;
1001 }
1002 else
1003 {
1004 if (!tmp2empty)
1005 {
1006 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0); /* Same scheme for TMP2. */
1007 stackptr += sizeof(sljit_w);
1008 }
1009 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1010 tmp2empty = FALSE;
1011 tmp1next = TRUE;
1012 }
1013 }
1014 else
1015 {
1016 if (tmp1next)
1017 {
1018 SLJIT_ASSERT(!tmp1empty);
1019 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0); /* Write the preloaded value back to its local. */
1020 tmp1empty = stackptr >= stacktop;
1021 if (!tmp1empty)
1022 {
1023 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr); /* Refill TMP1 while saved words remain. */
1024 stackptr += sizeof(sljit_w);
1025 }
1026 tmp1next = FALSE;
1027 }
1028 else
1029 {
1030 SLJIT_ASSERT(!tmp2empty);
1031 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1032 tmp2empty = stackptr >= stacktop;
1033 if (!tmp2empty)
1034 {
1035 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1036 stackptr += sizeof(sljit_w);
1037 }
1038 tmp1next = TRUE;
1039 }
1040 }
1041 }
1042 }
1043
1044 if (save) /* Flush the values still held in registers; the register due for reuse next holds the older value and goes first. */
1045 {
1046 if (tmp1next)
1047 {
1048 if (!tmp1empty)
1049 {
1050 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1051 stackptr += sizeof(sljit_w);
1052 }
1053 if (!tmp2empty)
1054 {
1055 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1056 stackptr += sizeof(sljit_w);
1057 }
1058 }
1059 else
1060 {
1061 if (!tmp2empty)
1062 {
1063 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1064 stackptr += sizeof(sljit_w);
1065 }
1066 if (!tmp1empty)
1067 {
1068 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1069 stackptr += sizeof(sljit_w);
1070 }
1071 }
1072 }
1073 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1074 }
1075
1076 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1077 {
1078 return (value & (value - 1)) == 0;
1079 }
1080
1081 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1082 {
1083 while (list)
1084 {
1085 /* sljit_set_label is clever enough to do nothing
1086 if either the jump or the label is NULL */
1087 sljit_set_label(list->jump, label);
1088 list = list->next;
1089 }
1090 }
1091
1092 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1093 {
1094 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1095 if (list_item)
1096 {
1097 list_item->next = *list;
1098 list_item->jump = jump;
1099 *list = list_item;
1100 }
1101 }
1102
1103 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1104 {
1105 DEFINE_COMPILER;
1106 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1107
1108 if (list_item)
1109 {
1110 list_item->type = type;
1111 list_item->data = data;
1112 list_item->start = start;
1113 list_item->leave = LABEL();
1114 list_item->next = common->stubs;
1115 common->stubs = list_item;
1116 }
1117 }
1118
1119 static void flush_stubs(compiler_common *common)
1120 {
1121 DEFINE_COMPILER;
1122 stub_list* list_item = common->stubs;
1123
1124 while (list_item)
1125 {
1126 JUMPHERE(list_item->start);
1127 switch(list_item->type)
1128 {
1129 case stack_alloc:
1130 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1131 break;
1132 }
1133 JUMPTO(SLJIT_JUMP, list_item->leave);
1134 list_item = list_item->next;
1135 }
1136 common->stubs = NULL;
1137 }
1138
1139 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1140 {
1141 DEFINE_COMPILER;
1142
1143 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1144 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1145 }
1146
1147 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1148 {
1149 /* May destroy all locals and registers except TMP2. */
1150 DEFINE_COMPILER;
1151
1152 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1153 #ifdef DESTROY_REGISTERS
1154 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1155 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1156 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1157 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1158 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1159 #endif
1160 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1161 }
1162
1163 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1164 {
1165 DEFINE_COMPILER;
1166 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1167 }
1168
1169 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1170 {
1171 DEFINE_COMPILER;
1172 struct sljit_label *loop;
1173 int i;
1174 /* At this point we can freely use all temporary registers. */
1175 /* TMP1 returns with begin - 1. */
1176 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, 1);
1177 if (length < 8)
1178 {
1179 for (i = 0; i < length; i++)
1180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1181 }
1182 else
1183 {
1184 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1185 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1186 loop = LABEL();
1187 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1188 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1189 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1190 }
1191 }
1192
1193 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1194 {
1195 DEFINE_COMPILER;
1196 struct sljit_label *loop;
1197 struct sljit_jump *earlyexit;
1198
1199 /* At this point we can freely use all registers. */
1200 OP1(SLJIT_MOV, SLJIT_GENERAL_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1201 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1202
1203 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1204 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1205 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1206 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1207 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1208 /* Unlikely, but possible */
1209 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1210 loop = LABEL();
1211 OP2(SLJIT_SUB, SLJIT_GENERAL_REG2, 0, SLJIT_MEM1(SLJIT_GENERAL_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1212 OP2(SLJIT_ADD, SLJIT_GENERAL_REG1, 0, SLJIT_GENERAL_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1213 /* Copy the integer value to the output buffer */
1214 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_GENERAL_REG2, 0);
1215 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1216 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1217 JUMPHERE(earlyexit);
1218
1219 /* Calculate the return value, which is the maximum ovector value. */
1220 if (topbracket > 1)
1221 {
1222 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1223 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1224
1225 /* OVECTOR(0) is never equal to SLJIT_GENERAL_REG3. */
1226 loop = LABEL();
1227 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1228 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1229 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_GENERAL_REG3, 0, loop);
1230 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1231 }
1232 else
1233 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1234 }
1235
1236 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1237 {
1238 /* Detects if the character has an othercase. */
1239 unsigned int c;
1240
1241 #ifdef SUPPORT_UTF8
1242 if (common->utf8)
1243 {
1244 GETCHAR(c, cc);
1245 if (c > 127)
1246 {
1247 #ifdef SUPPORT_UCP
1248 return c != UCD_OTHERCASE(c);
1249 #else
1250 return FALSE;
1251 #endif
1252 }
1253 }
1254 else
1255 #endif
1256 c = *cc;
1257 return common->fcc[c] != c;
1258 }
1259
1260 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1261 {
1262 /* Returns with the othercase. */
1263 #ifdef SUPPORT_UTF8
1264 if (common->utf8 && c > 127)
1265 {
1266 #ifdef SUPPORT_UCP
1267 return UCD_OTHERCASE(c);
1268 #else
1269 return c;
1270 #endif
1271 }
1272 #endif
1273 return common->fcc[c];
1274 }
1275
1276 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1277 {
1278 /* Detects if the character and its othercase has only 1 bit difference. */
1279 unsigned int c, oc, bit;
1280 #ifdef SUPPORT_UTF8
1281 int n;
1282 #endif
1283
1284 #ifdef SUPPORT_UTF8
1285 if (common->utf8)
1286 {
1287 GETCHAR(c, cc);
1288 if (c <= 127)
1289 oc = common->fcc[c];
1290 else
1291 {
1292 #ifdef SUPPORT_UCP
1293 oc = UCD_OTHERCASE(c);
1294 #else
1295 oc = c;
1296 #endif
1297 }
1298 }
1299 else
1300 {
1301 c = *cc;
1302 oc = common->fcc[c];
1303 }
1304 #else
1305 c = *cc;
1306 oc = common->fcc[c];
1307 #endif
1308
1309 SLJIT_ASSERT(c != oc);
1310
1311 bit = c ^ oc;
1312 /* Optimized for English alphabet. */
1313 if (c <= 127 && bit == 0x20)
1314 return (0 << 8) | 0x20;
1315
1316 /* Since c != oc, they must have at least 1 bit difference. */
1317 if (!ispowerof2(bit))
1318 return 0;
1319
1320 #ifdef SUPPORT_UTF8
1321 if (common->utf8 && c > 127)
1322 {
1323 n = _pcre_utf8_table4[*cc & 0x3f];
1324 while ((bit & 0x3f) == 0)
1325 {
1326 n--;
1327 bit >>= 6;
1328 }
1329 return (n << 8) | bit;
1330 }
1331 #endif
1332 return (0 << 8) | bit;
1333 }
1334
1335 static SLJIT_INLINE void check_input_end(compiler_common *common, jump_list **fallbacks)
1336 {
1337 DEFINE_COMPILER;
1338 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1339 }
1340
1341 static void read_char(compiler_common *common)
1342 {
1343 /* Reads the character into TMP1, updates STR_PTR.
1344 Does not check STR_END. TMP2 Destroyed. */
1345 DEFINE_COMPILER;
1346 #ifdef SUPPORT_UTF8
1347 struct sljit_jump *jump;
1348 #endif
1349
1350 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1351 #ifdef SUPPORT_UTF8
1352 if (common->utf8)
1353 {
1354 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1355 add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));
1356 JUMPHERE(jump);
1357 }
1358 #endif
1359 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1360 }
1361
1362 static void peek_char(compiler_common *common)
1363 {
1364 /* Reads the character into TMP1, keeps STR_PTR.
1365 Does not check STR_END. TMP2 Destroyed. */
1366 DEFINE_COMPILER;
1367 #ifdef SUPPORT_UTF8
1368 struct sljit_jump *jump;
1369 #endif
1370
1371 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1372 #ifdef SUPPORT_UTF8
1373 if (common->utf8)
1374 {
1375 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1376 add_jump(compiler, &common->utf8readchar, JUMP(SLJIT_FAST_CALL));
1377 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1378 JUMPHERE(jump);
1379 }
1380 #endif
1381 }
1382
1383 static void read_char8_type(compiler_common *common)
1384 {
1385 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1386 DEFINE_COMPILER;
1387 #ifdef SUPPORT_UTF8
1388 struct sljit_jump *jump;
1389 #endif
1390
1391 #ifdef SUPPORT_UTF8
1392 if (common->utf8)
1393 {
1394 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1395 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1396 /* This can be an extra read in some situations, but hopefully
1397 it is a clever early read in most cases. */
1398 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1399 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1400 add_jump(compiler, &common->utf8readtype8, JUMP(SLJIT_FAST_CALL));
1401 JUMPHERE(jump);
1402 return;
1403 }
1404 #endif
1405 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1406 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1407 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
1408 }
1409
1410 static void skip_char_back(compiler_common *common)
1411 {
1412 /* Goes one character back. Only affects STR_PTR. Does not check begin. */
1413 DEFINE_COMPILER;
1414 #ifdef SUPPORT_UTF8
1415 struct sljit_label *label;
1416
1417 if (common->utf8)
1418 {
1419 label = LABEL();
1420 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1421 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1422 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1423 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1424 return;
1425 }
1426 #endif
1427 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1428 }
1429
1430 static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1431 {
1432 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1433 DEFINE_COMPILER;
1434
1435 if (nltype == NLTYPE_ANY)
1436 {
1437 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1438 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1439 }
1440 else if (nltype == NLTYPE_ANYCRLF)
1441 {
1442 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1443 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1444 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1445 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1446 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1447 }
1448 else
1449 {
1450 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline <= 255);
1451 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1452 }
1453 }
1454
1455 #ifdef SUPPORT_UTF8
1456 static void do_utf8readchar(compiler_common *common)
1457 {
1458 /* Fast decoding an utf8 character. TMP1 contains the first byte
1459 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1460 DEFINE_COMPILER;
1461 struct sljit_jump *jump;
1462
1463 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1464 /* Searching for the first zero. */
1465 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1466 jump = JUMP(SLJIT_C_NOT_ZERO);
1467 /* 2 byte sequence */
1468 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1469 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1470 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1471 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1472 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1473 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1474 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
1475 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1476 JUMPHERE(jump);
1477
1478 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1479 jump = JUMP(SLJIT_C_NOT_ZERO);
1480 /* 3 byte sequence */
1481 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1482 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1483 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1484 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1485 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1486 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1487 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1488 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 2);
1489 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1490 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1491 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 2);
1492 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1493 JUMPHERE(jump);
1494
1495 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x08);
1496 jump = JUMP(SLJIT_C_NOT_ZERO);
1497 /* 4 byte sequence */
1498 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1499 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1500 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1501 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1502 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1503 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1504 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1505 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1506 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1507 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1508 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);
1509 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 3);
1510 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1511 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1512 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 3);
1513 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1514 JUMPHERE(jump);
1515
1516 /* 5 byte sequence */
1517 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
1518 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x03);
1519 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 24);
1520 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1521 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
1522 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1523 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 2);
1524 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1525 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1526 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1527 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 3);
1528 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1529 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1530 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1531 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 4);
1532 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 4);
1533 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1534 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1535 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 4);
1536 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1537 }
1538
1539 static void do_utf8readtype8(compiler_common *common)
1540 {
1541 /* Fast decoding an utf8 character type. TMP2 contains the first byte
1542 of the character (>= 0xc0) and TMP1 is destroyed. Return value in TMP1. */
1543 DEFINE_COMPILER;
1544 struct sljit_jump *jump;
1545 struct sljit_jump *compare;
1546
1547 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1548
1549 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1550 jump = JUMP(SLJIT_C_NOT_ZERO);
1551 /* 2 byte sequence */
1552 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1553 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1554 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1555 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1556 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1557 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
1558 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1559 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1560 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1561
1562 JUMPHERE(compare);
1563 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1564 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1565 JUMPHERE(jump);
1566
1567 /* We only have types for characters less than 256. */
1568 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_utf8_table4 - 0xc0);
1569 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1570 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1571 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1572 }
1573
1574 #endif
1575
1576 #ifdef SUPPORT_UCP
1577
1578 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1579 #define UCD_BLOCK_MASK 127
1580 #define UCD_BLOCK_SHIFT 7
1581
1582 static void do_getucd(compiler_common *common)
1583 {
1584 /* Search the UCD record for the character comes in TMP1.
1585 Returns chartype in TMP1 and UCD offset in TMP2. */
1586 DEFINE_COMPILER;
1587
1588 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1589
1590 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1591 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1592 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)_pcre_ucd_stage1);
1593 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1594 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1595 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
1596 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_stage2);
1597 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
1598 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, chartype));
1599 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1600 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1601 }
1602 #endif
1603
1604 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1605 {
1606 DEFINE_COMPILER;
1607 struct sljit_label *mainloop;
1608 struct sljit_label *newlinelabel = NULL;
1609 struct sljit_jump *start;
1610 struct sljit_jump *end = NULL;
1611 struct sljit_jump *nl = NULL;
1612 #ifdef SUPPORT_UTF8
1613 struct sljit_jump *singlebyte;
1614 #endif
1615 jump_list *newline = NULL;
1616 BOOL newlinecheck = FALSE;
1617 BOOL readbyte = FALSE;
1618
1619 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1620 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1621 newlinecheck = TRUE;
1622
1623 if (firstline)
1624 {
1625 /* Search for the end of the first line. */
1626 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1627 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);
1628
1629 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1630 {
1631 mainloop = LABEL();
1632 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1633 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1634 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1);
1635 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1636 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
1637 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1638 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, 1);
1639 }
1640 else
1641 {
1642 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1643 mainloop = LABEL();
1644 /* Continual stores does not cause data dependency. */
1645 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1646 read_char(common);
1647 check_newlinechar(common, common->nltype, &newline, TRUE);
1648 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
1649 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1650 set_jumps(newline, LABEL());
1651 }
1652
1653 JUMPHERE(end);
1654 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1655 }
1656
1657 start = JUMP(SLJIT_JUMP);
1658
1659 if (newlinecheck)
1660 {
1661 newlinelabel = LABEL();
1662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1663 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1665 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1666 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1667 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1668 nl = JUMP(SLJIT_JUMP);
1669 }
1670
1671 mainloop = LABEL();
1672
1673 /* Increasing the STR_PTR here requires one less jump in the most common case. */
1674 #ifdef SUPPORT_UTF8
1675 if (common->utf8) readbyte = TRUE;
1676 #endif
1677 if (newlinecheck) readbyte = TRUE;
1678
1679 if (readbyte)
1680 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1681
1682 if (newlinecheck)
1683 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1684
1685 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1686 #ifdef SUPPORT_UTF8
1687 if (common->utf8)
1688 {
1689 singlebyte = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1690 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_table4 - 0xc0);
1691 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1692 JUMPHERE(singlebyte);
1693 }
1694 #endif
1695 JUMPHERE(start);
1696
1697 if (newlinecheck)
1698 {
1699 JUMPHERE(end);
1700 JUMPHERE(nl);
1701 }
1702
1703 return mainloop;
1704 }
1705
1706 static SLJIT_INLINE void fast_forward_first_byte(compiler_common *common, pcre_uint16 firstbyte, BOOL firstline)
1707 {
1708 DEFINE_COMPILER;
1709 struct sljit_label *start;
1710 struct sljit_jump *leave;
1711 struct sljit_jump *found;
1712 pcre_uint16 oc, bit;
1713
1714 if (firstline)
1715 {
1716 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1717 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1718 }
1719
1720 start = LABEL();
1721 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1722 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1723
1724 if ((firstbyte & REQ_CASELESS) == 0)
1725 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, firstbyte & 0xff);
1726 else
1727 {
1728 firstbyte &= 0xff;
1729 oc = common->fcc[firstbyte];
1730 bit = firstbyte ^ oc;
1731 if (ispowerof2(bit))
1732 {
1733 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
1734 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, firstbyte | bit);
1735 }
1736 else
1737 {
1738 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, firstbyte);
1739 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1740 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
1741 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1742 found = JUMP(SLJIT_C_NOT_ZERO);
1743 }
1744 }
1745
1746 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1747 #ifdef SUPPORT_UTF8
1748 if (common->utf8)
1749 {
1750 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
1751 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_table4 - 0xc0);
1752 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1753 }
1754 #endif
1755 JUMPTO(SLJIT_JUMP, start);
1756 JUMPHERE(found);
1757 JUMPHERE(leave);
1758
1759 if (firstline)
1760 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1761 }
1762
1763 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
1764 {
1765 DEFINE_COMPILER;
1766 struct sljit_label *loop;
1767 struct sljit_jump *lastchar;
1768 struct sljit_jump *firstchar;
1769 struct sljit_jump *leave;
1770 struct sljit_jump *foundcr = NULL;
1771 struct sljit_jump *notfoundnl;
1772 jump_list *newline = NULL;
1773
1774 if (firstline)
1775 {
1776 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1777 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1778 }
1779
1780 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1781 {
1782 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1783 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1784 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1785 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
1786 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1787
1788 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
1789 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
1790 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
1791 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1792
1793 loop = LABEL();
1794 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1795 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1796 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);
1797 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);
1798 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
1799 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
1800
1801 JUMPHERE(leave);
1802 JUMPHERE(firstchar);
1803 JUMPHERE(lastchar);
1804
1805 if (firstline)
1806 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1807 return;
1808 }
1809
1810 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1811 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
1812 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
1813 skip_char_back(common);
1814
1815 loop = LABEL();
1816 read_char(common);
1817 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1818 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1819 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
1820 check_newlinechar(common, common->nltype, &newline, FALSE);
1821 set_jumps(newline, loop);
1822
1823 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
1824 {
1825 leave = JUMP(SLJIT_JUMP);
1826 JUMPHERE(foundcr);
1827 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1828 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1829 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1830 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1831 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1832 JUMPHERE(notfoundnl);
1833 JUMPHERE(leave);
1834 }
1835 JUMPHERE(lastchar);
1836 JUMPHERE(firstchar);
1837
1838 if (firstline)
1839 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1840 }
1841
1842 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
1843 {
1844 DEFINE_COMPILER;
1845 struct sljit_label *start;
1846 struct sljit_jump *leave;
1847 struct sljit_jump *found;
1848
1849 if (firstline)
1850 {
1851 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1852 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1853 }
1854
1855 start = LABEL();
1856 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1857 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1858 #ifdef SUPPORT_UTF8
1859 if (common->utf8)
1860 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1861 #endif
1862 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
1863 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
1864 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
1865 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
1866 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
1867 found = JUMP(SLJIT_C_NOT_ZERO);
1868
1869 #ifdef SUPPORT_UTF8
1870 if (common->utf8)
1871 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
1872 #endif
1873 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
1874 #ifdef SUPPORT_UTF8
1875 if (common->utf8)
1876 {
1877 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
1878 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_table4 - 0xc0);
1879 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1880 }
1881 #endif
1882 JUMPTO(SLJIT_JUMP, start);
1883 JUMPHERE(found);
1884 JUMPHERE(leave);
1885
1886 if (firstline)
1887 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
1888 }
1889
1890 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uint16 reqbyte, BOOL has_firstbyte)
1891 {
1892 DEFINE_COMPILER;
1893 struct sljit_label *loop;
1894 struct sljit_jump *toolong;
1895 struct sljit_jump *alreadyfound;
1896 struct sljit_jump *found;
1897 struct sljit_jump *foundoc = NULL;
1898 struct sljit_jump *notfound;
1899 pcre_uint16 oc, bit;
1900
1901 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR);
1902 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
1903 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
1904 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
1905
1906 if (has_firstbyte)
1907 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, 1);
1908 else
1909 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
1910
1911 loop = LABEL();
1912 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
1913
1914 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), 0);
1915 if ((reqbyte & REQ_CASELESS) == 0)
1916 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte & 0xff);
1917 else
1918 {
1919 reqbyte &= 0xff;
1920 oc = common->fcc[reqbyte];
1921 bit = reqbyte ^ oc;
1922 if (ispowerof2(bit))
1923 {
1924 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
1925 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte | bit);
1926 }
1927 else
1928 {
1929 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, reqbyte);
1930 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
1931 }
1932 }
1933 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1934 JUMPTO(SLJIT_JUMP, loop);
1935
1936 JUMPHERE(found);
1937 if (foundoc)
1938 JUMPHERE(foundoc);
1939 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, TMP1, 0);
1940 JUMPHERE(alreadyfound);
1941 JUMPHERE(toolong);
1942 return notfound;
1943 }
1944
1945 static void do_revertframes(compiler_common *common)
1946 {
1947 DEFINE_COMPILER;
1948 struct sljit_jump *jump;
1949 struct sljit_label *mainloop;
1950
1951 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1952 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
1953
1954 /* Drop frames until we reach STACK_TOP. */
1955 mainloop = LABEL();
1956 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
1957 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
1958 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
1959 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
1960 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
1961 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
1962 JUMPTO(SLJIT_JUMP, mainloop);
1963
1964 JUMPHERE(jump);
1965 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
1966 /* End of dropping frames. */
1967 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1968
1969 JUMPHERE(jump);
1970 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
1971 /* Set string begin. */
1972 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
1973 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
1974 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
1975 JUMPTO(SLJIT_JUMP, mainloop);
1976
1977 JUMPHERE(jump);
1978 /* Unknown command. */
1979 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
1980 JUMPTO(SLJIT_JUMP, mainloop);
1981 }
1982
1983 static void check_wordboundary(compiler_common *common)
1984 {
1985 DEFINE_COMPILER;
1986 struct sljit_jump *beginend;
1987 #ifdef SUPPORT_UTF8
1988 struct sljit_jump *jump;
1989 #endif
1990
1991 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
1992
1993 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
1994 /* Get type of the previous char, and put it to LOCALS1. */
1995 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
1996 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
1997 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
1998 beginend = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
1999 skip_char_back(common);
2000 read_char(common);
2001
2002 /* Testing char type. */
2003 #ifdef SUPPORT_UCP
2004 if (common->useucp)
2005 {
2006 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2007 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2008 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2009 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2010 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2011 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2012 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2013 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2014 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2015 JUMPHERE(jump);
2016 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2017 }
2018 else
2019 #endif
2020 {
2021 #ifdef SUPPORT_UTF8
2022 /* Here LOCALS1 has already been zeroed. */
2023 jump = NULL;
2024 if (common->utf8)
2025 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2026 #endif
2027 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2028 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2029 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2030 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2031 #ifdef SUPPORT_UTF8
2032 if (jump != NULL)
2033 JUMPHERE(jump);
2034 #endif
2035 }
2036 JUMPHERE(beginend);
2037
2038 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2039 beginend = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2040 peek_char(common);
2041
2042 /* Testing char type. This is a code duplication. */
2043 #ifdef SUPPORT_UCP
2044 if (common->useucp)
2045 {
2046 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2047 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2048 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2049 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2050 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2051 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2052 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2053 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2054 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2055 JUMPHERE(jump);
2056 }
2057 else
2058 #endif
2059 {
2060 #ifdef SUPPORT_UTF8
2061 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2062 jump = NULL;
2063 if (common->utf8)
2064 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2065 #endif
2066 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2067 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2068 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2069 #ifdef SUPPORT_UTF8
2070 if (jump != NULL)
2071 JUMPHERE(jump);
2072 #endif
2073 }
2074 JUMPHERE(beginend);
2075
2076 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2077 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2078 }
2079
/* Emits the shared helper that tests whether the character in TMP1 is a
newline. It is presumably the target bound to the common->anynewline
fast-call list; the binding itself is not in this part of the file. The
emitted code accepts 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 when
common->utf8 is set. Fast-call sites in this file branch on the zero flag
immediately after the call. */
2080 static void check_anynewline(compiler_common *common)
2081 {
2082 /* Check whether TMP1 contains a newline character. TMP2 destroyed. TMP1 is modified as well: 0x0a is subtracted from it (and bit 0 is set in UTF-8 mode). */
2083 DEFINE_COMPILER;
2084
2085 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2086
/* Rebase TMP1 by 0x0a so that the 0x0a..0x0d range becomes 0..3 and a single
LESS_EQUAL test against 0x0d - 0x0a covers it. TMP2 starts as that result. */
2087 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2088 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2089 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
/* Equality test for 0x85, expressed relative to the 0x0a rebase. Its result is
consumed by the next COND_VALUE that gets emitted (inside or after the #ifdef). */
2090 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2091 #ifdef SUPPORT_UTF8
2092 if (common->utf8)
2093 {
2094 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* 0x2028 - 0x0a is 0x201e; setting bit 0 gives 0x201f, which equals
0x2029 - 0x0a, so one equality test accepts both 0x2028 and 0x2029. */
2095 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2096 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2097 }
2098 #endif
/* Merge the last comparison into TMP2; SLJIT_SET_E is requested here so the
caller can branch on the combined result. */
2099 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2100 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2101 }
2102
/* Emits the shared helper that tests whether the character in TMP1 is a
horizontal space. It is presumably the target bound to the common->hspace
fast-call list; the binding is not in this part of the file. The emitted
code accepts 0x09, 0x20 and 0xa0, and when common->utf8 is set also 0x1680,
0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000. Fast-call sites in this
file branch on the zero flag immediately after the call. */
2103 static void check_hspace(compiler_common *common)
2104 {
2105 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. In UTF-8 mode TMP1 is modified too (0x2000 is subtracted from it). */
2106 DEFINE_COMPILER;
2107
2108 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2109
/* Each OP2 compares TMP1 with one constant; the COND_VALUE that follows folds
the outcome into TMP2 (MOV for the first test, OR for the rest). */
2110 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2111 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2112 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2113 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2114 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2115 #ifdef SUPPORT_UTF8
2116 if (common->utf8)
2117 {
2118 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2119 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2120 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2121 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2122 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase TMP1 by 0x2000 so the 0x2000..0x200a range is one LESS_EQUAL test;
the remaining constants are written relative to the same base. */
2123 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2124 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2125 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2126 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2127 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2128 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2129 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2130 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2131 }
2132 #endif
/* Folds in whichever comparison was emitted last (0xa0 without UTF-8, 0x3000
with it) and requests SLJIT_SET_E so the caller can branch on the result. */
2133 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2134
2135 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2136 }
2137
/* Emits the shared helper that tests whether the character in TMP1 is a
vertical space. It is presumably the target bound to the common->vspace
fast-call list; the binding is not in this part of the file. The emitted
code accepts 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 when common->utf8
is set. Fast-call sites in this file branch on the zero flag immediately
after the call. */
2138 static void check_vspace(compiler_common *common)
2139 {
2140 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. TMP1 is modified as well: 0x0a is subtracted from it (and bit 0 is set in UTF-8 mode). */
2141 DEFINE_COMPILER;
2142
2143 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2144
/* Rebase TMP1 by 0x0a so that 0x0a..0x0d is covered by one LESS_EQUAL test. */
2145 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2146 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2147 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
/* Equality test for 0x85 relative to the rebase; consumed by the next
COND_VALUE that gets emitted. */
2148 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2149 #ifdef SUPPORT_UTF8
2150 if (common->utf8)
2151 {
/* NOTE(review): SLJIT_SET_E is requested here although a later OP2 sets the
flags again before they are read; looks redundant - confirm. */
2152 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
/* 0x2028 - 0x0a is 0x201e; setting bit 0 gives 0x201f == 0x2029 - 0x0a, so a
single equality test accepts both 0x2028 and 0x2029. */
2153 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2154 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2155 }
2156 #endif
/* Merge the last comparison into TMP2 and expose the result in the flags. */
2157 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2158
2159 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2160 }
2161
/* The comparison helpers below borrow STR_END and STACK_TOP as scratch
registers for the two bytes being compared; the original values are saved on
entry and restored before returning. */
2162 #define CHAR1 STR_END
2163 #define CHAR2 STACK_TOP
2164
/* Emits a shared case-sensitive byte-by-byte comparison loop.
Register use, as visible in the emitted sequence:
  TMP1    - address of the first sequence (advanced by the loop)
  TMP2    - number of bytes to compare; counted down, and left non-zero when
            the loop exits on a mismatch
  STR_PTR - has TMP2 subtracted on entry, then walks the second sequence
TMP3 and LOCALS0 hold the saved CHAR1/CHAR2 registers.
NOTE(review): how the caller tests the outcome is not visible here;
presumably it checks TMP2 - confirm against the call site. */
2165 static void do_casefulcmp(compiler_common *common)
2166 {
2167 DEFINE_COMPILER;
2168 struct sljit_jump *jump;
2169 struct sljit_label *label;
2170
2171 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
/* Step STR_PTR back by the length held in TMP2. */
2172 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers that are reused as CHAR1 / CHAR2. */
2173 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2174 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one because the loads in the loop address
[reg + 1] with SLJIT_MOVU_UB (presumably an address-updating load - confirm
against the sljit documentation). */
2175 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2176 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2177
/* Loop: load one byte from each side, leave on mismatch, otherwise decrement
the counter and repeat while it is non-zero. */
2178 label = LABEL();
2179 OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);
2180 OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);
2181 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2182 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2183 JUMPTO(SLJIT_C_NOT_ZERO, label);
2184
/* Common exit for mismatch and for loop completion: undo the one-byte
pre-decrement of STR_PTR and restore the borrowed registers. */
2185 JUMPHERE(jump);
2186 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2187 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2188 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2189 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2190 }
2191
/* The caseless variant additionally borrows STACK_LIMIT to hold the address
of the lower-casing table (common->lcc). */
2192 #define LCC_TABLE STACK_LIMIT
2193
/* Emits a shared caseless byte-by-byte comparison loop. It has the same shape
as do_casefulcmp, except that each loaded byte is first mapped through the
table at common->lcc before the two are compared.
Register use, as visible in the emitted sequence:
  TMP1    - address of the first sequence (advanced by the loop)
  TMP2    - number of bytes to compare; counted down, and left non-zero when
            the loop exits on a mismatch
  STR_PTR - has TMP2 subtracted on entry, then walks the second sequence
TMP3, LOCALS0 and LOCALS1 hold the saved LCC_TABLE, CHAR1 and CHAR2 registers.
NOTE(review): how the caller tests the outcome is not visible here;
presumably it checks TMP2 - confirm against the call site. */
2194 static void do_caselesscmp(compiler_common *common)
2195 {
2196 DEFINE_COMPILER;
2197 struct sljit_jump *jump;
2198 struct sljit_label *label;
2199
2200 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
/* Step STR_PTR back by the length held in TMP2. */
2201 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2202
/* Save the three borrowed registers, then load the table address. */
2203 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2204 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2205 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2206 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Pre-decrement both pointers because the loop loads address [reg + 1] with
SLJIT_MOVU_UB (presumably an address-updating load - confirm). */
2207 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2208 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2209
/* Loop: load a byte from each side, translate both through the table, leave
on mismatch, otherwise decrement the counter and repeat while non-zero. */
2210 label = LABEL();
2211 OP1(SLJIT_MOVU_UB, CHAR1, 0, SLJIT_MEM1(TMP1), 1);
2212 OP1(SLJIT_MOVU_UB, CHAR2, 0, SLJIT_MEM1(STR_PTR), 1);
2213 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2214 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2215 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2216 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2217 JUMPTO(SLJIT_C_NOT_ZERO, label);
2218
/* Common exit: undo the one-byte pre-decrement of STR_PTR and restore the
borrowed registers. */
2219 JUMPHERE(jump);
2220 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2221 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2222 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2223 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2224 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2225 }
2226
/* The register aliases are local to the two comparison helpers. */
2227 #undef LCC_TABLE
2228 #undef CHAR1
2229 #undef CHAR2
2230
2231 #ifdef SUPPORT_UTF8
2232 #ifdef SUPPORT_UCP
2233
/* Caseless comparison of the character sequence [src1, end1) against the
text starting at args->ptr and bounded by args->end.

Characters are decoded one at a time from both sides with GETCHARINC. A pair
is accepted when the two code points are equal or when the first one equals
UCD_OTHERCASE() of the second.

Returns the position in the second sequence just after the last compared
character when everything in [src1, end1) was matched, or NULL when a pair
differs or the second sequence reaches args->end first. */
2234 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
2235 {
2236 /* This function would be inefficient to implement at JIT level, so it is a plain C helper. */
2237 int c1, c2;
2238 const pcre_uchar *src2 = args->ptr;
2239 const pcre_uchar *end2 = (pcre_uchar *)args->end;
2240
2241 while (src1 < end1)
2242 {
/* The second sequence ran out before the first one was fully compared. */
2243 if (src2 >= end2)
2244 return NULL;
2245 GETCHARINC(c1, src1);
2246 GETCHARINC(c2, src2);
2247 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
2248 }
2249 return src2;
2250 }
2251
2252 #endif
2253 #endif
2254
/* Emits code that compares one pattern character at cc with subject bytes
addressed at negative offsets from STR_PTR. The character is a single byte,
or a whole multi-byte sequence when common->utf8 is set and *cc >= 0xc0.

  caseless  - when set and the character has another case, the differing bit
              is OR-ed into the loaded subject data and into the expected
              value, so both cases compare equal
  context   - state carried across consecutive calls:
                length    bytes still to be compared (decremented per byte)
                sourcereg register that receives the next load; -1 before
                          the first load
                byteptr, c, oc  (unaligned build only) bytes batched for a
                          combined 1/2/4-byte compare and their case masks
  fallbacks - receives the jumps taken on mismatch

Returns cc advanced past the bytes that were consumed. */
2255 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2256 compare_context* context, jump_list **fallbacks)
2257 {
2258 DEFINE_COMPILER;
2259 unsigned int othercasebit = 0;
2260 pcre_uint8 *othercasebyte = NULL;
2261 #ifdef SUPPORT_UTF8
2262 int utf8length;
2263 #endif
2264
2265 if (caseless && char_has_othercase(common, cc))
2266 {
2267 othercasebit = char_get_othercase_bit(common, cc);
2268 SLJIT_ASSERT(othercasebit);
2269 /* Extracting bit difference info. */
/* The value above bit 7 selects which byte of the character differs; the low
eight bits are the mask within that byte. */
2270 othercasebyte = cc + (othercasebit >> 8);
2271 othercasebit &= 0xff;
2272 }
2273
/* First call for this sequence: load the first chunk into TMP1 and direct the
next load to TMP2. */
2274 if (context->sourcereg == -1)
2275 {
2276 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2277 if (context->length >= 4)
2278 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2279 else if (context->length >= 2)
2280 OP1(SLJIT_MOV_SH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2281 else
2282 #endif
2283 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2284 context->sourcereg = TMP2;
2285 }
2286
2287 #ifdef SUPPORT_UTF8
/* Number of bytes of this character: one plus the extra-byte count from
_pcre_utf8_table4 for a UTF-8 lead byte. */
2288 utf8length = 1;
2289 if (common->utf8 && *cc >= 0xc0)
2290 utf8length += _pcre_utf8_table4[*cc & 0x3f];
2291
2292 do
2293 {
2294 #endif
2295
2296 context->length--;
2297 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2298
2299 /* Unaligned read is supported. */
/* Queue this byte (with the case bit forced on when it is the differing
byte) and its mask. */
2300 if (othercasebit != 0 && othercasebyte == cc)
2301 {
2302 context->c.asbytes[context->byteptr] = *cc | othercasebit;
2303 context->oc.asbytes[context->byteptr] = othercasebit;
2304 }
2305 else
2306 {
2307 context->c.asbytes[context->byteptr] = *cc;
2308 context->oc.asbytes[context->byteptr] = 0;
2309 }
2310 context->byteptr++;
2311
/* Flush when four bytes are queued, when nothing is left, or when two bytes
are queued and exactly one byte remains. */
2312 if (context->byteptr >= 4 || context->length == 0 || (context->byteptr == 2 && context->length == 1))
2313 {
/* Issue the load for the following chunk first, then switch sourcereg to the
other register, which holds the data loaded earlier for this chunk. */
2314 if (context->length >= 4)
2315 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2316 else if (context->length >= 2)
2317 OP1(SLJIT_MOV_SH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2318 else if (context->length >= 1)
2319 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2320 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2321
/* Compare the queued bytes as one 4-, 2- or 1-byte value; the case mask is
OR-ed into both the loaded data and the expected constant. */
2322 switch(context->byteptr)
2323 {
2324 case 4:
2325 if (context->oc.asint != 0)
2326 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2327 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2328 break;
2329
2330 case 2:
2331 if (context->oc.asshort != 0)
2332 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asshort);
2333 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asshort | context->oc.asshort));
2334 break;
2335
2336 case 1:
2337 if (context->oc.asbyte != 0)
2338 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2339 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2340 break;
2341
2342 default:
2343 SLJIT_ASSERT_STOP();
2344 break;
2345 }
2346 context->byteptr = 0;
2347 }
2348
2349 #else
2350
2351 /* Unaligned read is unsupported. */
/* Byte-at-a-time variant: load the next byte (if any) into sourcereg, switch
to the other register and compare the byte that was loaded previously. */
2352 if (context->length > 0)
2353 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2354 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2355
2356 if (othercasebit != 0 && othercasebyte == cc)
2357 {
2358 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2359 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2360 }
2361 else
2362 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2363
2364 #endif
2365
2366 cc++;
2367 #ifdef SUPPORT_UTF8
2368 utf8length--;
2369 }
2370 while (utf8length > 0);
2371 #endif
2372
2373 return cc;
2374 }
2375
2376 #ifdef SUPPORT_UTF8
2377
2378 #define SET_TYPE_OFFSET(value) \
2379 if ((value) != typeoffset) \
2380 { \
2381 if ((value) > typeoffset) \
2382 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
2383 else \
2384 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
2385 } \
2386 typeoffset = (value);
2387
2388 #define SET_CHAR_OFFSET(value) \
2389 if ((value) != charoffset) \
2390 { \
2391 if ((value) > charoffset) \
2392 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
2393 else \
2394 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
2395 } \
2396 charoffset = (value);
2397
/* Emits the matching code for extended character class data starting at cc.

Layout consumed here: one flag byte (XCL_NOT, XCL_MAP), an optional 32-byte
bitmap when XCL_MAP is set, then a list of items (XCL_SINGLE, XCL_RANGE,
XCL_PROP, XCL_NOTPROP) terminated by XCL_END.

The emitted code checks for end of input, reads one character and tests it
against the bitmap and the items. For a non-negated class a hit jumps to a
local "found" label placed at the end and the last item falls back on a miss;
for a negated class (XCL_NOT) every hit jumps to fallbacks.

  common    - compiler state
  cc        - start of the class data (the flag byte)
  fallbacks - receives the jumps taken when the character does not match */
2398 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2399 {
2400 DEFINE_COMPILER;
2401 jump_list *found = NULL;
2402 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2403 unsigned int c;
2404 int compares;
2405 struct sljit_jump *jump = NULL;
2406 pcre_uchar *ccbegin;
2407 #ifdef SUPPORT_UCP
2408 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2409 BOOL charsaved = FALSE;
2410 int typereg = TMP1, scriptreg = TMP1;
2411 unsigned int typeoffset;
2412 #endif
2413 int invertcmp, numberofcmps;
2414 unsigned int charoffset;
2415
2416 /* Although SUPPORT_UTF8 must be defined, we are not necessarily in utf8 mode. */
2417 check_input_end(common, fallbacks);
2418 read_char(common);
2419
/* Bitmap part: test bit (c & 7) of byte (c >> 3) in the 32-byte map that
follows the flag byte. In UTF-8 mode characters above 255 skip the map. The
character is kept in TMP3 and restored to TMP1 afterwards. */
2420 if ((*cc++ & XCL_MAP) != 0)
2421 {
2422 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2423 if (common->utf8)
2424 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2425
2426 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2427 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2428 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2429 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2430 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2431 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2432
2433 if (common->utf8)
2434 JUMPHERE(jump);
2435 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2436 #ifdef SUPPORT_UCP
2437 charsaved = TRUE;
2438 #endif
2439 cc += 32;
2440 }
2441
2442 /* Scanning the necessary info. */
/* First pass: count the items and record whether the character itself, its
Unicode type and/or its script are needed by any of them. */
2443 ccbegin = cc;
2444 compares = 0;
2445 while (*cc != XCL_END)
2446 {
2447 compares++;
2448 if (*cc == XCL_SINGLE)
2449 {
2450 cc += 2;
2451 #ifdef SUPPORT_UTF8
2452 if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
2453 #endif
2454 #ifdef SUPPORT_UCP
2455 needschar = TRUE;
2456 #endif
2457 }
2458 else if (*cc == XCL_RANGE)
2459 {
2460 cc += 2;
2461 #ifdef SUPPORT_UTF8
2462 if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
2463 #endif
2464 cc++;
2465 #ifdef SUPPORT_UTF8
2466 if (common->utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
2467 #endif
2468 #ifdef SUPPORT_UCP
2469 needschar = TRUE;
2470 #endif
2471 }
2472 #ifdef SUPPORT_UCP
2473 else
2474 {
2475 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2476 cc++;
2477 switch(*cc)
2478 {
2479 case PT_ANY:
2480 break;
2481
2482 case PT_LAMP:
2483 case PT_GC:
2484 case PT_PC:
2485 case PT_ALNUM:
2486 needstype = TRUE;
2487 break;
2488
2489 case PT_SC:
2490 needsscript = TRUE;
2491 break;
2492
2493 case PT_SPACE:
2494 case PT_PXSPACE:
2495 case PT_WORD:
2496 needstype = TRUE;
2497 needschar = TRUE;
2498 break;
2499
2500 default:
2501 SLJIT_ASSERT_STOP();
2502 break;
2503 }
2504 cc += 2;
2505 }
2506 #endif
2507 }
2508
2509 #ifdef SUPPORT_UCP
2510 /* Simple register allocation. TMP1 is preferred if possible. */
2511 if (needstype || needsscript)
2512 {
/* Keep a copy of the character in TMP3 (unless the bitmap code already did)
before the getucd helper is called. */
2513 if (needschar && !charsaved)
2514 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2515 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2516 if (needschar)
2517 {
2518 if (needstype)
2519 {
2520 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2521 typereg = RETURN_ADDR;
2522 }
2523
2524 if (needsscript)
2525 scriptreg = TMP3;
2526 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2527 }
2528 else if (needstype && needsscript)
2529 scriptreg = TMP3;
2530 /* In all other cases only one of them was specified, and that can go to TMP1. */
2531
2532 if (needsscript)
2533 {
2534 if (scriptreg == TMP1)
2535 {
2536 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, script));
2537 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
2538 }
2539 else
2540 {
2541 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
2542 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)_pcre_ucd_records + SLJIT_OFFSETOF(ucd_record, script));
2543 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
2544 }
2545 }
2546 }
2547 #endif
2548
2549 /* Generating code. */
/* Second pass over the same items. charoffset / typeoffset track how much has
already been subtracted from TMP1 / typereg, so constants are emitted relative
to the current offset. numberofcmps counts comparisons whose results are being
accumulated in TMP2 before a single conditional jump is emitted. */
2550 cc = ccbegin;
2551 charoffset = 0;
2552 numberofcmps = 0;
2553 #ifdef SUPPORT_UCP
2554 typeoffset = 0;
2555 #endif
2556
2557 while (*cc != XCL_END)
2558 {
2559 compares--;
/* The last item of a non-negated class is inverted: a miss goes to fallbacks
and a hit simply falls through to the "found" label. */
2560 invertcmp = (compares == 0 && list != fallbacks);
2561 jump = NULL;
2562
2563 if (*cc == XCL_SINGLE)
2564 {
2565 cc ++;
2566 #ifdef SUPPORT_UTF8
2567 if (common->utf8)
2568 {
2569 GETCHARINC(c, cc);
2570 }
2571 else
2572 #endif
2573 c = *cc++;
2574
2575 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2576 {
2577 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2578 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2579 numberofcmps++;
2580 }
2581 else if (numberofcmps > 0)
2582 {
2583 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2584 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2585 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2586 numberofcmps = 0;
2587 }
2588 else
2589 {
2590 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2591 numberofcmps = 0;
2592 }
2593 }
2594 else if (*cc == XCL_RANGE)
2595 {
2596 cc ++;
2597 #ifdef SUPPORT_UTF8
2598 if (common->utf8)
2599 {
2600 GETCHARINC(c, cc);
2601 }
2602 else
2603 #endif
2604 c = *cc++;
/* Rebase TMP1 to the start of the range so that one LESS_EQUAL test against
(end - start) covers the whole range. */
2605 SET_CHAR_OFFSET(c);
2606 #ifdef SUPPORT_UTF8
2607 if (common->utf8)
2608 {
2609 GETCHARINC(c, cc);
2610 }
2611 else
2612 #endif
2613 c = *cc++;
2614 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2615 {
2616 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2617 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2618 numberofcmps++;
2619 }
2620 else if (numberofcmps > 0)
2621 {
2622 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2623 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2624 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2625 numberofcmps = 0;
2626 }
2627 else
2628 {
2629 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2630 numberofcmps = 0;
2631 }
2632 }
2633 #ifdef SUPPORT_UCP
2634 else
2635 {
/* Property item: [XCL_PROP | XCL_NOTPROP] [type] [value]. After the cc++
below, cc points at the type byte and cc[-1] is the item code. */
2636 if (*cc == XCL_NOTPROP)
2637 invertcmp ^= 0x1;
2638 cc++;
2639 switch(*cc)
2640 {
2641 case PT_ANY:
/* An "any" property either always hits or never hits, so in some positions
no code is needed at all. When the item is skipped, cc must still be moved
past the type and value bytes: the "cc += 2" after the switch is not reached
by "continue", and without it the loop would next read the type byte as if it
were an item code, dropping or misparsing the items that follow. */
2642 if (list != fallbacks)
2643 {
2644 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
2645 { cc += 2; continue; }
2646 }
2647 else if (cc[-1] == XCL_NOTPROP)
2648 { cc += 2; continue; }
2649 jump = JUMP(SLJIT_JUMP);
2650 break;
2651
2652 case PT_LAMP:
2653 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
2654 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2655 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
2656 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2657 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
2658 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2659 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2660 break;
2661
2662 case PT_GC:
2663 c = _pcre_ucp_typerange[(int)cc[1] * 2];
2664 SET_TYPE_OFFSET(c);
2665 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, _pcre_ucp_typerange[(int)cc[1] * 2 + 1] - c);
2666 break;
2667
2668 case PT_PC:
2669 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
2670 break;
2671
2672 case PT_SC:
2673 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
2674 break;
2675
2676 case PT_SPACE:
2677 case PT_PXSPACE:
/* Characters 9..13 count as space, except that for PT_SPACE character 11 is
excluded: TMP2 is preset to 0 and the range test is jumped over for it. The
Zl..Zs type range is then OR-ed in. */
2678 if (*cc == PT_SPACE)
2679 {
2680 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2681 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
2682 }
2683 SET_CHAR_OFFSET(9);
2684 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
2685 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2686 if (*cc == PT_SPACE)
2687 JUMPHERE(jump);
2688
2689 SET_TYPE_OFFSET(ucp_Zl);
2690 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
2691 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2692 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2693 break;
2694
2695 case PT_WORD:
/* Underscore is tested first; the alphanumeric type tests below are then
OR-ed into the same TMP2 accumulator. */
2696 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
2697 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2698 /* ... fall through */
2699
2700 case PT_ALNUM:
2701 SET_TYPE_OFFSET(ucp_Ll);
2702 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2703 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2704 SET_TYPE_OFFSET(ucp_Nd);
2705 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2706 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
2707 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2708 break;
2709 }
2710 cc += 2;
2711 }
2712 #endif
2713
/* Jumps from all but the last item go to "list" (found, or fallbacks for a
negated class); the jump of the last item always goes to fallbacks. */
2714 if (jump != NULL)
2715 add_jump(compiler, compares > 0 ? list : fallbacks, jump);
2716 }
2717
2718 if (found != NULL)
2719 set_jumps(found, LABEL());
2720 }
2721
2722 #undef SET_TYPE_OFFSET
2723 #undef SET_CHAR_OFFSET
2724
2725 #endif
2726
2727 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
2728 {
2729 DEFINE_COMPILER;
2730 int length;
2731 unsigned int c, oc, bit;
2732 compare_context context;
2733 struct sljit_jump *jump[4];
2734 #ifdef SUPPORT_UTF8
2735 struct sljit_label *label;
2736 #ifdef SUPPORT_UCP
2737 pcre_uchar propdata[5];
2738 #endif
2739 #endif
2740
2741 switch(type)
2742 {
2743 case OP_SOD:
2744 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2745 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2746 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
2747 return cc;
2748
2749 case OP_SOM:
2750 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2751 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2752 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
2753 return cc;
2754
2755 case OP_NOT_WORD_BOUNDARY:
2756 case OP_WORD_BOUNDARY:
2757 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
2758 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2759 return cc;
2760
2761 case OP_NOT_DIGIT:
2762 case OP_DIGIT:
2763 check_input_end(common, fallbacks);
2764 read_char8_type(common);
2765 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
2766 add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2767 return cc;
2768
2769 case OP_NOT_WHITESPACE:
2770 case OP_WHITESPACE:
2771 check_input_end(common, fallbacks);
2772 read_char8_type(common);
2773 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
2774 add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2775 return cc;
2776
2777 case OP_NOT_WORDCHAR:
2778 case OP_WORDCHAR:
2779 check_input_end(common, fallbacks);
2780 read_char8_type(common);
2781 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
2782 add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
2783 return cc;
2784
2785 case OP_ANY:
2786 check_input_end(common, fallbacks);
2787 read_char(common);
2788 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2789 {
2790 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
2791 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2792 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2793 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
2794 JUMPHERE(jump[1]);
2795 JUMPHERE(jump[0]);
2796 }
2797 else
2798 check_newlinechar(common, common->nltype, fallbacks, TRUE);
2799 return cc;
2800
2801 case OP_ALLANY:
2802 check_input_end(common, fallbacks);
2803 #ifdef SUPPORT_UTF8
2804 if (common->utf8)
2805 {
2806 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2807 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2808 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2809 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_table4 - 0xc0);
2810 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2811 JUMPHERE(jump[0]);
2812 return cc;
2813 }
2814 #endif
2815 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2816 return cc;
2817
2818 case OP_ANYBYTE:
2819 check_input_end(common, fallbacks);
2820 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2821 return cc;
2822
2823 #ifdef SUPPORT_UTF8
2824 #ifdef SUPPORT_UCP
2825 case OP_NOTPROP:
2826 case OP_PROP:
2827 propdata[0] = 0;
2828 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
2829 propdata[2] = cc[0];
2830 propdata[3] = cc[1];
2831 propdata[4] = XCL_END;
2832 compile_xclass_hotpath(common, propdata, fallbacks);
2833 return cc + 2;
2834 #endif
2835 #endif
2836
2837 case OP_ANYNL:
2838 check_input_end(common, fallbacks);
2839 read_char(common);
2840 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2841 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2842 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2843 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
2844 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
2845 jump[3] = JUMP(SLJIT_JUMP);
2846 JUMPHERE(jump[0]);
2847 check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
2848 JUMPHERE(jump[1]);
2849 JUMPHERE(jump[2]);
2850 JUMPHERE(jump[3]);
2851 return cc;
2852
2853 case OP_NOT_HSPACE:
2854 case OP_HSPACE:
2855 check_input_end(common, fallbacks);
2856 read_char(common);
2857 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
2858 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2859 return cc;
2860
2861 case OP_NOT_VSPACE:
2862 case OP_VSPACE:
2863 check_input_end(common, fallbacks);
2864 read_char(common);
2865 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
2866 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2867 return cc;
2868
2869 #ifdef SUPPORT_UCP
2870 case OP_EXTUNI:
2871 check_input_end(common, fallbacks);
2872 read_char(common);
2873 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2874 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
2875 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
2876
2877 label = LABEL();
2878 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2879 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2880 read_char(common);
2881 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2882 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
2883 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
2884
2885 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2886 JUMPHERE(jump[0]);
2887 return cc;
2888 #endif
2889
2890 case OP_EODN:
2891 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2892 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2893 {
2894 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
2895 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2896 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
2897 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
2898 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
2899 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
2900 }
2901 else if (common->nltype == NLTYPE_FIXED)
2902 {
2903 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1);
2904 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2905 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
2906 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2907 }
2908 else
2909 {
2910 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2911 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2912 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
2913 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
2914 jump[2] = JUMP(SLJIT_C_GREATER);
2915 add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
2916 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 1);
2917 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
2918 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
2919
2920 JUMPHERE(jump[1]);
2921 if (common->nltype == NLTYPE_ANYCRLF)
2922 {
2923 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 1);
2924 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
2925 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2926 }
2927 else
2928 {
2929 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
2930 read_char(common);
2931 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
2932 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2933 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
2934 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2935 }
2936 JUMPHERE(jump[2]);
2937 JUMPHERE(jump[3]);
2938 }
2939 JUMPHERE(jump[0]);
2940 return cc;
2941
2942 case OP_EOD:
2943 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
2944 return cc;
2945
2946 case OP_CIRC:
2947 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
2948 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
2949 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
2950 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
2951 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
2952 return cc;
2953
2954 case OP_CIRCM:
2955 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
2956 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
2957 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
2958 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
2959 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
2960 jump[0] = JUMP(SLJIT_JUMP);
2961 JUMPHERE(jump[1]);
2962
2963 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, end));
2964 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, STR_PTR, 0));
2965
2966 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2967 {
2968 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
2969 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
2970 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -2);
2971 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), -1);
2972 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
2973 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
2974 }
2975 else
2976 {
2977 skip_char_back(common);
2978 read_char(common);
2979 check_newlinechar(common, common->nltype, fallbacks, FALSE);
2980 }
2981 JUMPHERE(jump[0]);
2982 return cc;
2983
2984 case OP_DOLL:
2985 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
2986 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
2987 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
2988
2989 if (!common->endonly)
2990 compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
2991 else
2992 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
2993 return cc;
2994
2995 case OP_DOLLM:
2996 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2997 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
2998 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
2999 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3000 jump[0] = JUMP(SLJIT_JUMP);
3001 JUMPHERE(jump[1]);
3002
3003 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3004 {
3005 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 2);
3006 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3007 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3008 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(STR_PTR), 1);
3009 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3010 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3011 }
3012 else
3013 {
3014 peek_char(common);
3015 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3016 }
3017 JUMPHERE(jump[0]);
3018 return cc;
3019
3020 case OP_CHAR:
3021 case OP_CHARI:
3022 length = 1;
3023 #ifdef SUPPORT_UTF8
3024 if (common->utf8 && *cc >= 0xc0) length += _pcre_utf8_table4[*cc & 0x3f];
3025 #endif
3026 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
3027 {
3028 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
3029 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3030
3031 context.length = length;
3032 context.sourcereg = -1;
3033 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3034 context.byteptr = 0;
3035 #endif
3036 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3037 }
3038 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3039 read_char(common);
3040 #ifdef SUPPORT_UTF8
3041 if (common->utf8)
3042 {
3043 GETCHAR(c, cc);
3044 }
3045 else
3046 #endif
3047 c = *cc;
3048 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3049 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3050 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3051 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3052 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3053 return cc + length;
3054
3055 case OP_NOT:
3056 case OP_NOTI:
3057 length = 1;
3058 #ifdef SUPPORT_UTF8
3059 if (common->utf8)
3060 {
3061 if (*cc >= 0xc0) length += _pcre_utf8_table4[*cc & 0x3f];
3062
3063 check_input_end(common, fallbacks);
3064 GETCHAR(c, cc);
3065
3066 if (c <= 127)
3067 {
3068 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3069 if (type == OP_NOT || !char_has_othercase(common, cc))
3070 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3071 else
3072 {
3073 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3074 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3075 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3076 }
3077 /* Skip the variable-length character. */
3078 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
3079 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3080 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)_pcre_utf8_table4 - 0xc0);
3081 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3082 JUMPHERE(jump[0]);
3083 return cc + length;
3084 }
3085 else
3086 read_char(common);
3087 }
3088 else
3089 #endif
3090 {
3091 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 1);
3092 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3093 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -1);
3094 c = *cc;
3095 }
3096
3097 if (type == OP_NOT || !char_has_othercase(common, cc))
3098 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3099 else
3100 {
3101 oc = char_othercase(common, c);
3102 bit = c ^ oc;
3103 if (ispowerof2(bit))
3104 {
3105 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3106 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3107 }
3108 else
3109 {
3110 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3111 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3112 }
3113 }
3114 return cc + length;
3115
3116 case OP_CLASS:
3117 case OP_NCLASS:
3118 check_input_end(common, fallbacks);
3119 read_char(common);
3120 #ifdef SUPPORT_UTF8
3121 jump[0] = NULL;
3122 if (common->utf8)
3123 {
3124 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3125 if (type == OP_CLASS)
3126 {
3127 add_jump(compiler, fallbacks, jump[0]);
3128 jump[0] = NULL;
3129 }
3130 }
3131 #endif
3132 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3133 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3134 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3135 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3136 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3137 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3138 #ifdef SUPPORT_UTF8
3139 if (jump[0] != NULL)
3140 JUMPHERE(jump[0]);
3141 #endif
3142 return cc + 32;
3143
3144 #ifdef SUPPORT_UTF8
3145 case OP_XCLASS:
3146 compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3147 return cc + GET(cc, 0) - 1;
3148 #endif
3149
3150 case OP_REVERSE:
3151 length = GET(cc, 0);
3152 SLJIT_ASSERT(length > 0);
3153 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3154 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3155 #ifdef SUPPORT_UTF8
3156 if (common->utf8)
3157 {
3158 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3159 label = LABEL();
3160 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0));
3161 skip_char_back(common);
3162 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3163 JUMPTO(SLJIT_C_NOT_ZERO, label);
3164 return cc + LINK_SIZE;
3165 }
3166 #endif
3167 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, length);
3168 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3169 return cc + LINK_SIZE;
3170 }
3171 SLJIT_ASSERT_STOP();
3172 return cc;
3173 }
3174
3175 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3176 {
3177 /* This function consumes at least one input character. */
3178 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3179 DEFINE_COMPILER;
3180 pcre_uchar *ccbegin = cc;
3181 compare_context context;
3182 int size;
3183
3184 context.length = 0;
3185 do
3186 {
3187 if (cc >= ccend)
3188 break;
3189
3190 if (*cc == OP_CHAR)
3191 {
3192 size = 1;
3193 #ifdef SUPPORT_UTF8
3194 if (common->utf8 && cc[1] >= 0xc0)
3195 size += _pcre_utf8_table4[cc[1] & 0x3f];
3196 #endif
3197 }
3198 else if (*cc == OP_CHARI)
3199 {
3200 size = 1;
3201 #ifdef SUPPORT_UTF8
3202 if (common->utf8)
3203 {
3204 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3205 size = 0;
3206 else if (cc[1] >= 0xc0)
3207 size += _pcre_utf8_table4[cc[1] & 0x3f];
3208 }
3209 else
3210 #endif
3211 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3212 size = 0;
3213 }
3214 else
3215 size = 0;
3216
3217 cc += 1 + size;
3218 context.length += size;
3219 }
3220 while (size > 0 && context.length <= 128);
3221
3222 cc = ccbegin;
3223 if (context.length > 0)
3224 {
3225 /* We have a fixed-length byte sequence. */
3226 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3227 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3228
3229 context.sourcereg = -1;
3230 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3231 context.byteptr = 0;
3232 #endif
3233 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3234 return cc;
3235 }
3236
3237 /* A non-fixed length character will be checked if length == 0. */
3238 return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3239 }
3240
3241 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3242 {
3243 DEFINE_COMPILER;
3244 int offset = GET2(cc, 1) << 1;
3245
3246 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3247 if (!common->jscript_compat)
3248 {
3249 if (fallbacks == NULL)
3250 {
3251 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3252 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3253 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3254 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3255 return JUMP(SLJIT_C_NOT_ZERO);
3256 }
3257 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3258 }
3259 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3260 }
3261
3262 /* Forward definitions. */
3263 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3264 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3265
3266 #define PUSH_FALLBACK(size, ccstart, error) \
3267 do \
3268 { \
3269 fallback = sljit_alloc_memory(compiler, (size)); \
3270 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
3271 return error; \
3272 memset(fallback, 0, size); \
3273 fallback->prev = parent->top; \
3274 fallback->cc = (ccstart); \
3275 parent->top = fallback; \
3276 } \
3277 while (0)
3278
3279 #define PUSH_FALLBACK_NOVALUE(size, ccstart) \
3280 do \
3281 { \
3282 fallback = sljit_alloc_memory(compiler, (size)); \
3283 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
3284 return; \
3285 memset(fallback, 0, size); \
3286 fallback->prev = parent->top; \
3287 fallback->cc = (ccstart); \
3288 parent->top = fallback; \
3289 } \
3290 while (0)
3291
3292 #define FALLBACK_AS(type) ((type*)fallback)
3293
3294 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3295 {
3296 DEFINE_COMPILER;
3297 int offset = GET2(cc, 1) << 1;
3298 struct sljit_jump *jump = NULL;
3299
3300 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3301 if (withchecks && !common->jscript_compat)
3302 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3303
3304 #ifdef SUPPORT_UTF8
3305 #ifdef SUPPORT_UCP
3306 if (common->utf8 && *cc == OP_REFI)
3307 {
3308 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3309 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3310 if (withchecks)
3311 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3312
3313 /* Needed to save important temporary registers. */
3314 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3315 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3316 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
3317 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3318 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3319 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3320 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3321 }
3322 else
3323 #endif
3324 #endif
3325 {
3326 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3327 if (withchecks)
3328 jump = JUMP(SLJIT_C_ZERO);
3329 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3330
3331 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3332 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3333 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3334 }
3335
3336 if (jump != NULL)
3337 {
3338 if (emptyfail)
3339 add_jump(compiler, fallbacks, jump);
3340 else
3341 JUMPHERE(jump);
3342 }
3343 return cc + 3;
3344 }
3345
3346 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3347 {
3348 DEFINE_COMPILER;
3349 fallback_common *fallback;
3350 pcre_uchar type;
3351 struct sljit_label *label;
3352 struct sljit_jump *zerolength;
3353 struct sljit_jump *jump = NULL;
3354 pcre_uchar *ccbegin = cc;
3355 int min = 0, max = 0;
3356 BOOL minimize;
3357
3358 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3359
3360 type = cc[3];
3361 minimize = (type & 0x1) != 0;
3362 switch(type)
3363 {
3364 case OP_CRSTAR:
3365 case OP_CRMINSTAR:
3366 min = 0;
3367 max = 0;
3368 cc += 4;
3369 break;
3370 case OP_CRPLUS:
3371 case OP_CRMINPLUS:
3372 min = 1;
3373 max = 0;
3374 cc += 4;
3375 break;
3376 case OP_CRQUERY:
3377 case OP_CRMINQUERY:
3378 min = 0;
3379 max = 1;
3380 cc += 4;
3381 break;
3382 case OP_CRRANGE:
3383 case OP_CRMINRANGE:
3384 min = GET2(cc, 3 + 1);
3385 max = GET2(cc, 3 + 3);
3386 cc += 8;
3387 break;
3388 default:
3389 SLJIT_ASSERT_STOP();
3390 break;
3391 }
3392
3393 if (!minimize)
3394 {
3395 if (min == 0)
3396 {
3397 allocate_stack(common, 2);
3398 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3400 /* Temporary release of STR_PTR. */
3401 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3402 zerolength = compile_ref_checks(common, ccbegin, NULL);
3403 /* Restore if not zero length. */
3404 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3405 }
3406 else
3407 {
3408 allocate_stack(common, 1);
3409 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3410 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3411 }
3412
3413 if (min > 1 || max > 1)
3414 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
3415
3416 label = LABEL();
3417 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
3418
3419 if (min > 1 || max > 1)
3420 {
3421 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3422 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3423 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
3424 if (min > 1)
3425 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
3426 if (max > 1)
3427 {
3428 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
3429 allocate_stack(common, 1);
3430 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3431 JUMPTO(SLJIT_JUMP, label);
3432 JUMPHERE(jump);
3433 }
3434 }
3435
3436 if (max == 0)
3437 {
3438 /* Includes min > 1 case as well. */
3439 allocate_stack(common, 1);
3440 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3441 JUMPTO(SLJIT_JUMP, label);
3442 }
3443
3444 JUMPHERE(zerolength);
3445 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3446
3447 decrease_call_count(common);
3448 return cc;
3449 }
3450
3451 allocate_stack(common, 2);
3452 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3453 if (type != OP_CRMINSTAR)
3454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3455
3456 if (min == 0)
3457 {
3458 zerolength = compile_ref_checks(common, ccbegin, NULL);
3459 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3460 jump = JUMP(SLJIT_JUMP);
3461 }
3462 else
3463 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3464
3465 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3466 if (max > 0)
3467 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
3468
3469 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
3470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3471
3472 if (min > 1)
3473 {
3474 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3475 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3477 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
3478 }
3479 else if (max > 0)
3480 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
3481
3482 if (jump != NULL)
3483 JUMPHERE(jump);
3484 JUMPHERE(zerolength);
3485
3486 decrease_call_count(common);
3487 return cc;
3488 }
3489
3490 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3491 {
3492 DEFINE_COMPILER;
3493 fallback_common *fallback;
3494 recurse_entry *entry = common->entries;
3495 recurse_entry *prev = NULL;
3496 int start = GET(cc, 1);
3497
3498 PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
3499 while (entry != NULL)
3500 {
3501 if (entry->start == start)
3502 break;
3503 prev = entry;
3504 entry = entry->next;
3505 }
3506
3507 if (entry == NULL)
3508 {
3509 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
3510 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3511 return NULL;
3512 entry->next = NULL;
3513 entry->entry = NULL;
3514 entry->calls = NULL;
3515 entry->start = start;
3516
3517 if (prev != NULL)
3518 prev->next = entry;
3519 else
3520 common->entries = entry;
3521 }
3522
3523 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
3524 allocate_stack(common, 1);
3525 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
3526
3527 if (entry->entry == NULL)
3528 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
3529 else
3530 JUMPTO(SLJIT_FAST_CALL, entry->entry);
3531 /* Leave if the match is failed. */
3532 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
3533 return cc + 1 + LINK_SIZE;
3534 }
3535
/* Compiles the hot path of an assertion (opcode in the range OP_ASSERT ..
OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

Arguments:
  common       compiler state
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  fallback     assert_fallback descriptor filled in by this function
               (framesize, localptr, and hotpath for the bra-zero forms)
  conditional  TRUE when the assertion is compiled as a condition; failure
               then jumps to fallback->condfailed instead of
               fallback->common.topfallbacks, and no bra-zero prefix is
               allowed

Each alternative is compiled with compile_hotpath(), immediately followed by
its own fallback path (compile_fallbackpath()). While doing so,
common->acceptlabel and common->accept are replaced by per-alternative values;
the saved originals are restored before every return.

Returns the address of the item after the assertion, or NULL if the compiler
is in an error state. */
3536 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
3537 {
3538 DEFINE_COMPILER;
3539 int framesize;
3540 int localptr;
3541 fallback_common altfallback;
3542 pcre_uchar *ccbegin;
3543 pcre_uchar opcode;
3544 pcre_uchar bra = OP_BRA;
3545 jump_list *tmp = NULL;
3546 jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
3547 jump_list **found;
3548 /* Saving previous accept variables. */
3549 struct sljit_label *save_acceptlabel = common->acceptlabel;
3550 struct sljit_jump *jump;
3551 struct sljit_jump *brajump = NULL;
3552 jump_list *save_accept = common->accept;
3553
/* Remember and skip an optional OP_BRAZERO / OP_BRAMINZERO prefix. */
3554 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
3555 {
3556 SLJIT_ASSERT(!conditional);
3557 bra = *cc;
3558 cc++;
3559 }
3560 localptr = PRIV(cc);
3561 SLJIT_ASSERT(localptr != 0);
3562 framesize = get_framesize(common, cc, FALSE);
3563 fallback->framesize = framesize;
3564 fallback->localptr = localptr;
3565 opcode = *cc;
3566 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* When an alternative matches, positive assertions jump to the local list
"tmp" (bound later at "Assert is successful"), negative ones jump directly to
the failure target. */
3567 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
3568 ccbegin = cc;
3569 cc += GET(cc, 1);
3570
3571 if (bra == OP_BRAMINZERO)
3572 {
3573 /* This is a braminzero fallback path. */
3574 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3575 free_stack(common, 1);
3576 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
3577 }
3578
/* Save the state needed to undo the assertion: without a frame only STACK_TOP
(in localptr) and STR_PTR; with a frame also the previous localptr value and
the frame set up by init_frame(). */
3579 if (framesize < 0)
3580 {
3581 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
3582 allocate_stack(common, 1);
3583 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3584 }
3585 else
3586 {
3587 allocate_stack(common, framesize + 2);
3588 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3589 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
3590 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
3591 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3592 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3593 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
3594 }
3595
/* Compile the alternatives one by one; the loop ends when the item after the
current alternative is not OP_ALT. */
3596 memset(&altfallback, 0, sizeof(fallback_common));
3597 while (1)
3598 {
3599 common->acceptlabel = NULL;
3600 common->accept = NULL;
3601 altfallback.top = NULL;
3602 altfallback.topfallbacks = NULL;
3603
3604 if (*ccbegin == OP_ALT)
3605 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3606
3607 altfallback.cc = ccbegin;
3608 compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback);
3609 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3610 {
3611 common->acceptlabel = save_acceptlabel;
3612 common->accept = save_accept;
3613 return NULL;
3614 }
3615 common->acceptlabel = LABEL();
3616 if (common->accept != NULL)
3617 set_jumps(common->accept, common->acceptlabel);
3618
3619 /* Reset stack. */
3620 if (framesize < 0)
3621 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3622 else {
3623 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
3624 {
3625 /* We don't need to keep the STR_PTR, only the previous localptr. */
3626 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3627 }
3628 else
3629 {
3630 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3631 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3632 }
3633 }
3634
3635 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
3636 {
3637 /* We know that STR_PTR was stored on the top of the stack. */
3638 if (conditional)
3639 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3640 else if (bra == OP_BRAZERO)
3641 {
3642 if (framesize < 0)
3643 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3644 else
3645 {
3646 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3647 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
3648 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3649 }
3650 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3651 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3652 }
3653 else if (framesize >= 0)
3654 {
3655 /* For OP_BRA and OP_BRAMINZERO. */
3656 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3657 }
3658 }
/* The alternative matched: leave through the "found" list. */
3659 add_jump(compiler, found, JUMP(SLJIT_JUMP));
3660
3661 compile_fallbackpath(common, altfallback.top);
3662 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3663 {
3664 common->acceptlabel = save_acceptlabel;
3665 common->accept = save_accept;
3666 return NULL;
3667 }
3668 set_jumps(altfallback.topfallbacks, LABEL());
3669
3670 if (*cc != OP_ALT)
3671 break;
3672
3673 ccbegin = cc;
3674 cc += GET(cc, 1);
3675 }
3676 /* None of them matched. */
3677
3678 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
3679 {
3680 /* Assert is failed. */
3681 if (conditional || bra == OP_BRAZERO)
3682 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3683
3684 if (framesize < 0)
3685 {
3686 /* The topmost item should be 0. */
3687 if (bra == OP_BRAZERO)
3688 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3689 else
3690 free_stack(common, 1);
3691 }
3692 else
3693 {
3694 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3695 /* The topmost item should be 0. */
3696 if (bra == OP_BRAZERO)
3697 {
3698 free_stack(common, framesize + 1);
3699 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3700 }
3701 else
3702 free_stack(common, framesize + 2);
3703 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3704 }
/* With OP_BRAZERO this jump is bound to fallback->hotpath below; otherwise it
is added to the failure target. */
3705 jump = JUMP(SLJIT_JUMP);
3706 if (bra != OP_BRAZERO)
3707 add_jump(compiler, target, jump);
3708
3709 /* Assert is successful. */
3710 set_jumps(tmp, LABEL());
3711 if (framesize < 0)
3712 {
3713 /* We know that STR_PTR was stored on the top of the stack. */
3714 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3715 /* Keep the STR_PTR on the top of the stack. */
3716 if (bra == OP_BRAZERO)
3717 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3718 else if (bra == OP_BRAMINZERO)
3719 {
3720 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3721 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3722 }
3723 }
3724 else
3725 {
3726 if (bra == OP_BRA)
3727 {
3728 /* We don't need to keep the STR_PTR, only the previous localptr. */
3729 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
3730 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
3731 }
3732 else
3733 {
3734 /* We don't need to keep the STR_PTR, only the previous localptr. */
3735 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
3736 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3737 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
3738 }
3739 }
3740
3741 if (bra == OP_BRAZERO)
3742 {
3743 fallback->hotpath = LABEL();
3744 sljit_set_label(jump, fallback->hotpath);
3745 }
3746 else if (bra == OP_BRAMINZERO)
3747 {
3748 JUMPTO(SLJIT_JUMP, fallback->hotpath);
3749 JUMPHERE(brajump);
3750 if (framesize >= 0)
3751 {
3752 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
3753 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
3754 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
3755 }
3756 set_jumps(fallback->common.topfallbacks, LABEL());
3757 }
3758 }
3759 else
3760 {
3761 /* AssertNot is successful. */
3762 if (framesize < 0)
3763 {
3764 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3765 if (bra != OP_BRA)
3766 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3767 else
3768 free_stack(common, 1);
3769 }
3770 else
3771 {
3772 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
3773 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3774 /* The topmost item should be 0. */
3775 if (bra != OP_BRA)
3776 {
3777 free_stack(common, framesize + 1);
3778 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3779 }
3780 else
3781 free_stack(common, framesize + 2);
3782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
3783 }
3784
3785 if (bra == OP_BRAZERO)
3786 fallback->hotpath = LABEL();
3787 else if (bra == OP_BRAMINZERO)
3788 {
3789 JUMPTO(SLJIT_JUMP, fallback->hotpath);
3790 JUMPHERE(brajump);
3791 }
3792
3793 if (bra != OP_BRA)
3794 {
3795 SLJIT_ASSERT(found == &fallback->common.topfallbacks);
3796 set_jumps(fallback->common.topfallbacks, LABEL());
3797 fallback->common.topfallbacks = NULL;
3798 }
3799 }
3800
/* Restore the accept variables saved on entry. */
3801 common->acceptlabel = save_acceptlabel;
3802 common->accept = save_accept;
3803 return cc + 1 + LINK_SIZE;
3804 }
3805
3806 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
3807 {
3808 int condition = FALSE;
3809 pcre_uchar *slotA = name_table;
3810 pcre_uchar *slotB;
3811 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
3812 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
3813 sljit_w no_capture;
3814 int i;
3815
3816 locals += OVECTOR_START / sizeof(sljit_w);
3817 no_capture = locals[1];
3818
3819 for (i = 0; i < name_count; i++)
3820 {
3821 if (GET2(slotA, 0) == refno) break;
3822 slotA += name_entry_size;
3823 }
3824
3825 if (i < name_count)
3826 {
3827 /* Found a name for the number - there can be only one; duplicate names
3828 for different numbers are allowed, but not vice versa. First scan down
3829 for duplicates. */
3830
3831 slotB = slotA;
3832 while (slotB > name_table)
3833 {
3834 slotB -= name_entry_size;
3835 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3836 {
3837 condition = locals[GET2(slotB, 0) << 1] != no_capture;
3838 if (condition) break;
3839 }
3840 else break;
3841 }
3842
3843 /* Scan up for duplicates */
3844 if (!condition)
3845 {
3846 slotB = slotA;
3847 for (i++; i < name_count; i++)
3848 {
3849 slotB += name_entry_size;
3850 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3851 {
3852 condition = locals[GET2(slotB, 0) << 1] != no_capture;
3853 if (condition) break;
3854 }
3855 else break;
3856 }
3857 }
3858 }
3859 return condition;
3860 }
3861
3862 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
3863 {
3864 int condition = FALSE;
3865 pcre_uchar *slotA = name_table;
3866 pcre_uchar *slotB;
3867 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
3868 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
3869 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
3870 int i;
3871
3872 for (i = 0; i < name_count; i++)
3873 {
3874 if (GET2(slotA, 0) == recno) break;
3875 slotA += name_entry_size;
3876 }
3877
3878 if (i < name_count)
3879 {
3880 /* Found a name for the number - there can be only one; duplicate
3881 names for different numbers are allowed, but not vice versa. First
3882 scan down for duplicates. */
3883
3884 slotB = slotA;
3885 while (slotB > name_table)
3886 {
3887 slotB -= name_entry_size;
3888 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3889 {
3890 condition = GET2(slotB, 0) == group_num;
3891 if (condition) break;
3892 }
3893 else break;
3894 }
3895
3896 /* Scan up for duplicates */
3897 if (!condition)
3898 {
3899 slotB = slotA;
3900 for (i++; i < name_count; i++)
3901 {
3902 slotB += name_entry_size;
3903 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
3904 {
3905 condition = GET2(slotB, 0) == group_num;
3906 if (condition) break;
3907 }
3908 else break;
3909 }
3910 }
3911 }
3912 return condition;
3913 }
3914
3915 /*
3916 Handling bracketed expressions is probably the most complex part.
3917
3918 Stack layout naming characters:
3919 S - Push the current STR_PTR
3920 0 - Push a 0 (NULL)
3921 A - Push the current STR_PTR. Needed for restoring the STR_PTR
3922 before the next alternative. Not pushed if there are no alternatives.
3923 M - Any values pushed by the current alternative. Can be empty, or anything.
3924 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
3925 L - Push the previous local (pointed by localptr) to the stack
3926 () - optional values stored on the stack
3927 ()* - optional, can be stored multiple times
3928
3929 The following list shows the regular expression templates, their PCRE byte codes
3930 and stack layout supported by pcre-sljit.
3931
3932 (?:) OP_BRA | OP_KET A M
3933 () OP_CBRA | OP_KET C M
3934 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
3935 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
3936 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
3937 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
3938 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
3939 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
3940 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
3941 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
3942 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
3943 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
3944 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
3945 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
3946 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
3947 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
3948 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
3949 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
3950 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
3951 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
3952 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
3953 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
3954
3955
3956 Stack layout naming characters:
3957 A - Push the alternative index (starting from 0) on the stack.
3958 Not pushed if there are no alternatives.
3959 M - Any values pushed by the current alternative. Can be empty, or anything.
3960
3961 The next list shows the possible content of a bracket:
3962 (|) OP_*BRA | OP_ALT ... M A
3963 (?()|) OP_*COND | OP_ALT M A
3964 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
3965 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
3966 Or nothing, if trace is unnecessary
3967 */
3968
/* Emits the hot path (forward matching code) of a bracketed group.

cc points to the group, optionally preceded by OP_BRAZERO or OP_BRAMINZERO.
The code below distinguishes OP_CBRA / OP_SCBRA, OP_ONCE / OP_ONCE_NC,
OP_SBRA and OP_COND / OP_SCOND, combined with a closing OP_KET, OP_KETRMAX
or OP_KETRMIN. Only the first alternative is compiled here (through
compile_hotpath); the remaining OP_ALT alternatives are merely skipped when
the return value is computed.

Returns the address following the closing ket, or NULL when the sljit
compiler reports an error. A conditional group whose condition is OP_DEF
generates no code: its bracket_fallback is unlinked from the parent and
bracketend(cc) is returned. */
3969 static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3970 {
3971 DEFINE_COMPILER;
3972 fallback_common *fallback;
3973 pcre_uchar opcode;
3974 int localptr = 0;
3975 int offset = 0;
3976 int stacksize;
3977 pcre_uchar *ccbegin;
3978 pcre_uchar *hotpath;
3979 pcre_uchar bra = OP_BRA;
3980 pcre_uchar ket;
3981 assert_fallback *assert;
3982 BOOL has_alternatives;
3983 struct sljit_jump *jump;
3984 struct sljit_jump *skip;
3985 struct sljit_label *rmaxlabel = NULL;
3986 struct sljit_jump *braminzerojump = NULL;
3987
/* NOTE(review): 'fallback' is never assigned in the visible code of this
function; presumably PUSH_FALLBACK sets it - confirm against the macro. */
3988 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
3989
3990 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
3991 {
3992 bra = *cc;
3993 cc++;
/* NOTE(review): redundant store, opcode is unconditionally re-read right
after this block. */
3994 opcode = *cc;
3995 }
3996
3997 opcode = *cc;
3998 ccbegin = cc;
3999 hotpath = ccbegin + 1 + LINK_SIZE;
4000
4001 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4002 {
4003 /* Drop this bracket_fallback. */
4004 parent->top = fallback->prev;
4005 return bracketend(cc);
4006 }
4007
/* The closing ket opcode is read 1 + LINK_SIZE units before the address
returned by bracketend(). */
4008 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4009 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4010 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Advance cc by the link value stored after the opcode; the code below
expects an OP_ALT or a ket opcode there. */
4011 cc += GET(cc, 1);
4012
4013 has_alternatives = *cc == OP_ALT;
4014 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4015 {
4016 has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE;
4017 if (*hotpath == OP_NRREF)
4018 {
/* stacksize is only a temporary here: it holds the operand of OP_NRREF. */
4019 stacksize = GET2(hotpath, 1);
4020 if (common->currententry == NULL || stacksize == RREF_ANY)
4021 has_alternatives = FALSE;
4022 else if (common->currententry->start == 0)
4023 has_alternatives = stacksize != 0;
4024 else
4025 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4026 }
4027 }
4028
/* From here on a repeated OP_COND is handled as OP_SCOND, and OP_ONCE_NC
is handled as OP_ONCE. */
4029 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4030 opcode = OP_SCOND;
4031 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4032 opcode = OP_ONCE;
4033
4034 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4035 {
4036 /* Capturing brackets has a pre-allocated space. */
4037 offset = GET2(ccbegin, 1 + LINK_SIZE);
4038 localptr = OVECTOR_PRIV(offset);
/* offset now indexes the start/end pair of this group in OVECTOR. */
4039 offset <<= 1;
4040 FALLBACK_AS(bracket_fallback)->localptr = localptr;
/* Skip the 2-unit group number that follows the link. */
4041 hotpath += 2;
4042 }
4043 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4044 {
4045 /* Other brackets simply allocate the next entry. */
4046 localptr = PRIV(ccbegin);
4047 SLJIT_ASSERT(localptr != 0);
4048 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4049 if (opcode == OP_ONCE)
4050 FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE);
4051 }
4052
4053 /* Instructions before the first alternative. */
/* First pass: count the slots to allocate; second pass: fill them. */
4054 stacksize = 0;
4055 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4056 stacksize++;
4057 if (bra == OP_BRAZERO)
4058 stacksize++;
4059
4060 if (stacksize > 0)
4061 allocate_stack(common, stacksize);
4062
4063 stacksize = 0;
4064 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4065 {
4066 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4067 stacksize++;
4068 }
4069
4070 if (bra == OP_BRAZERO)
4071 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4072
4073 if (bra == OP_BRAMINZERO)
4074 {
4075 /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */
4076 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4077 if (ket != OP_KETRMIN)
4078 {
4079 free_stack(common, 1);
4080 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4081 }
4082 else
4083 {
4084 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4085 {
4086 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4087 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4088 /* Nothing stored during the first run. */
4089 skip = JUMP(SLJIT_JUMP);
4090 JUMPHERE(jump);
4091 /* Checking zero-length iteration. */
4092 if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4093 {
4094 /* When we come from outside, localptr contains the previous STR_PTR. */
4095 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4096 }
4097 else
4098 {
4099 /* Except when the whole stack frame must be saved. */
4100 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4101 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w));
4102 }
4103 JUMPHERE(skip);
4104 }
4105 else
4106 {
4107 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4108 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4109 JUMPHERE(jump);
4110 }
4111 }
4112 }
4113
4114 if (ket == OP_KETRMIN)
4115 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4116
4117 if (ket == OP_KETRMAX)
4118 {
/* rmaxlabel is the target of the jump back emitted after the group body. */
4119 rmaxlabel = LABEL();
4120 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4121 FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel;
4122 }
4123
4124 /* Handling capturing brackets and alternatives. */
4125 if (opcode == OP_ONCE)
4126 {
4127 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4128 {
4129 /* Neither capturing brackets nor recursions are found in the block. */
4130 if (ket == OP_KETRMIN)
4131 {
4132 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4133 allocate_stack(common, 2);
4134 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4135 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4136 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4137 }
4138 else if (ket == OP_KETRMAX || has_alternatives)
4139 {
4140 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4141 allocate_stack(common, 1);
4142 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4143 }
4144 else
4145 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4146 }
4147 else
4148 {
4149 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4150 {
/* Two extra slots besides the frame: STR_PTR and the previous localptr value. */
4151 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2);
4152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4153 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1));
4154 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4155 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4156 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4157 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE);
4158 }
4159 else
4160 {
/* One extra slot besides the frame: the previous localptr value. */
4161 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1);
4162 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4163 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize));
4164 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4165 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4166 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE);
4167 }
4168 }
4169 }
4170 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4171 {
4172 /* Saving the previous values. */
4173 allocate_stack(common, 3);
4174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4175 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4176 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4177 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4178 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4179 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4180 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4181 }
4182 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4183 {
4184 /* Saving the previous value. */
4185 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4186 allocate_stack(common, 1);
4187 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4188 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4189 }
4190 else if (has_alternatives)
4191 {
4192 /* Pushing the starting string pointer. */
4193 allocate_stack(common, 1);
4194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4195 }
4196
4197 /* Generating code for the first alternative. */
4198 if (opcode == OP_COND || opcode == OP_SCOND)
4199 {
4200 if (*hotpath == OP_CREF)
4201 {
4202 SLJIT_ASSERT(has_alternatives);
/* The condition fails when the referenced OVECTOR entry equals OVECTOR(1). */
4203 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed),
4204 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4205 hotpath += 3;
4206 }
4207 else if (*hotpath == OP_NCREF)
4208 {
4209 SLJIT_ASSERT(has_alternatives);
4210 stacksize = GET2(hotpath, 1);
4211 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4212
/* Slow case: call do_searchovector. STACK_TOP is saved in POSSESSIVE1
around the call and restored afterwards; a zero result means the
condition failed. */
4213 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4214 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4215 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4216 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4217 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4218 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4219 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4220 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4221 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4222
4223 JUMPHERE(jump);
4224 hotpath += 3;
4225 }
4226 else if (*hotpath == OP_RREF || *hotpath == OP_NRREF)
4227 {
4228 /* Never has other case. */
4229 FALLBACK_AS(bracket_fallback)->u.condfailed = NULL;
4230
/* stacksize is turned into a compile-time truth value of the condition. */
4231 stacksize = GET2(hotpath, 1);
4232 if (common->currententry == NULL)
4233 stacksize = 0;
4234 else if (stacksize == RREF_ANY)
4235 stacksize = 1;
4236 else if (common->currententry->start == 0)
4237 stacksize = stacksize == 0;
4238 else
4239 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4240
4241 if (*hotpath == OP_RREF || stacksize || common->currententry == NULL)
4242 {
4243 SLJIT_ASSERT(!has_alternatives);
4244 if (stacksize != 0)
4245 hotpath += 3;
4246 else
4247 {
/* The condition is known to be false: compile the second alternative
if there is one, otherwise compile an empty range. */
4248 if (*cc == OP_ALT)
4249 {
4250 hotpath = cc + 1 + LINK_SIZE;
4251 cc += GET(cc, 1);
4252 }
4253 else
4254 hotpath = cc;
4255 }
4256 }
4257 else
4258 {
4259 SLJIT_ASSERT(has_alternatives);
4260
/* Runtime check through do_searchgroups, same calling sequence as the
do_searchovector call above plus the current group number in POSSESSIVE0. */
4261 stacksize = GET2(hotpath, 1);
4262 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4263 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4265 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4266 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4267 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4268 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4269 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4270 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4271 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4272 hotpath += 3;
4273 }
4274 }
4275 else
4276 {
/* The condition is an assertion; it is compiled by compile_assert_hotpath. */
4277 SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT);
4278 /* Similar code as PUSH_FALLBACK macro. */
4279 assert = sljit_alloc_memory(compiler, sizeof(assert_fallback));
4280 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4281 return NULL;
4282 memset(assert, 0, sizeof(assert_fallback));
4283 assert->common.cc = hotpath;
4284 FALLBACK_AS(bracket_fallback)->u.assert = assert;
4285 hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE);
4286 }
4287 }
4288
/* Compile the selected alternative: the code range from hotpath up to cc. */
4289 compile_hotpath(common, hotpath, cc, fallback);
4290 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4291 return NULL;
4292
4293 if (opcode == OP_ONCE)
4294 {
4295 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4296 {
4297 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4298 /* TMP2 which is set here used by OP_KETRMAX below. */
4299 if (ket == OP_KETRMAX)
4300 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4301 else if (ket == OP_KETRMIN)
4302 {
4303 /* Move the STR_PTR to the localptr. */
4304 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4305 }
4306 }
4307 else
4308 {
/* Same 2 / 1 extra slot choice as when the frame was allocated above. */
4309 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
4310 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w));
4311 if (ket == OP_KETRMAX)
4312 {
4313 /* TMP2 which is set here used by OP_KETRMAX below. */
4314 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4315 }
4316 }
4317 }
4318
/* Values pushed after the alternative: again counted first, filled second. */
4319 stacksize = 0;
4320 if (ket != OP_KET || bra != OP_BRA)
4321 stacksize++;
4322 if (has_alternatives && opcode != OP_ONCE)
4323 stacksize++;
4324
4325 if (stacksize > 0)
4326 allocate_stack(common, stacksize);
4327
4328 stacksize = 0;
4329 if (ket != OP_KET)
4330 {
4331 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4332 stacksize++;
4333 }
4334 else if (bra != OP_BRA)
4335 {
4336 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4337 stacksize++;
4338 }
4339
4340 if (has_alternatives)
4341 {
/* Store 0 for the first alternative. */
4342 if (opcode != OP_ONCE)
4343 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4344 if (ket != OP_KETRMAX)
4345 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4346 }
4347
4348 /* Must be after the hotpath label. */
/* offset is non-zero only for OP_CBRA / OP_SCBRA: store the capture,
start taken from localptr, end from the current STR_PTR. */
4349 if (offset != 0)
4350 {
4351 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4352 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4353 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
4354 }
4355
4356 if (ket == OP_KETRMAX)
4357 {
4358 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4359 {
4360 if (has_alternatives)
4361 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4362 /* Checking zero-length iteration. */
4363 if (opcode != OP_ONCE)
4364 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
4365 else
4366 /* TMP2 must contain the starting STR_PTR. */
4367 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
4368 }
4369 else
4370 JUMPTO(SLJIT_JUMP, rmaxlabel);
4371 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4372 }
4373
4374 if (bra == OP_BRAZERO)
4375 FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL();
4376
4377 if (bra == OP_BRAMINZERO)
4378 {
4379 /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */
/* NOTE(review): parent is cast to braminzero_fallback here, so for
OP_BRAMINZERO the caller presumably passes such an object - confirm. */
4380 JUMPTO(SLJIT_JUMP, ((braminzero_fallback*)parent)->hotpath);
4381 if (braminzerojump != NULL)
4382 {
4383 JUMPHERE(braminzerojump);
4384 /* We need to release the end pointer to perform the
4385 fallback for the zero-length iteration. When
4386 framesize is < 0, OP_ONCE will do the release itself. */
4387 if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0)
4388 {
4389 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4390 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4391 }
4392 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
4393 free_stack(common, 1);
4394 }
4395 /* Continue to the normal fallback. */
4396 }
4397
4398 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
4399 decrease_call_count(common);
4400
4401 /* Skip the other alternatives. */
4402 while (*cc == OP_ALT)
4403 cc += GET(cc, 1);
/* Step over the closing ket and its link. */
4404 cc += 1 + LINK_SIZE;
4405 return cc;
4406 }
4407
/* Emits the hot path of a bracket from the OP_BRAPOS family (OP_BRAPOS,
OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS; possessive groups, judging by the
opcode names), optionally preceded by OP_BRAPOSZERO.

Every alternative is compiled with compile_hotpath and ends with a jump
back to the common 'loop' label; the fallback path of the alternative is
generated immediately after it with compile_fallbackpath, so it falls
through to the next alternative.

Returns the address following the OP_KETRPOS, or NULL when the sljit
compiler reports an error. */
4408 static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4409 {
4410 DEFINE_COMPILER;
4411 fallback_common *fallback;
4412 pcre_uchar opcode;
4413 int localptr;
4414 int cbraprivptr = 0;
4415 int framesize;
4416 int stacksize;
4417 int offset = 0;
4418 BOOL zero = FALSE;
4419 pcre_uchar *ccbegin = NULL;
4420 int stack;
4421 struct sljit_label *loop = NULL;
4422 struct jump_list *emptymatch = NULL;
4423
/* NOTE(review): 'fallback' is never assigned in the visible code of this
function; presumably PUSH_FALLBACK sets it - confirm against the macro. */
4424 PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL);
4425 if (*cc == OP_BRAPOSZERO)
4426 {
4427 zero = TRUE;
4428 cc++;
4429 }
4430
4431 opcode = *cc;
4432 localptr = PRIV(cc);
4433 SLJIT_ASSERT(localptr != 0);
4434 FALLBACK_AS(bracketpos_fallback)->localptr = localptr;
/* ccbegin is set to the first opcode of the first alternative. */
4435 switch(opcode)
4436 {
4437 case OP_BRAPOS:
4438 case OP_SBRAPOS:
4439 ccbegin = cc + 1 + LINK_SIZE;
4440 break;
4441
4442 case OP_CBRAPOS:
4443 case OP_SCBRAPOS:
4444 offset = GET2(cc, 1 + LINK_SIZE);
4445 cbraprivptr = OVECTOR_PRIV(offset);
/* offset now indexes the start/end pair of this group in OVECTOR. */
4446 offset <<= 1;
4447 ccbegin = cc + 1 + LINK_SIZE + 2;
4448 break;
4449
4450 default:
4451 SLJIT_ASSERT_STOP();
4452 break;
4453 }
4454
4455 framesize = get_framesize(common, cc, FALSE);
4456 FALLBACK_AS(bracketpos_fallback)->framesize = framesize;
4457 if (framesize < 0)
4458 {
/* No frame: the capturing variants save the two OVECTOR entries, the
others save STR_PTR; when the group is not optional (!zero) one more
slot is added and initialized to 1. */
4459 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
4460 if (!zero)
4461 stacksize++;
4462 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4463 allocate_stack(common, stacksize);
4464 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4465
4466 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4467 {
4468 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4469 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4472 }
4473 else
4474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4475
4476 if (!zero)
4477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
4478 }
4479 else
4480 {
/* Frame needed: framesize slots plus the previous localptr value, plus
the optional flag (!zero) and, for the non-capturing variants, STR_PTR. */
4481 stacksize = framesize + 1;
4482 if (!zero)
4483 stacksize++;
4484 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4485 stacksize++;
4486 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4487 allocate_stack(common, stacksize);
4488
4489 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4490 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
4491 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4492 stack = 0;
4493 if (!zero)
4494 {
4495 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
4496 stack++;
4497 }
4498 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4499 {
4500 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
4501 stack++;
4502 }
4503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
4504 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
4505 }
4506
4507 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4509
4510 loop = LABEL();
4511 while (*cc != OP_KETRPOS)
4512 {
/* Every alternative starts with empty fallback lists; its fallback path
is emitted right after its hot path below. */
4513 fallback->top = NULL;
4514 fallback->topfallbacks = NULL;
4515 cc += GET(cc, 1);
4516
4517 compile_hotpath(common, ccbegin, cc, fallback);
4518 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4519 return NULL;
4520
/* The alternative matched: reset STACK_TOP from localptr, record the new
position (and the capture for the capturing variants), clear the flag
slot of a non-optional group and jump back to 'loop'. */
4521 if (framesize < 0)
4522 {
4523 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4524
4525 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4526 {
4527 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4528 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4529 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4530 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4531 }
4532 else
4533 {
4534 if (opcode == OP_SBRAPOS)
4535 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4536 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4537 }
4538
/* For the OP_S* variants TMP1 holds the position where this iteration
started; an iteration that did not advance STR_PTR leaves the loop. */
4539 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4540 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4541
4542 if (!zero)
4543 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4544 }
4545 else
4546 {
4547 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4548 {
4549 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
4550 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4551 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4552 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4553 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4554 }
4555 else
4556 {
4557 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4558 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
4559 if (opcode == OP_SBRAPOS)
4560 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4561 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
4562 }
4563
4564 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
4565 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
4566
4567 if (!zero)
4568 {
/* NOTE(review): this code is only reached when framesize >= 0, so the
'framesize < 0' arm below can never be taken. */
4569 if (framesize < 0)
4570 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
4571 else
4572 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4573 }
4574 }
4575 JUMPTO(SLJIT_JUMP, loop);
4576 flush_stubs(common);
4577
/* Fallback code of this alternative; it restores STR_PTR and then falls
through to the next alternative (or to the code after the loop). */
4578 compile_fallbackpath(common, fallback->top);
4579 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4580 return NULL;
4581 set_jumps(fallback->topfallbacks, LABEL());
4582
4583 if (framesize < 0)
4584 {
4585 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4586 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4587 else
4588 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4589 }
4590 else
4591 {
4592 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4593 {
4594 /* Last alternative. */
4595 if (*cc == OP_KETRPOS)
4596 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4597 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4598 }
4599 else
4600 {
4601 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4602 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
4603 }
4604 }
4605
4606 if (*cc == OP_KETRPOS)
4607 break;
4608 ccbegin = cc + 1 + LINK_SIZE;
4609 }
4610
/* A non-optional group fails when its flag slot is still non-zero, i.e.
no iteration has cleared the initial value 1. */
4611 fallback->topfallbacks = NULL;
4612 if (!zero)
4613 {
4614 if (framesize < 0)
4615 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
4616 else /* TMP2 is set to [localptr] above. */
4617 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
4618 }
4619
4620 /* None of them matched. */
4621 set_jumps(emptymatch, LABEL());
4622 decrease_call_count(common);
4623 return cc + 1 + LINK_SIZE;
4624 }
4625
/* Decodes the repeated item starting at cc.

Outputs:
*opcode - the repeat opcode mapped into the OP_STAR .. OP_POSUPTO range
(the caseless, negated, type and class repeat opcodes are shifted by
their base offset). OP_CRRANGE / OP_CRMINRANGE are kept, except that
they become OP_UPTO / OP_MINUPTO when *arg2 is 0 and OP_EXACT when
*arg1 equals *arg2.
*type - OP_CHAR, OP_CHARI, OP_NOT or OP_NOTI for character repeats, the
type opcode read from the pattern for OP_TYPE* repeats, or the class
opcode itself for class repeats.
*arg1 - the count of OP_UPTO / OP_MINUPTO / OP_EXACT / OP_POSUPTO, or the
second value stored after a class for a range repeat (appears to be
the maximum). Not written otherwise.
*arg2 - the first value stored after a class for a range repeat (appears
to be the minimum). Not written otherwise.
*end - when end is not NULL, the address following the whole item.

Returns a pointer to the data of the item: the character for character
repeats, the class data for class repeats, and the address after the
type opcode for OP_TYPE* repeats. */
4626 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
4627 {
4628 int class_len;
4629
4630 *opcode = *cc;
4631 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
4632 {
4633 cc++;
4634 *type = OP_CHAR;
4635 }
4636 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
4637 {
4638 cc++;
4639 *type = OP_CHARI;
4640 *opcode -= OP_STARI - OP_STAR;
4641 }
4642 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
4643 {
4644 cc++;
4645 *type = OP_NOT;
4646 *opcode -= OP_NOTSTAR - OP_STAR;
4647 }
4648 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
4649 {
4650 cc++;
4651 *type = OP_NOTI;
4652 *opcode -= OP_NOTSTARI - OP_STAR;
4653 }
4654 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
4655 {
4656 cc++;
4657 *opcode -= OP_TYPESTAR - OP_STAR;
/* 0 marks "type not known yet"; it is read from the pattern below. */
4658 *type = 0;
4659 }
4660 else
4661 {
/* Both bounds must hold: the original '||' form was always true and could
never detect an unexpected opcode. */
4662 SLJIT_ASSERT(*opcode >= OP_CLASS && *opcode <= OP_XCLASS);
4663 *type = *opcode;
4664 cc++;
/* Opcodes below OP_XCLASS use a fixed length of 33 here, OP_XCLASS stores
its length in the pattern; the repeat opcode is read at cc[class_len - 1]. */
4665 class_len = (*type < OP_XCLASS) ? 33 : GET(cc, 0);
4666 *opcode = cc[class_len - 1];
4667 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
4668 {
4669 *opcode -= OP_CRSTAR - OP_STAR;
4670 if (end != NULL)
4671 *end = cc + class_len;
4672 }
4673 else
4674 {
4675 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
4676 *arg1 = GET2(cc, (class_len + 2));
4677 *arg2 = GET2(cc, class_len);
4678
4679 if (*arg2 == 0)
4680 {
4681 SLJIT_ASSERT(*arg1 != 0);
4682 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
4683 }
4684 if (*arg1 == *arg2)
4685 *opcode = OP_EXACT;
4686
4687 if (end != NULL)
4688 *end = cc + class_len + 4;
4689 }
4690 return cc;
4691 }
4692
/* Counted repeats carry a 2-unit count right after the opcode. */
4693 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
4694 {
4695 *arg1 = GET2(cc, 0);
4696 cc += 2;
4697 }
4698
4699 if (*type == 0)
4700 {
4701 *type = *cc;
4702 if (end != NULL)
4703 *end = next_opcode(common, cc);
4704 cc++;
4705 return cc;
4706 }
4707
4708 if (end != NULL)
4709 {
4710 *end = cc + 1;
4711 #ifdef SUPPORT_UTF8
/* In UTF-8 mode a lead byte >= 0xc0 is followed by extra bytes whose
number is looked up in _pcre_utf8_table4. */
4712 if (common->utf8 && *cc >= 0xc0) *end += _pcre_utf8_table4[*cc & 0x3f];
4713 #endif
4714 }
4715 return cc;
4716 }
4717
/* Emits the hot path of a repeated single item (character, character type
or class).

The repeat is decoded by get_iterator_parameters into opcode, type, arg1 and
arg2 (both initialized to -1 when unused), and the switch below generates
the matching loop for each repeat kind; the single item itself is always
matched through compile_char1_hotpath.

Returns the 'end' address computed by get_iterator_parameters. */
4718 static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4719 {
4720 DEFINE_COMPILER;
4721 fallback_common *fallback;
4722 pcre_uchar opcode;
4723 pcre_uchar type;
4724 int arg1 = -1, arg2 = -1;
4725 pcre_uchar* end;
4726 jump_list *nomatch = NULL;
4727 struct sljit_jump *jump = NULL;
4728 struct sljit_label *label;
4729
/* NOTE(review): 'fallback' is never assigned in the visible code of this
function; presumably PUSH_FALLBACK sets it - confirm against the macro. */
4730 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
4731
4732 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
4733
4734 switch(opcode)
4735 {
4736 case OP_STAR:
4737 case OP_PLUS:
4738 case OP_UPTO:
4739 case OP_CRRANGE:
4740 if (type == OP_ANYNL || type == OP_EXTUNI)
4741 {
/* For these two types STR_PTR is pushed after every successful match
(one stack slot per iteration), below a 0 pushed first. */
4742 if (opcode == OP_STAR || opcode == OP_UPTO)
4743 {
4744 allocate_stack(common, 2);
4745 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4746 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4747 }
4748 else
4749 {
4750 allocate_stack(common, 1);
4751 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4752 }
/* Counted forms keep the iteration counter in POSSESSIVE0. */
4753 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
4754 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4755
4756 label = LABEL();
4757 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4758 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
4759 {
4760 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4761 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
/* While the counter is below arg2, loop without pushing a position;
once it reaches arg1, leave the loop. */
4762 if (opcode == OP_CRRANGE && arg2 > 0)
4763 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
4764 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
4765 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
4766 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4767 }
4768
4769 allocate_stack(common, 1);
4770 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4771 JUMPTO(SLJIT_JUMP, label);
4772 if (jump != NULL)
4773 JUMPHERE(jump);
4774 }
4775 else
4776 {
/* Other types use two stack slots only: STACK(0) holds the position after
the last successful match, STACK(1) a counter starting at 1. */
4777 allocate_stack(common, 2);
4778 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4779 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4780 label = LABEL();
4781 compile_char1_hotpath(common, type, cc, &nomatch);
4782 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4783 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
4784 {
4785 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4786 JUMPTO(SLJIT_JUMP, label);
4787 }
4788 else
4789 {
4790 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4791 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4792 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4793 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4794 }
4795 set_jumps(nomatch, LABEL());
/* The counter starts at 1, so "fewer than the required matches" is
counter < 2 for OP_PLUS and counter < arg2 + 1 for OP_CRRANGE. */
4796 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
4797 add_jump(compiler, &fallback->topfallbacks,
4798 CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1));
4799 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4800 }
4801 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4802 break;
4803
4804 case OP_MINSTAR:
4805 case OP_MINPLUS:
/* No item is matched on the hot path here; OP_MINPLUS jumps straight to
the fallback list. */
4806 allocate_stack(common, 1);
4807 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4808 if (opcode == OP_MINPLUS)
4809 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4810 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4811 break;
4812
4813 case OP_MINUPTO:
4814 case OP_CRMINRANGE:
/* Pushes the position and a counter of 1; OP_CRMINRANGE jumps straight to
the fallback list. */
4815 allocate_stack(common, 2);
4816 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4817 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4818 if (opcode == OP_CRMINRANGE)
4819 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4820 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4821 break;
4822
4823 case OP_QUERY:
4824 case OP_MINQUERY:
/* Only OP_QUERY tries to match the item on the hot path. */
4825 allocate_stack(common, 1);
4826 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4827 if (opcode == OP_QUERY)
4828 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4829 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
4830 break;
4831
4832 case OP_EXACT:
/* Counter in POSSESSIVE0 runs from 1 to arg1; nothing is pushed to the stack. */
4833 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
4834 label = LABEL();
4835 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
4836 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4837 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4838 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4839 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4840 break;
4841
4842 case OP_POSSTAR:
4843 case OP_POSPLUS:
4844 case OP_POSUPTO:
/* These forms push nothing to the stack: POSSESSIVE1 keeps the position
after the last successful match, POSSESSIVE0 the counter / matched flag. */
4845 if (opcode != OP_POSSTAR)
4846 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
4847 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4848 label = LABEL();
4849 compile_char1_hotpath(common, type, cc, &nomatch);
4850 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4851 if (opcode != OP_POSUPTO)
4852 {
/* For OP_POSPLUS the value 2 records that at least one match happened. */
4853 if (opcode == OP_POSPLUS)
4854 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
4855 JUMPTO(SLJIT_JUMP, label);
4856 }
4857 else
4858 {
4859 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4860 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4861 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4862 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
4863 }
4864 set_jumps(nomatch, LABEL());
4865 if (opcode == OP_POSPLUS)
4866 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
4867 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4868 break;
4869
4870 case OP_POSQUERY:
/* Try the item once; on failure STR_PTR is restored from POSSESSIVE1. */
4871 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4872 compile_char1_hotpath(common, type, cc, &nomatch);
4873 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
4874 set_jumps(nomatch, LABEL());
4875 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4876 break;
4877
4878 default:
4879 SLJIT_ASSERT_STOP();
4880 break;
4881 }
4882
4883 decrease_call_count(common);
4884 return end;
4885 }
4886
4887 static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4888 {
4889 DEFINE_COMPILER;
4890 fallback_common *fallback;
4891
4892 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4893
4894 if (*cc == OP_FAIL)
4895 {
4896 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4897 return cc + 1;
4898 }
4899
4900 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
4901 {
4902 /* No need to check notempty conditions. */
4903 if (common->acceptlabel == NULL)
4904 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
4905 else
4906 JUMPTO(SLJIT_JUMP, common->acceptlabel);
4907 return cc + 1;
4908 }
4909
4910 if (common->acceptlabel == NULL)
4911 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
4912 else
4913 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
4914 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4915 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
4916 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4917 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
4918 if (common->acceptlabel == NULL)
4919 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4920 else
4921 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
4922 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4923 if (common->acceptlabel == NULL)
4924 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
4925 else
4926 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
4927 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
4928 return cc + 1;
4929 }
4930
4931 static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc)
4932 {
4933 DEFINE_COMPILER;
4934 int offset = GET2(cc, 1);
4935
4936 /* Data will be discarded anyway... */
4937 if (common->currententry != NULL)
4938 return cc + 3;
4939
4940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
4941 offset <<= 1;
4942 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4943 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4944 return cc + 3;
4945 }
4946
4947 static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent)
4948 {
4949 DEFINE_COMPILER;
4950 fallback_common *fallback;
4951
4952 while (cc < ccend)
4953 {
4954 switch(*cc)
4955 {
4956 case OP_SOD:
4957 case OP_SOM:
4958 case OP_NOT_WORD_BOUNDARY:
4959 case OP_WORD_BOUNDARY:
4960 case OP_NOT_DIGIT:
4961 case OP_DIGIT:
4962 case OP_NOT_WHITESPACE:
4963 case OP_WHITESPACE:
4964 case OP_NOT_WORDCHAR:
4965 case OP_WORDCHAR:
4966 case OP_ANY:
4967 case OP_ALLANY:
4968 case OP_ANYBYTE:
4969 case OP_NOTPROP:
4970 case OP_PROP:
4971 case OP_ANYNL:
4972 case OP_NOT_HSPACE:
4973 case OP_HSPACE:
4974 case OP_NOT_VSPACE:
4975 case OP_VSPACE:
4976 case OP_EXTUNI:
4977 case OP_EODN:
4978 case OP_EOD:
4979 case OP_CIRC:
4980 case OP_CIRCM:
4981 case OP_DOLL:
4982 case OP_DOLLM:
4983 case OP_NOT:
4984 case OP_NOTI:
4985 case OP_REVERSE:
4986 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
4987 break;
4988
4989 case OP_SET_SOM:
4990 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
4991 allocate_stack(common, 1);
4992 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4993 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
4994 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4995 cc++;
4996 break;
4997
4998 case OP_CHAR:
4999 case OP_CHARI:
5000 cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5001 break;
5002
5003 case OP_STAR:
5004 case OP_MINSTAR:
5005 case OP_PLUS:
5006 case OP_MINPLUS:
5007 case OP_QUERY:
5008 case OP_MINQUERY:
5009 case OP_UPTO:
5010 case OP_MINUPTO:
5011 case OP_EXACT:
5012 case OP_POSSTAR:
5013 case OP_POSPLUS:
5014 case OP_POSQUERY:
5015 case OP_POSUPTO:
5016 case OP_STARI:
5017 case OP_MINSTARI:
5018 case OP_PLUSI:
5019 case OP_MINPLUSI:
5020 case OP_QUERYI:
5021 case OP_MINQUERYI:
5022 case OP_UPTOI:
5023 case OP_MINUPTOI:
5024 case OP_EXACTI:
5025 case OP_POSSTARI:
5026 case OP_POSPLUSI:
5027 case OP_POSQUERYI:
5028 case OP_POSUPTOI:
5029 case OP_NOTSTAR:
5030 case OP_NOTMINSTAR:
5031 case OP_NOTPLUS:
5032 case OP_NOTMINPLUS:
5033 case OP_NOTQUERY:
5034 case OP_NOTMINQUERY:
5035 case OP_NOTUPTO:
5036 case OP_NOTMINUPTO:
5037 case OP_NOTEXACT:
5038 case OP_NOTPOSSTAR:
5039 case OP_NOTPOSPLUS:
5040 case OP_NOTPOSQUERY:
5041 case OP_NOTPOSUPTO:
5042 case OP_NOTSTARI:
5043 case OP_NOTMINSTARI:
5044 case OP_NOTPLUSI:
5045 case OP_NOTMINPLUSI:
5046 case OP_NOTQUERYI:
5047 case OP_NOTMINQUERYI:
5048 case OP_NOTUPTOI:
5049 case OP_NOTMINUPTOI:
5050 case OP_NOTEXACTI:
5051 case OP_NOTPOSSTARI:
5052 case OP_NOTPOSPLUSI:
5053 case OP_NOTPOSQUERYI:
5054 case OP_NOTPOSUPTOI:
5055 case OP_TYPESTAR:
5056 case OP_TYPEMINSTAR:
5057 case OP_TYPEPLUS:
5058 case OP_TYPEMINPLUS:
5059 case OP_TYPEQUERY:
5060 case OP_TYPEMINQUERY:
5061 case OP_TYPEUPTO:
5062 case OP_TYPEMINUPTO:
5063 case OP_TYPEEXACT:
5064 case OP_TYPEPOSSTAR:
5065 case OP_TYPEPOSPLUS:
5066 case OP_TYPEPOSQUERY:
5067 case OP_TYPEPOSUPTO:
5068 cc = compile_iterator_hotpath(common, cc, parent);
5069 break;
5070
5071 case OP_CLASS:
5072 case OP_NCLASS:
5073 if (cc[33] >= OP_CRSTAR && cc[33] <= OP_CRMINRANGE)
5074 cc = compile_iterator_hotpath(common, cc, parent);
5075 else
5076 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5077 break;
5078
5079 #ifdef SUPPORT_UTF8
5080 case OP_XCLASS:
5081 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5082 cc = compile_iterator_hotpath(common, cc, parent);
5083 else
5084 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5085 break;
5086 #endif
5087
5088 case OP_REF:
5089 case OP_REFI:
5090 if (cc[3] >= OP_CRSTAR && cc[3] <= OP_CRMINRANGE)
5091 cc = compile_ref_iterator_hotpath(common, cc, parent);
5092 else
5093 cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE);
5094 break;
5095
5096 case OP_RECURSE:
5097 cc = compile_recurse_hotpath(common, cc, parent);
5098 break;
5099
5100 case OP_ASSERT:
5101 case OP_ASSERT_NOT:
5102 case OP_ASSERTBACK:
5103 case OP_ASSERTBACK_NOT:
5104 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5105 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5106 break;
5107
5108 case OP_BRAMINZERO:
5109 PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc);
5110 cc = bracketend(cc + 1);
5111 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5112 {
5113 allocate_stack(common, 1);
5114 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5115 }
5116 else
5117 {
5118 allocate_stack(common, 2);
5119 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5120 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5121 }
5122 FALLBACK_AS(braminzero_fallback)->hotpath = LABEL();
5123 if (cc[1] > OP_ASSERTBACK_NOT)
5124 decrease_call_count(common);
5125 break;
5126
5127 case OP_ONCE:
5128 case OP_ONCE_NC:
5129 case OP_BRA:
5130 case OP_CBRA:
5131 case OP_COND:
5132 case OP_SBRA:
5133 case OP_SCBRA:
5134 case OP_SCOND:
5135 cc = compile_bracket_hotpath(common, cc, parent);
5136 break;
5137
5138 case OP_BRAZERO:
5139 if (cc[1] > OP_ASSERTBACK_NOT)
5140 cc = compile_bracket_hotpath(common, cc, parent);
5141 else
5142 {
5143 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5144 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5145 }
5146 break;
5147
5148 case OP_BRAPOS:
5149 case OP_CBRAPOS:
5150 case OP_SBRAPOS:
5151 case OP_SCBRAPOS:
5152 case OP_BRAPOSZERO:
5153 cc = compile_bracketpos_hotpath(common, cc, parent);
5154 break;
5155
5156 case OP_FAIL:
5157 case OP_ACCEPT:
5158 case OP_ASSERT_ACCEPT:
5159 cc = compile_fail_accept_hotpath(common, cc, parent);
5160 break;
5161
5162 case OP_CLOSE:
5163 cc = compile_close_hotpath(common, cc);
5164 break;
5165
5166 case OP_SKIPZERO:
5167 cc = bracketend(cc + 1);
5168 break;
5169
5170 default:
5171 SLJIT_ASSERT_STOP();
5172 return;
5173 }
5174 if (cc == NULL)
5175 return;
5176 }
5177 SLJIT_ASSERT(cc == ccend);
5178 }
5179
/* The fallback-record helper macros are removed here, so they are not
available to the fallback path generators defined below. */
5180 #undef PUSH_FALLBACK
5181 #undef PUSH_FALLBACK_NOVALUE
5182 #undef FALLBACK_AS
5183
/* Calls compile_fallbackpath() for the given record chain and returns from
the enclosing function (which must return void) when the sljit compiler
reports an error. Uses the names "common" and "compiler" of the caller. */
5184 #define COMPILE_FALLBACKPATH(current) \
5185 do \
5186 { \
5187 compile_fallbackpath(common, (current)); \
5188 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5189 return; \
5190 } \
5191 while (0)
5192
/* Casts the caller's variable named "current" to a pointer to "type". */
5193 #define CURRENT_AS(type) ((type*)current)
5194
5195 static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5196 {
5197 DEFINE_COMPILER;
5198 pcre_uchar *cc = current->cc;
5199 pcre_uchar opcode;
5200 pcre_uchar type;
5201 int arg1 = -1, arg2 = -1;
5202 struct sljit_label *label = NULL;
5203 struct sljit_jump *jump = NULL;
5204
5205 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5206
5207 switch(opcode)
5208 {
5209 case OP_STAR:
5210 case OP_PLUS:
5211 case OP_UPTO:
5212 case OP_CRRANGE:
5213 if (type == OP_ANYNL || type == OP_EXTUNI)
5214 {
5215 set_jumps(current->topfallbacks, LABEL());
5216 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5217 free_stack(common, 1);
5218 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5219 }
5220 else
5221 {
5222 if (opcode == OP_STAR || opcode == OP_UPTO)
5223 arg2 = 0;
5224 else if (opcode == OP_PLUS)
5225 arg2 = 1;
5226 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1);
5227 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5228 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5229 skip_char_back(common);
5230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5231 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5232 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5233 set_jumps(current->topfallbacks, LABEL());
5234 JUMPHERE(jump);
5235 free_stack(common, 2);
5236 }
5237 break;
5238
5239 case OP_MINSTAR:
5240 case OP_MINPLUS:
5241 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5242 if (opcode == OP_MINPLUS)
5243 {
5244 set_jumps(current->topfallbacks, LABEL());
5245 current->topfallbacks = NULL;
5246 }
5247 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5248 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5249 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5250 set_jumps(current->topfallbacks, LABEL());
5251 free_stack(common, 1);
5252 break;
5253
5254 case OP_MINUPTO:
5255 case OP_CRMINRANGE:
5256 if (opcode == OP_CRMINRANGE)
5257 {
5258 set_jumps(current->topfallbacks, LABEL());
5259 current->topfallbacks = NULL;
5260 label = LABEL();
5261 }
5262 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5263 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5264
5265 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5266 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5267 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5268 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5269
5270 if (opcode == OP_CRMINRANGE)
5271 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5272
5273 if (opcode == OP_CRMINRANGE && arg1 == 0)
5274 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5275 else
5276 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath);
5277
5278 set_jumps(current->topfallbacks, LABEL());
5279 free_stack(common, 2);
5280 break;
5281
5282 case OP_QUERY:
5283 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5284 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5285 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5286 jump = JUMP(SLJIT_JUMP);
5287 set_jumps(current->topfallbacks, LABEL());
5288 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5289 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5290 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5291 JUMPHERE(jump);
5292 free_stack(common, 1);
5293 break;
5294
5295 case OP_MINQUERY:
5296 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5297 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5298 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5299 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5300 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5301 set_jumps(current->topfallbacks, LABEL());
5302 JUMPHERE(jump);
5303 free_stack(common, 1);
5304 break;
5305
5306 case OP_EXACT:
5307 case OP_POSPLUS:
5308 set_jumps(current->topfallbacks, LABEL());
5309 break;
5310
5311 case OP_POSSTAR:
5312 case OP_POSQUERY:
5313 case OP_POSUPTO:
5314 break;
5315
5316 default:
5317 SLJIT_ASSERT_STOP();
5318 break;
5319 }
5320 }
5321
5322 static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5323 {
5324 DEFINE_COMPILER;
5325 pcre_uchar *cc = current->cc;
5326 pcre_uchar type;
5327
5328 type = cc[3];
5329 if ((type & 0x1) == 0)
5330 {
5331 set_jumps(current->topfallbacks, LABEL());
5332 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5333 free_stack(common, 1);
5334 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5335 return;
5336 }
5337
5338 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5339 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5340 set_jumps(current->topfallbacks, LABEL());
5341 free_stack(common, 2);
5342 }
5343
5344 static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current)
5345 {
5346 DEFINE_COMPILER;
5347
5348 set_jumps(current->topfallbacks, LABEL());
5349 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5350 free_stack(common, 1);
5351 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
5352 }
5353
5354 static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current)
5355 {
5356 DEFINE_COMPILER;
5357 pcre_uchar *cc = current->cc;
5358 pcre_uchar bra = OP_BRA;
5359 struct sljit_jump *brajump = NULL;
5360
5361 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
5362 if (*cc == OP_BRAZERO)
5363 {
5364 bra = *cc;
5365 cc++;
5366 }
5367
5368 if (bra == OP_BRAZERO)
5369 {
5370 SLJIT_ASSERT(current->topfallbacks == NULL);
5371 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5372 }
5373
5374 if (CURRENT_AS(assert_fallback)->framesize < 0)
5375 {
5376 set_jumps(current->topfallbacks, LABEL());
5377
5378 if (bra == OP_BRAZERO)
5379 {
5380 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5381 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5382 free_stack(common, 1);
5383 }
5384 return;
5385 }
5386
5387 if (bra == OP_BRAZERO)
5388 {
5389 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
5390 {
5391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5392 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5393 free_stack(common, 1);
5394 return;
5395 }
5396 free_stack(common, 1);
5397 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5398 }
5399
5400 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
5401 {
5402 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr);
5403 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5404 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w));
5405
5406 set_jumps(current->topfallbacks, LABEL());
5407 }
5408 else
5409 set_jumps(current->topfallbacks, LABEL());
5410
5411 if (bra == OP_BRAZERO)
5412 {
5413 /* We know there is enough place on the stack. */
5414 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5416 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath);
5417 JUMPHERE(brajump);
5418 }
5419 }
5420
5421 static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current)
5422 {
5423 DEFINE_COMPILER;
5424 int opcode;
5425 int offset = 0;
5426 int localptr = CURRENT_AS(bracket_fallback)->localptr;
5427 int stacksize;
5428 int count;
5429 pcre_uchar *cc = current->cc;
5430 pcre_uchar *ccbegin;
5431 pcre_uchar *ccprev;
5432 jump_list *jumplist = NULL;
5433 jump_list *jumplistitem = NULL;
5434 pcre_uchar bra = OP_BRA;
5435 pcre_uchar ket;
5436 assert_fallback *assert;
5437 BOOL has_alternatives;
5438 struct sljit_jump *brazero = NULL;
5439 struct sljit_jump *once = NULL;
5440 struct sljit_jump *cond = NULL;
5441 struct sljit_label *rminlabel = NULL;
5442
5443 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5444 {
5445 bra = *cc;
5446 cc++;
5447 }
5448
5449 opcode = *cc;
5450 ccbegin = cc;
5451 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
5452 cc += GET(cc, 1);
5453 has_alternatives = *cc == OP_ALT;
5454 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5455 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL;
5456 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5457 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
5458 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5459 opcode = OP_SCOND;
5460 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5461 opcode = OP_ONCE;
5462
5463 if (ket == OP_KETRMAX)
5464 {
5465 if (bra != OP_BRAZERO)
5466 free_stack(common, 1);
5467 else
5468 {
5469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5470 free_stack(common, 1);
5471 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5472 }
5473 }
5474 else if (ket == OP_KETRMIN)
5475 {
5476 if (bra != OP_BRAMINZERO)
5477 {
5478 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5479 if (opcode >= OP_SBRA || opcode == OP_ONCE)
5480 {
5481 /* Checking zero-length iteration. */
5482 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0)
5483 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath);
5484 else
5485 {
5486 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5487 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath);
5488 }
5489 if (opcode != OP_ONCE)
5490 free_stack(common, 1);
5491 }
5492 else
5493 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath);
5494 }
5495 rminlabel = LABEL();
5496 }
5497 else if (bra == OP_BRAZERO)
5498 {
5499 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5500 free_stack(common, 1);
5501 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5502 }
5503
5504 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
5505 {
5506 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5507 {
5508 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5509 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5510 }
5511 once = JUMP(SLJIT_JUMP);
5512 }
5513 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5514 {
5515 if (has_alternatives)
5516 {
5517 /* Always exactly one alternative. */
5518 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5519 free_stack(common, 1);
5520
5521 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5522 if (SLJIT_UNLIKELY(!jumplistitem))
5523 return;
5524 jumplist = jumplistitem;
5525 jumplistitem->next = NULL;
5526 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
5527 }
5528 }
5529 else if (*cc == OP_ALT)
5530 {
5531 /* Build a jump list. Get the last successfully matched branch index. */
5532 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5533 free_stack(common, 1);
5534 count = 1;
5535 do
5536 {
5537 /* Append as the last item. */
5538 if (jumplist != NULL)
5539 {
5540 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
5541 jumplistitem = jumplistitem->next;
5542 }
5543 else
5544 {
5545 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5546 jumplist = jumplistitem;
5547 }
5548
5549 if (SLJIT_UNLIKELY(!jumplistitem))
5550 return;
5551
5552 jumplistitem->next = NULL;
5553 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
5554 cc += GET(cc, 1);
5555 }
5556 while (*cc == OP_ALT);
5557
5558 cc = ccbegin + GET(ccbegin, 1);
5559 }
5560
5561 COMPILE_FALLBACKPATH(current->top);
5562 if (current->topfallbacks)
5563 set_jumps(current->topfallbacks, LABEL());
5564
5565 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5566 {
5567 /* Conditional block always has at most one alternative. */
5568 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
5569 {
5570 SLJIT_ASSERT(has_alternatives);
5571 assert = CURRENT_AS(bracket_fallback)->u.assert;
5572 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
5573 {
5574 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5575 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5576 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5577 }
5578 cond = JUMP(SLJIT_JUMP);
5579 set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL());
5580 }
5581 else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL)
5582 {
5583 SLJIT_ASSERT(has_alternatives);
5584 cond = JUMP(SLJIT_JUMP);
5585 set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL());
5586 }
5587 else
5588 SLJIT_ASSERT(!has_alternatives);
5589 }
5590
5591 if (has_alternatives)
5592 {
5593 count = 1;
5594 do
5595 {
5596 current->top = NULL;
5597 current->topfallbacks = NULL;
5598 current->nextfallbacks = NULL;
5599 if (*cc == OP_ALT)
5600 {
5601 ccprev = cc + 1 + LINK_SIZE;
5602 cc += GET(cc, 1);
5603 if (opcode != OP_COND && opcode != OP_SCOND)
5604 {
5605 if (localptr != 0 && opcode != OP_ONCE)
5606 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5607 else
5608 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5609 }
5610 compile_hotpath(common, ccprev, cc, current);
5611 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5612 return;
5613 }
5614
5615 /* Instructions after the current alternative is succesfully matched. */
5616 /* There is a similar code in compile_bracket_hotpath. */
5617 if (opcode == OP_ONCE)
5618 {
5619 if (CURRENT_AS(bracket_fallback)->u.framesize < 0)
5620 {
5621 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5622 /* TMP2 which is set here used by OP_KETRMAX below. */
5623 if (ket == OP_KETRMAX)
5624 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5625 else if (ket == OP_KETRMIN)
5626 {
5627 /* Move the STR_PTR to the localptr. */
5628 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5629 }
5630 }
5631 else
5632 {
5633 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w));
5634 if (ket == OP_KETRMAX)
5635 {
5636 /* TMP2 which is set here used by OP_KETRMAX below. */
5637 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5638 }
5639 }
5640 }
5641
5642 stacksize = 0;
5643 if (opcode != OP_ONCE)
5644 stacksize++;
5645 if (ket != OP_KET || bra != OP_BRA)
5646 stacksize++;
5647
5648 if (stacksize > 0) {
5649 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5650 allocate_stack(common, stacksize);
5651 else
5652 {
5653 /* We know we have place at least for one item on the top of the stack. */
5654 SLJIT_ASSERT(stacksize == 1);
5655 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5656 }
5657 }
5658
5659 stacksize = 0;
5660 if (ket != OP_KET || bra != OP_BRA)
5661 {
5662 if (ket != OP_KET)
5663 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5664 else
5665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5666 stacksize++;
5667 }
5668
5669 if (opcode != OP_ONCE)
5670 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
5671
5672 if (offset != 0)
5673 {
5674 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5675 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5676 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5677 }
5678
5679 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath);
5680
5681 if (opcode != OP_ONCE)
5682 {
5683 SLJIT_ASSERT(jumplist);
5684 JUMPHERE(jumplist->jump);
5685 jumplist = jumplist->next;
5686 }
5687
5688 COMPILE_FALLBACKPATH(current->top);
5689 if (current->topfallbacks)
5690 set_jumps(current->topfallbacks, LABEL());
5691 SLJIT_ASSERT(!current->nextfallbacks);
5692 }
5693 while (*cc == OP_ALT);
5694 SLJIT_ASSERT(!jumplist);
5695
5696 if (cond != NULL)
5697 {
5698 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
5699 assert = CURRENT_AS(bracket_fallback)->u.assert;
5700 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT))
5701 {
5702 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
5703 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5704 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
5705 }
5706 JUMPHERE(cond);
5707 }
5708
5709 /* Free the STR_PTR. */
5710 if (localptr == 0)
5711 free_stack(common, 1);
5712 }
5713
5714 if (offset != 0)
5715 {
5716 /* Using both tmp register is better for instruction scheduling. */
5717 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5718 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5719 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5720 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
5721 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
5722 free_stack(common, 3);
5723 }
5724 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5725 {
5726 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
5727 free_stack(common, 1);
5728 }
5729 else if (opcode == OP_ONCE)
5730 {
5731 cc = ccbegin + GET(ccbegin, 1);
5732 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5733 {
5734 /* Reset head and drop saved frame. */
5735 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
5736 free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize);
5737 }
5738 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
5739 {
5740 /* The STR_PTR must be released. */
5741 free_stack(common, 1);
5742 }
5743
5744 JUMPHERE(once);
5745 /* Restore previous localptr */
5746 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5747 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w));
5748 else if (ket == OP_KETRMIN)
5749 {
5750 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5751 /* See the comment below. */
5752 free_stack(common, 2);
5753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5754 }
5755 }
5756
5757 if (ket == OP_KETRMAX)
5758 {
5759 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5760 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath);
5761 if (bra == OP_BRAZERO)
5762 {
5763 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5764 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
5765 JUMPHERE(brazero);
5766 }
5767 free_stack(common, 1);
5768 }
5769 else if (ket == OP_KETRMIN)
5770 {
5771 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5772
5773 /* OP_ONCE removes everything in case of a fallback, so we don't
5774 need to explicitly release the STR_PTR. The extra release would
5775 affect badly the free_stack(2) above. */
5776 if (opcode != OP_ONCE)
5777 free_stack(common, 1);
5778 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
5779 if (opcode == OP_ONCE)
5780 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
5781 else if (bra == OP_BRAMINZERO)
5782 free_stack(common, 1);
5783 }
5784 else if (bra == OP_BRAZERO)
5785 {
5786 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5787 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
5788 JUMPHERE(brazero);
5789 }
5790 }
5791
5792 static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current)
5793 {
5794 DEFINE_COMPILER;
5795 int offset;
5796 struct sljit_jump *jump;
5797
5798 if (CURRENT_AS(bracketpos_fallback)->framesize < 0)
5799 {
5800 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
5801 {
5802 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
5803 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5804 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5805 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5806 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
5807 }
5808 set_jumps(current->topfallbacks, LABEL());
5809 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
5810 return;
5811 }
5812
5813 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr);
5814 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5815
5816 if (current->topfallbacks)
5817 {
5818 jump = JUMP(SLJIT_JUMP);
5819 set_jumps(current->topfallbacks, LABEL());
5820 /* Drop the stack frame. */
5821 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
5822 JUMPHERE(jump);
5823 }
5824 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w));
5825 }
5826
5827 static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current)
5828 {
5829 assert_fallback fallback;
5830
5831 current->top = NULL;
5832 current->topfallbacks = NULL;
5833 current->nextfallbacks = NULL;
5834 if (current->cc[1] > OP_ASSERTBACK_NOT)
5835 {
5836 /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */
5837 compile_bracket_hotpath(common, current->cc, current);
5838 compile_bracket_fallbackpath(common, current->top);
5839 }
5840 else
5841 {
5842 memset(&fallback, 0, sizeof(fallback));
5843 fallback.common.cc = current->cc;
5844 fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath;
5845 /* Manual call of compile_assert_hotpath. */
5846 compile_assert_hotpath(common, current->cc, &fallback, FALSE);
5847 }
5848 SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks);
5849 }
5850
5851 static void compile_fallbackpath(compiler_common *common, struct fallback_common *current)
5852 {
5853 DEFINE_COMPILER;
5854
5855 while (current)
5856 {
5857 if (current->nextfallbacks != NULL)
5858 set_jumps(current->nextfallbacks, LABEL());
5859 switch(*current->cc)
5860 {
5861 case OP_SET_SOM:
5862 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5863 free_stack(common, 1);
5864 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
5865 break;
5866
5867 case OP_STAR:
5868 case OP_MINSTAR:
5869 case OP_PLUS:
5870 case OP_MINPLUS:
5871 case OP_QUERY:
5872 case OP_MINQUERY:
5873 case OP_UPTO:
5874 case OP_MINUPTO:
5875 case OP_EXACT:
5876 case OP_POSSTAR:
5877 case OP_POSPLUS:
5878 case OP_POSQUERY:
5879 case OP_POSUPTO:
5880 case OP_STARI:
5881 case OP_MINSTARI:
5882 case OP_PLUSI:
5883 case OP_MINPLUSI:
5884 case OP_QUERYI:
5885 case OP_MINQUERYI:
5886 case OP_UPTOI:
5887 case OP_MINUPTOI:
5888 case OP_EXACTI:
5889 case OP_POSSTARI:
5890 case OP_POSPLUSI:
5891 case OP_POSQUERYI:
5892 case OP_POSUPTOI:
5893 case OP_NOTSTAR:
5894 case OP_NOTMINSTAR:
5895 case OP_NOTPLUS:
5896 case OP_NOTMINPLUS:
5897 case OP_NOTQUERY:
5898 case OP_NOTMINQUERY:
5899 case OP_NOTUPTO:
5900 case OP_NOTMINUPTO:
5901 case OP_NOTEXACT:
5902 case OP_NOTPOSSTAR:
5903 case OP_NOTPOSPLUS:
5904 case OP_NOTPOSQUERY:
5905 case OP_NOTPOSUPTO:
5906 case OP_NOTSTARI:
5907 case OP_NOTMINSTARI:
5908 case OP_NOTPLUSI:
5909 case OP_NOTMINPLUSI:
5910 case OP_NOTQUERYI:
5911 case OP_NOTMINQUERYI:
5912 case OP_NOTUPTOI:
5913 case OP_NOTMINUPTOI:
5914 case OP_NOTEXACTI:
5915 case OP_NOTPOSSTARI:
5916 case OP_NOTPOSPLUSI:
5917 case OP_NOTPOSQUERYI:
5918 case OP_NOTPOSUPTOI:
5919 case OP_TYPESTAR:
5920 case OP_TYPEMINSTAR:
5921 case OP_TYPEPLUS:
5922 case OP_TYPEMINPLUS:
5923 case OP_TYPEQUERY:
5924 case OP_TYPEMINQUERY:
5925 case OP_TYPEUPTO:
5926 case OP_TYPEMINUPTO:
5927 case OP_TYPEEXACT:
5928 case OP_TYPEPOSSTAR:
5929 case OP_TYPEPOSPLUS:
5930 case OP_TYPEPOSQUERY:
5931 case OP_TYPEPOSUPTO:
5932 case OP_CLASS:
5933 case OP_NCLASS:
5934 case OP_XCLASS:
5935 compile_iterator_fallbackpath(common, current);
5936 break;
5937
5938 case OP_REF:
5939 case OP_REFI:
5940 compile_ref_iterator_fallbackpath(common, current);
5941 break;
5942
5943 case OP_RECURSE:
5944 compile_recurse_fallbackpath(common, current);
5945 break;
5946
5947 case OP_ASSERT:
5948 case OP_ASSERT_NOT:
5949 case OP_ASSERTBACK:
5950 case OP_ASSERTBACK_NOT:
5951 compile_assert_fallbackpath(common, current);
5952 break;
5953
5954 case OP_ONCE:
5955 case OP_ONCE_NC:
5956 case OP_BRA:
5957 case OP_CBRA:
5958 case OP_COND:
5959 case OP_SBRA:
5960 case OP_SCBRA:
5961 case OP_SCOND:
5962 compile_bracket_fallbackpath(common, current);
5963 break;
5964
5965 case OP_BRAZERO:
5966 if (current->cc[1] > OP_ASSERTBACK_NOT)
5967 compile_bracket_fallbackpath(common, current);
5968 else
5969 compile_assert_fallbackpath(common, current);
5970 break;
5971
5972 case OP_BRAPOS:
5973 case OP_CBRAPOS:
5974 case OP_SBRAPOS:
5975 case OP_SCBRAPOS:
5976 case OP_BRAPOSZERO:
5977 compile_bracketpos_fallbackpath(common, current);
5978 break;
5979
5980 case OP_BRAMINZERO:
5981 compile_braminzero_fallbackpath(common, current);
5982 break;
5983
5984 case OP_FAIL:
5985 case OP_ACCEPT:
5986 case OP_ASSERT_ACCEPT:
5987 set_jumps(current->topfallbacks, LABEL());
5988 break;
5989
5990 default:
5991 SLJIT_ASSERT_STOP();
5992 break;
5993 }
5994 current = current->prev;
5995 }
5996 }
5997
/* Generates the body of one recursion entry: the routine that is called
whenever the bracket starting at common->start + common->currententry->start
is invoked recursively. The emitted routine tries every alternative of the
bracket in turn and returns with TMP1 set to 1 when an alternative matched
and to 0 when none did. copy_locals is called with TRUE on entry and with
FALSE on exit (presumably save and restore of the locals used between ccbegin
and ccend - confirm against copy_locals). The function returns early, leaving
the routine unfinished, when the sljit compiler reports an error. */
5998 static SLJIT_INLINE void compile_recurse(compiler_common *common)
5999 {
6000 DEFINE_COMPILER;
6001 pcre_uchar *cc = common->start + common->currententry->start;
/* First item inside the bracket: skip the opcode and the link, plus two more
units for every opcode other than OP_BRA (presumably the capture number). */
6002 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : 2);
6003 pcre_uchar *ccend = bracketend(cc);
6004 int localsize = get_localsize(common, ccbegin, ccend);
6005 int framesize = get_framesize(common, cc, TRUE);
6006 int alternativesize;
6007 BOOL needsframe;
6008 fallback_common altfallback;
6009 struct sljit_jump *jump;
6010
6011 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
/* A negative framesize means that no frame is needed; it is then counted as
zero in the stack size computations below. */
6012 needsframe = framesize >= 0;
6013 if (!needsframe)
6014 framesize = 0;
/* One extra stack slot is reserved when the bracket has more than one
alternative; it holds STR_PTR (see below). */
6015 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6016
/* Record the entry label and bind every call collected so far to it. */
6017 SLJIT_ASSERT(common->currententry->entry == NULL);
6018 common->currententry->entry = LABEL();
6019 set_jumps(common->currententry->calls, common->currententry->entry);
6020
/* Routine prologue: TMP2 receives the value produced by the fast enter
(presumably the return address) and is stored in the highest of the newly
allocated stack slots. */
6021 sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize);
6022 allocate_stack(common, localsize + framesize + alternativesize);
6023 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6024 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
/* RECURSIVE_HEAD remembers the stack position of this invocation. */
6025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, STACK_TOP, 0);
6026 if (needsframe)
6027 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE);
6028
/* Keep the starting STR_PTR in STACK(0) so that later alternatives can
restart from the same subject position. */
6029 if (alternativesize > 0)
6030 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6031
6032 memset(&altfallback, 0, sizeof(fallback_common));
6033 common->acceptlabel = NULL;
6034 common->accept = NULL;
6035 altfallback.cc = ccbegin;
6036 cc += GET(cc, 1);
/* One iteration per alternative: altfallback.cc is the start of the
alternative and cc is the item that terminates it. */
6037 while (1)
6038 {
6039 altfallback.top = NULL;
6040 altfallback.topfallbacks = NULL;
6041
/* Every alternative after the first one reloads the saved STR_PTR. */
6042 if (altfallback.cc != ccbegin)
6043 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6044
6045 compile_hotpath(common, altfallback.cc, cc, &altfallback);
6046 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6047 return;
6048
/* Reaching this point at run time means that the alternative matched. */
6049 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6050
6051 compile_fallbackpath(common, altfallback.top);
6052 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6053 return;
6054 set_jumps(altfallback.topfallbacks, LABEL());
6055
6056 if (*cc != OP_ALT)
6057 break;
6058
6059 altfallback.cc = cc + 1 + LINK_SIZE;
6060 cc += GET(cc, 1);
6061 }
6062 /* None of them matched. */
/* TMP3 carries the result (0 here); the success code below is skipped. */
6063 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6064 jump = JUMP(SLJIT_JUMP);
6065
/* Success path: all accept jumps land here. STACK_TOP is reset to the
position stored in RECURSIVE_HEAD. */
6066 set_jumps(common->accept, LABEL());
6067 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD);
6068 if (needsframe)
6069 {
/* OVECTOR(0) is kept in TMP3 across the revertframes call and written back
afterwards; STACK_TOP is moved for the call and moved back after it. */
6070 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6071 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6072 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6073 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6074 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0);
6075 }
6076 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
6077
/* Common exit for both outcomes: release the stack area, move the result
from TMP3 to TMP1, store the word read from STACK_TOP + sizeof(sljit_w) into
RECURSIVE_HEAD and return through the address stored at STACK_TOP. */
6078 JUMPHERE(jump);
6079 copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize);
6080 free_stack(common, localsize + framesize + alternativesize);
6081 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w));
6082 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
6083 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, TMP2, 0);
6084 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
6085 }
6086
6087 #undef COMPILE_FALLBACKPATH
6088 #undef CURRENT_AS
6089
/* JIT compiles the pattern held in `re`. On success an executable_function
record is allocated, stored in extra->executable_jit, and the
PCRE_EXTRA_EXECUTABLE_JIT bit is set in extra->flags. On every failure
(unsupported newline setting, too much local space, failed allocation, sljit
compiler error, failed code generation) the function simply returns and
leaves `extra` unchanged. The caller must have set PCRE_EXTRA_STUDY_DATA in
extra->flags (asserted below). */
6090 void
6091 _pcre_jit_compile(const real_pcre *re, pcre_extra *extra)
6092 {
6093 struct sljit_compiler *compiler;
6094 fallback_common rootfallback;
6095 compiler_common common_data;
6096 compiler_common *common = &common_data;
6097 const pcre_uint8 *tables = re->tables;
6098 pcre_study_data *study;
6099 pcre_uchar *ccend;
6100 executable_function *function;
6101 void *executable_func;
6102 struct sljit_label *leave;
6103 struct sljit_label *mainloop = NULL;
6104 struct sljit_label *empty_match_found;
6105 struct sljit_label *empty_match_fallback;
6106 struct sljit_jump *alloc_error;
6107 struct sljit_jump *reqbyte_notfound = NULL;
6108 struct sljit_jump *empty_match;
6109
6110 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
6111 study = extra->study_data;
6112
6113 if (!tables)
6114 tables = _pcre_default_tables;
6115
/* The compiled pattern starts right after the name table. */
6116 memset(&rootfallback, 0, sizeof(fallback_common));
6117 rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
6118
/* Fill in the compile time state shared by all code generator functions. */
6119 common->compiler = NULL;
6120 common->start = rootfallback.cc;
6121 common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
6122 common->fcc = tables + fcc_offset;
6123 common->lcc = (sljit_w)(tables + lcc_offset);
6124 common->nltype = NLTYPE_FIXED;
/* Translate the newline option of the pattern. An unknown combination of
the newline bits makes the function give up (default branch); nothing has
been allocated at that point. */
6125 switch(re->options & PCRE_NEWLINE_BITS)
6126 {
6127 case 0:
6128 /* Compile-time default */
6129 switch (NEWLINE)
6130 {
6131 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6132 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6133 default: common->newline = NEWLINE; break;
6134 }
6135 break;
6136 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
6137 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
6138 case PCRE_NEWLINE_CR+
6139 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
6140 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6141 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6142 default: return;
6143 }
/* Newline type used for the bsr_nltype field: taken from the pattern options
when given there, otherwise from the build time BSR_ANYCRLF setting. */
6144 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
6145 common->bsr_nltype = NLTYPE_ANYCRLF;
6146 else if ((re->options & PCRE_BSR_UNICODE) != 0)
6147 common->bsr_nltype = NLTYPE_ANY;
6148 else
6149 {
6150 #ifdef BSR_ANYCRLF
6151 common->bsr_nltype = NLTYPE_ANYCRLF;
6152 #else
6153 common->bsr_nltype = NLTYPE_ANY;
6154 #endif
6155 }
6156 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6157 common->ctypes = (sljit_w)(tables + ctypes_offset);
6158 common->name_table = (sljit_w)re + re->name_table_offset;
6159 common->name_count = re->name_count;
6160 common->name_entry_size = re->name_entry_size;
/* All jump lists and labels start out empty. The lists named after helper
routines are only non-NULL if some generated code called the helper; the
matching routine is then emitted at the end of this function. */
6161 common->acceptlabel = NULL;
6162 common->stubs = NULL;
6163 common->entries = NULL;
6164 common->currententry = NULL;
6165 common->accept = NULL;
6166 common->calllimit = NULL;
6167 common->stackalloc = NULL;
6168 common->revertframes = NULL;
6169 common->wordboundary = NULL;
6170 common->anynewline = NULL;
6171 common->hspace = NULL;
6172 common->vspace = NULL;
6173 common->casefulcmp = NULL;
6174 common->caselesscmp = NULL;
6175 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6176 #ifdef SUPPORT_UTF8
6177 common->utf8 = (re->options & PCRE_UTF8) != 0;
6178 #ifdef SUPPORT_UCP
6179 common->useucp = (re->options & PCRE_UCP) != 0;
6180 #endif
6181 common->utf8readchar = NULL;
6182 common->utf8readtype8 = NULL;
6183 #endif
6184 #ifdef SUPPORT_UCP
6185 common->getucd = NULL;
6186 #endif
/* Compute the size of the local area. A negative result of get_localspace
and a total above SLJIT_MAX_LOCAL_SIZE both abort the compilation. */
6187 ccend = bracketend(rootfallback.cc);
6188 SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
6189 common->localsize = get_localspace(common, rootfallback.cc, ccend);
6190 if (common->localsize < 0)
6191 return;
6192 common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
6193 if (common->localsize > SLJIT_MAX_LOCAL_SIZE)
6194 return;
/* One zero initialised int per code unit of the pattern. From here on every
exit path has to release common->localptrs. */
6195 common->localptrs = (int*)SLJIT_MALLOC((ccend - rootfallback.cc) * sizeof(int));
6196 if (!common->localptrs)
6197 return;
6198 memset(common->localptrs, 0, (ccend - rootfallback.cc) * sizeof(int));
6199 set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend);
6200
6201 compiler = sljit_create_compiler();
6202 if (!compiler)
6203 {
6204 SLJIT_FREE(common->localptrs);
6205 return;
6206 }
6207 common->compiler = compiler;
6208
6209 /* Main pcre_jit_exec entry. */
6210 sljit_emit_enter(compiler, 1, 5, 5, common->localsize);
6211
6212 /* Register init. */
6213 reset_ovector(common, (re->top_bracket + 1) * 2);
/* NOTE(review): this stores whatever reset_ovector left in
SLJIT_TEMPORARY_REG1 into REQ_BYTE_PTR - confirm against reset_ovector. */
6214 if ((re->flags & PCRE_REQCHSET) != 0)
6215 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_BYTE_PTR, SLJIT_TEMPORARY_REG1, 0);
6216
/* Load the working registers from the jit_arguments structure passed in
SLJIT_GENERAL_REG1 and from the sljit_stack it points to. */
6217 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_GENERAL_REG1, 0);
6218 OP1(SLJIT_MOV, TMP1, 0, SLJIT_GENERAL_REG1, 0);
6219 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6220 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
6221 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6222 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit));
6223 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
6224 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
6225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0);
6226
6227 /* Main part of the matching */
/* Unanchored patterns get a main loop label to which a failed attempt jumps
back (see below), plus at most one fast forward search. */
6228 if ((re->options & PCRE_ANCHORED) == 0)
6229 {
6230 mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
6231 /* Forward search if possible. */
6232 if ((re->flags & PCRE_FIRSTSET) != 0)
6233 fast_forward_first_byte(common, re->first_byte, (re->options & PCRE_FIRSTLINE) != 0);
6234 else if ((re->flags & PCRE_STARTLINE) != 0)
6235 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
/* NOTE(review): the PCRE_STARTLINE test below is always true here, because
the previous branch already handled the case where the flag is set. */
6236 else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
6237 fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
6238 }
6239 if ((re->flags & PCRE_REQCHSET) != 0)
6240 reqbyte_notfound = search_requested_char(common, re->req_byte, (re->flags & PCRE_FIRSTSET) != 0);
6241
6242 /* Store the current STR_PTR in OVECTOR(0). */
6243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6244 /* Copy the limit of allowed recursions. */
6245 OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);
6246
/* Matching code of the whole pattern. */
6247 compile_hotpath(common, rootfallback.cc, ccend, &rootfallback);
6248 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6249 {
6250 sljit_free_compiler(compiler);
6251 SLJIT_FREE(common->localptrs);
6252 return;
6253 }
6254
/* STR_PTR equal to OVECTOR(0) means an empty match. It is examined by the
out of line code emitted after flush_stubs below, which comes back either to
empty_match_found or to empty_match_fallback. */
6255 empty_match = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6256 empty_match_found = LABEL();
6257
6258 common->acceptlabel = LABEL();
6259 if (common->accept != NULL)
6260 set_jumps(common->accept, common->acceptlabel);
6261
6262 /* This means we have a match. Update the ovector. */
6263 copy_ovector(common, re->top_bracket + 1);
/* `leave` is the single return point; the error paths below load
SLJIT_RETURN_REG and jump here. */
6264 leave = LABEL();
6265 sljit_emit_return(compiler, SLJIT_UNUSED, 0);
6266
/* Backtracking code of the whole pattern. */
6267 empty_match_fallback = LABEL();
6268 compile_fallbackpath(common, rootfallback.top);
6269 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6270 {
6271 sljit_free_compiler(compiler);
6272 SLJIT_FREE(common->localptrs);
6273 return;
6274 }
6275
6276 SLJIT_ASSERT(rootfallback.prev == NULL);
6277
6278 /* Check we have remaining characters. */
6279 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6280
/* Unanchored patterns go back to the main loop while the limit has not been
reached. The limit is STR_END, or FIRSTLINE_END when PCRE_FIRSTLINE is set;
a studied minimum length above 1 is added to STR_PTR before the comparison
with STR_END. */
6281 if ((re->options & PCRE_ANCHORED) == 0)
6282 {
6283 if ((re->options & PCRE_FIRSTLINE) == 0)
6284 {
6285 if (study != NULL && study->minlength > 1)
6286 {
6287 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, study->minlength);
6288 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_END, 0, mainloop);
6289 }
6290 else
6291 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
6292 }
6293 else
6294 {
6295 if (study != NULL && study->minlength > 1)
6296 {
/* TMP2 becomes non-zero when either limit is exceeded; the jump back to the
main loop is taken only when it is zero. */
6297 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, study->minlength);
6298 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0);
6299 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER);
6300 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
6301 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_GREATER_EQUAL);
6302 JUMPTO(SLJIT_C_ZERO, mainloop);
6303 }
6304 else
6305 CMPTO(SLJIT_C_LESS, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, mainloop);
6306 }
6307 }
6308
/* No match: the jump returned by search_requested_char also lands here. */
6309 if (reqbyte_notfound != NULL)
6310 JUMPHERE(reqbyte_notfound);
6311 /* Copy OVECTOR(1) to OVECTOR(0) */
6312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6313 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
6314 JUMPTO(SLJIT_JUMP, leave);
6315
6316 flush_stubs(common);
6317
/* Out of line handling of an empty match. A non-zero notempty always
rejects it. Otherwise it is accepted unless notempty_atstart is set and
STR_PTR equals the str field of the arguments. */
6318 JUMPHERE(empty_match);
6319 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6320 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6321 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_fallback);
6322 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6323 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found);
6324 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6325 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found);
6326 JUMPTO(SLJIT_JUMP, empty_match_fallback);
6327
/* Emit one routine for every recursion entry collected so far. */
6328 common->currententry = common->entries;
6329 while (common->currententry != NULL)
6330 {
6331 /* Might add new entries. */
6332 compile_recurse(common);
6333 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6334 {
6335 sljit_free_compiler(compiler);
6336 SLJIT_FREE(common->localptrs);
6337 return;
6338 }
6339 flush_stubs(common);
6340 common->currententry = common->currententry->next;
6341 }
6342
6343 /* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
6344 /* This is a (really) rare case. */
6345 set_jumps(common->stackalloc, LABEL());
6346 /* RETURN_ADDR is not a saved register. */
/* LOCALS0 keeps the value produced by the fast enter and LOCALS1 keeps TMP2
across the call of sljit_stack_resize. The current STACK_TOP is written to
the top field of the stack and the requested new limit (old limit plus
STACK_GROWTH_RATE) is computed into TMP2. */
6347 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
6348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
6349 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6350 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6351 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top), STACK_TOP, 0);
6352 OP2(SLJIT_ADD, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit), SLJIT_IMM, STACK_GROWTH_RATE);
6353
/* A non-zero return value of sljit_stack_resize is treated as failure. After
a successful resize STACK_TOP and STACK_LIMIT are reloaded from the stack. */
6354 sljit_emit_ijump(compiler, SLJIT_CALL2, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
6355 alloc_error = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6356 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6357 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6358 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, top));
6359 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(struct sljit_stack, limit));
6360 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
6361 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6362
6363 /* Allocation failed. */
6364 JUMPHERE(alloc_error);
6365 /* We break the return address cache here, but this is a really rare case. */
6366 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT);
6367 JUMPTO(SLJIT_JUMP, leave);
6368
6369 /* Call limit reached. */
6370 set_jumps(common->calllimit, LABEL());
6371 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT);
6372 JUMPTO(SLJIT_JUMP, leave);
6373
/* Shared helper routines: each one is emitted only if its jump list is not
empty, that is, if some generated code calls it. */
6374 if (common->revertframes != NULL)
6375 {
6376 set_jumps(common->revertframes, LABEL());
6377 do_revertframes(common);
6378 }
6379 if (common->wordboundary != NULL)
6380 {
6381 set_jumps(common->wordboundary, LABEL());
6382 check_wordboundary(common);
6383 }
6384 if (common->anynewline != NULL)
6385 {
6386 set_jumps(common->anynewline, LABEL());
6387 check_anynewline(common);
6388 }
6389 if (common->hspace != NULL)
6390 {
6391 set_jumps(common->hspace, LABEL());
6392 check_hspace(common);
6393 }
6394 if (common->vspace != NULL)
6395 {
6396 set_jumps(common->vspace, LABEL());
6397 check_vspace(common);
6398 }
6399 if (common->casefulcmp != NULL)
6400 {
6401 set_jumps(common->casefulcmp, LABEL());
6402 do_casefulcmp(common);
6403 }
6404 if (common->caselesscmp != NULL)
6405 {
6406 set_jumps(common->caselesscmp, LABEL());
6407 do_caselesscmp(common);
6408 }
6409 #ifdef SUPPORT_UTF8
6410 if (common->utf8readchar != NULL)
6411 {
6412 set_jumps(common->utf8readchar, LABEL());
6413 do_utf8readchar(common);
6414 }
6415 if (common->utf8readtype8 != NULL)
6416 {
6417 set_jumps(common->utf8readtype8, LABEL());
6418 do_utf8readtype8(common);
6419 }
6420 #endif
6421 #ifdef SUPPORT_UCP
6422 if (common->getucd != NULL)
6423 {
6424 set_jumps(common->getucd, LABEL());
6425 do_getucd(common);
6426 }
6427 #endif
6428
/* Generate the machine code; the compile time data is released before the
result is checked. */
6429 SLJIT_FREE(common->localptrs);
6430 executable_func = sljit_generate_code(compiler);
6431 sljit_free_compiler(compiler);
6432 if (executable_func == NULL)
6433 return;
6434
6435 function = SLJIT_MALLOC(sizeof(executable_function));
6436 if (function == NULL)
6437 {
6438 /* This case is highly unlikely since we just recently
6439 freed a lot of memory. Although not impossible. */
6440 sljit_free_code(executable_func);
6441 return;
6442 }
6443
/* Publish the result. No stack callback or user data is assigned yet; see
pcre_assign_jit_stack. */
6444 function->executable_func = executable_func;
6445 function->callback = NULL;
6446 function->userdata = NULL;
6447 extra->executable_jit = function;
6448 extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT;
6449 }
6450
6451 static int jit_machine_stack_exec(jit_arguments *arguments, executable_function *function)
6452 {
6453 union {
6454 void* executable_func;
6455 jit_function call_executable_func;
6456 } convert_executable_func;
6457 pcre_uint8 local_area[LOCAL_SPACE_SIZE];
6458 struct sljit_stack local_stack;
6459
6460 local_stack.top = (sljit_w)&local_area;
6461 local_stack.base = local_stack.top;
6462 local_stack.limit = local_stack.base + LOCAL_SPACE_SIZE;
6463 local_stack.max_limit = local_stack.limit;
6464 arguments->stack = &local_stack;
6465 convert_executable_func.executable_func = function->executable_func;
6466 return convert_executable_func.call_executable_func(arguments);
6467 }
6468
6469 int
6470 _pcre_jit_exec(const real_pcre *re, void *executable_func,
6471 PCRE_SPTR subject, int length, int start_offset, int options,
6472 int match_limit, int *offsets, int offsetcount)
6473 {
6474 executable_function *function = (executable_function*)executable_func;
6475 union {
6476 void* executable_func;
6477 jit_function call_executable_func;
6478 } convert_executable_func;
6479 jit_arguments arguments;
6480 int maxoffsetcount;
6481 int retval;
6482
6483 /* Sanity checks should be handled by pcre_exec. */
6484 arguments.stack = NULL;
6485 arguments.str = subject + start_offset;
6486 arguments.begin = subject;
6487 arguments.end = subject + length;
6488 arguments.calllimit = match_limit; /* JIT decreases this value less times. */
6489 arguments.notbol = (options & PCRE_NOTBOL) != 0;
6490 arguments.noteol = (options & PCRE_NOTEOL) != 0;
6491 arguments.notempty = (options & PCRE_NOTEMPTY) != 0;
6492 arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6493 arguments.offsets = offsets;
6494
6495 /* pcre_exec() rounds offsetcount to a multiple of 3, and then uses only 2/3 of
6496 the output vector for storing captured strings, with the remainder used as
6497 workspace. We don't need the workspace here. For compatibility, we limit the
6498 number of captured strings in the same way as pcre_exec(), so that the user
6499 gets the same result with and without JIT. */
6500
6501 offsetcount = ((offsetcount - (offsetcount % 3)) * 2)/3;
6502 maxoffsetcount = (re->top_bracket + 1) * 2;
6503 if (offsetcount > maxoffsetcount)
6504 offsetcount = maxoffsetcount;
6505 arguments.offsetcount = offsetcount;
6506
6507 if (function->callback)
6508 arguments.stack = (struct sljit_stack*)function->callback(function->userdata);
6509 else
6510 arguments.stack = (struct sljit_stack*)function->userdata;
6511
6512 if (arguments.stack == NULL)
6513 retval = jit_machine_stack_exec(&arguments, function);
6514 else
6515 {
6516 convert_executable_func.executable_func = function->executable_func;
6517 retval = convert_executable_func.call_executable_func(&arguments);
6518 }
6519
6520 if (retval * 2 > offsetcount)
6521 retval = 0;
6522 return retval;
6523 }
6524
6525 void
6526 _pcre_jit_free(void *executable_func)
6527 {
6528 executable_function *function = (executable_function*)executable_func;
6529 sljit_free_code(function->executable_func);
6530 SLJIT_FREE(function);
6531 }
6532
6533 PCRE_EXP_DECL pcre_jit_stack *
6534 pcre_jit_stack_alloc(int startsize, int maxsize)
6535 {
6536 if (startsize < 1 || maxsize < 1)
6537 return NULL;
6538 if (startsize > maxsize)
6539 startsize = maxsize;
6540 startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
6541 maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1);
6542 return (pcre_jit_stack*)sljit_allocate_stack(startsize, maxsize);
6543 }
6544
6545 PCRE_EXP_DECL void
6546 pcre_jit_stack_free(pcre_jit_stack *stack)
6547 {
6548 sljit_free_stack((struct sljit_stack*)stack);
6549 }
6550
6551 PCRE_EXP_DECL void
6552 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6553 {
6554 executable_function *function;
6555 if (extra != NULL &&
6556 (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
6557 extra->executable_jit != NULL)
6558 {
6559 function = (executable_function*)extra->executable_jit;
6560 function->callback = callback;
6561 function->userdata = userdata;
6562 }
6563 }
6564
6565 #else /* SUPPORT_JIT */
6566
6567 /* These are dummy functions to avoid linking errors when JIT support is not
6568 being compiled. */
6569
6570 PCRE_EXP_DECL pcre_jit_stack *
6571 pcre_jit_stack_alloc(int startsize, int maxsize)
6572 {
6573 (void)startsize;
6574 (void)maxsize;
6575 return NULL;
6576 }
6577
6578 PCRE_EXP_DECL void
6579 pcre_jit_stack_free(pcre_jit_stack *stack)
6580 {
6581 (void)stack;
6582 }
6583
6584 PCRE_EXP_DECL void
6585 pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata)
6586 {
6587 (void)extra;
6588 (void)callback;
6589 (void)userdata;
6590 }
6591
6592 #endif
6593
6594 /* End of pcre_jit_compile.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12