/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1325 - (hide annotations) (download)
Fri May 10 14:03:18 2013 UTC (10 days, 16 hours ago) by ph10
File MIME type: text/plain
File size: 306686 byte(s)
Fix spelling mistakes in comments.

1 ph10 664 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 zherczeg 1268 Copyright (c) 1997-2013 University of Cambridge
10 ph10 664
11     The machine code generator part (this module) was written by Zoltan Herczeg
12 zherczeg 1268 Copyright (c) 2010-2013
13 ph10 664
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43     #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47     #include "pcre_internal.h"
48    
49 chpe 1055 #if defined SUPPORT_JIT
50 ph10 664
51     /* All-in-one: Since we use the JIT compiler only from here,
52     we just include it. This way we don't need to touch the build
53     system files. */
54    
55 ph10 836 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56     #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 ph10 664 #define SLJIT_CONFIG_AUTO 1
58 zherczeg 741 #define SLJIT_CONFIG_STATIC 1
59 ph10 664 #define SLJIT_VERBOSE 0
60     #define SLJIT_DEBUG 0
61    
62     #include "sljit/sljitLir.c"
63    
64     #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 ph10 836 #error Unsupported architecture
66 ph10 664 #endif
67    
68 zherczeg 1247 /* Defines for debugging purposes. */
69    
70     /* 1 - Use unoptimized capturing brackets.
71     2 - Enable capture_last_ptr (includes option 1). */
72     /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73    
74 zherczeg 1275 /* 1 - Always have a control head. */
75     /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76    
77 zherczeg 1002 /* Allocate memory for the regex stack on the real machine stack.
78     Fast, but limited size. */
79     #define MACHINE_STACK_SIZE 32768
80 ph10 664
81 zherczeg 1002 /* Growth rate for stack allocated by the OS. Should be a multiple
82     of the page size. */
83 ph10 664 #define STACK_GROWTH_RATE 8192
84    
85     /* Enable to check that the allocation could destroy temporaries. */
86     #if defined SLJIT_DEBUG && SLJIT_DEBUG
87     #define DESTROY_REGISTERS 1
88     #endif
89    
90     /*
91     Short summary about the backtracking mechanism employed by the jit code generator:
92    
93     The code generator follows the recursive nature of the PERL compatible regular
94     expressions. The basic blocks of regular expressions are condition checkers
95     which execute different commands depending on the result of the condition check.
96     The relationship between the operators can be horizontal (concatenation) and
97 zherczeg 970 vertical (sub-expression) (See struct backtrack_common for more details).
98 ph10 664
99     'ab' - 'a' and 'b' regexps are concatenated
100     'a+' - 'a' is the sub-expression of the '+' operator
101    
102     The condition checkers are boolean (true/false) checkers. Machine code is generated
103     for the checker itself and for the actions depending on the result of the checker.
104 zherczeg 999 The 'true' case is called the matching path (expected path), and the other is called
105 zherczeg 970 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 zherczeg 999 branches on the matching path.
107 ph10 664
108     Greedy star operator (*) :
109 zherczeg 999 Matching path: match happens.
110 zherczeg 970 Backtrack path: match failed.
111 ph10 664 Non-greedy star operator (*?) :
112 zherczeg 999 Matching path: no need to perform a match.
113 zherczeg 970 Backtrack path: match is required.
114 ph10 664
115     The following example shows how the code is generated for a capturing bracket
116     with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117     we have the following regular expression:
118    
119     A(B|C)D
120    
121     The generated code will be the following:
122    
123 zherczeg 999 A matching path
124     '(' matching path (pushing arguments to the stack)
125     B matching path
126     ')' matching path (pushing arguments to the stack)
127     D matching path
128 ph10 664 return with successful match
129    
130 zherczeg 970 D backtrack path
131     ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132     B backtrack path
133 ph10 664 C matching path
134 zherczeg 999 jump to D matching path
135 zherczeg 970 C backtrack path
136     A backtrack path
137 ph10 691
138 zherczeg 970 Notice that the order of the backtrack code paths is the opposite of the fast
139 ph10 664 code paths. In this way the topmost value on the stack always belongs
140 zherczeg 970 to the current backtrack code path. The backtrack path must check
141 ph10 664 whether there is a next alternative. If so, it needs to jump back to
142 zherczeg 999 the matching path eventually. Otherwise it needs to clear out its own stack
143 zherczeg 970 frame and continue the execution on the backtrack code paths.
144 ph10 664 */
145    
146     /*
147     Saved stack frames:
148    
149 zherczeg 1002 Atomic blocks and asserts require reloading the values of private data
150     when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 ph10 664 are not necessarily known at compile time, thus we need a dynamic restore
152     mechanism.
153    
154     The stack frames are stored in a chain list, and have the following format:
155     ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156    
157 zherczeg 1002 Thus we can restore the private data to a particular point in the stack.
158 ph10 664 */
159    
/* Argument block for the generated code: the jit_function type declared
later in this file takes a pointer to this structure. */
160     typedef struct jit_arguments {
161     /* Pointers first. */
162     struct sljit_stack *stack;
163 ph10 836 const pcre_uchar *str; /* NOTE(review): presumably the position in the subject where matching starts - confirm */
164     const pcre_uchar *begin; /* NOTE(review): presumably the start of the subject - confirm */
165     const pcre_uchar *end; /* NOTE(review): presumably the end of the subject - confirm */
166 ph10 664 int *offsets;
167 zherczeg 929 pcre_uchar *uchar_ptr;
168     pcre_uchar *mark_ptr;
169 zherczeg 1245 void *callout_data;
170 ph10 664 /* Everything else after. */
171 zherczeg 1316 pcre_uint32 limit_match;
172 zherczeg 1252 int real_offset_count;
173 zherczeg 1245 int offset_count;
174 ph10 836 pcre_uint8 notbol; /* NOTE(review): the four byte flags presumably mirror the matching NOTBOL / NOTEOL / NOTEMPTY / NOTEMPTY_ATSTART exec options - confirm */
175     pcre_uint8 noteol;
176     pcre_uint8 notempty;
177     pcre_uint8 notempty_atstart;
178 ph10 664 } jit_arguments;
179    
/* Bookkeeping for compiled code. Both arrays have one slot per compile mode
(JIT_NUMBER_OF_COMPILE_MODES entries each). */
180 zherczeg 914 typedef struct executable_functions {
181 zherczeg 915 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 zherczeg 852 PUBL(jit_callback) callback;
183 ph10 664 void *userdata; /* NOTE(review): presumably handed back to 'callback' - confirm */
184 zherczeg 1187 pcre_uint32 top_bracket;
185 zherczeg 1316 pcre_uint32 limit_match;
186 zherczeg 915 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; /* NOTE(review): presumably the code size of the same-index entry of executable_funcs - confirm */
187 zherczeg 914 } executable_functions;
188 ph10 664
/* Node of a singly linked list of sljit jumps. */
189     typedef struct jump_list {
190     struct sljit_jump *jump;
191     struct jump_list *next;
192     } jump_list;
193    
/* Node of a singly linked list of stubs; each node pairs a jump ('start')
with a label ('quit'). */
194     typedef struct stub_list {
195     struct sljit_jump *start;
196 zherczeg 991 struct sljit_label *quit;
197 ph10 664 struct stub_list *next;
198     } stub_list;
199    
/* Negative sentinel values. NOTE(review): presumably stored in the
'framesize' members of the backtrack structures, whose comments say
"Less than 0 if a frame is not needed" - confirm. */
200 zherczeg 1275 enum frame_types {
201     no_frame = -1,
202     no_stack = -2
203     };
204 zherczeg 1249
/* NOTE(review): presumably tags the entries of the control verb chain
(see control_head_ptr in compiler_common) - confirm. */
205 zherczeg 1275 enum control_types {
206 zherczeg 1290 type_mark = 0,
207     type_then_trap = 1
208 zherczeg 1275 };
209    
/* Pointer to a function that takes a jit_arguments block and returns an int,
using the SLJIT_CALL calling convention. */
210 ph10 664 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211    
212     /* The following structure is the key data type for the recursive
213 zherczeg 999 code generator. It is allocated by compile_matchingpath, and contains
214 ph10 1325 the arguments for compile_backtrackingpath. Must be the first member
215 ph10 664 of its descendants. */
/* Embedded as the first member ('common') of every *_backtrack structure
declared in this file. */
216 zherczeg 970 typedef struct backtrack_common {
217 ph10 664 /* Concatenation stack. */
218 zherczeg 970 struct backtrack_common *prev;
219     jump_list *nextbacktracks;
220 ph10 664 /* Internal stack (for component operators). */
221 zherczeg 970 struct backtrack_common *top;
222     jump_list *topbacktracks;
223 ph10 664 /* Opcode pointer. */
224 ph10 836 pcre_uchar *cc;
225 zherczeg 970 } backtrack_common;
226 ph10 664
/* Backtrack record that extends backtrack_common with a jump list, a frame
size, a private data offset and a label. */
227 zherczeg 970 typedef struct assert_backtrack {
228     backtrack_common common; /* Must stay the first member. */
229 ph10 664 jump_list *condfailed;
230 zherczeg 1276 /* Less than 0 if a frame is not needed. */
231 ph10 664 int framesize;
232     /* Points to our private memory word on the stack. */
233 zherczeg 1002 int private_data_ptr;
234 ph10 664 /* For iterators. */
235 zherczeg 999 struct sljit_label *matchingpath;
236 zherczeg 970 } assert_backtrack;
237 ph10 664
/* Backtrack record that extends backtrack_common with three labels, a union
'u' and a private data offset. */
238 zherczeg 970 typedef struct bracket_backtrack {
239     backtrack_common common; /* Must stay the first member. */
240 ph10 664 /* Where to continue if an alternative is successfully matched. */
241 zherczeg 999 struct sljit_label *alternative_matchingpath;
242 ph10 664 /* For rmin and rmax iterators. */
243 zherczeg 999 struct sljit_label *recursive_matchingpath;
244 ph10 664 /* For greedy ? operator. */
245 zherczeg 999 struct sljit_label *zero_matchingpath;
246 ph10 664 /* Contains the branches of a failed condition. Only one member of the
union is meaningful at a time. */
247     union {
248     /* Both for OP_COND, OP_SCOND. */
249     jump_list *condfailed;
250 zherczeg 970 assert_backtrack *assert;
251 zherczeg 1276 /* For OP_ONCE. Less than 0 if not needed. */
252 ph10 664 int framesize;
253     } u;
254     /* Points to our private memory word on the stack. */
255 zherczeg 1002 int private_data_ptr;
256 zherczeg 970 } bracket_backtrack;
257 ph10 664
/* Backtrack record that extends backtrack_common with a private data offset,
a frame size and a stack size. */
258 zherczeg 970 typedef struct bracketpos_backtrack {
259     backtrack_common common; /* Must stay the first member. */
260 ph10 664 /* Points to our private memory word on the stack. */
261 zherczeg 1002 int private_data_ptr;
262 ph10 664 /* Reverting stack is needed. */
263     int framesize;
264     /* Allocated stack size. */
265     int stacksize;
266 zherczeg 970 } bracketpos_backtrack;
267 ph10 664
/* Backtrack record that extends backtrack_common with a single label. */
268 zherczeg 970 typedef struct braminzero_backtrack {
269     backtrack_common common; /* Must stay the first member. */
270 zherczeg 999 struct sljit_label *matchingpath;
271 zherczeg 970 } braminzero_backtrack;
272 ph10 664
/* Backtrack record that extends backtrack_common with the label of the next
iteration. */
273 zherczeg 970 typedef struct iterator_backtrack {
274     backtrack_common common; /* Must stay the first member. */
275 ph10 664 /* Next iteration. */
276 zherczeg 999 struct sljit_label *matchingpath;
277 zherczeg 970 } iterator_backtrack;
278 ph10 664
/* Node of a singly linked list; each node has an entry label, a list of
pending calls and a start value. */
279     typedef struct recurse_entry {
280     struct recurse_entry *next;
281     /* Contains the function entry. */
282     struct sljit_label *entry;
283     /* Collects the calls made before the function is created. */
284     jump_list *calls;
285     /* Points to the starting opcode. */
286 zherczeg 1282 sljit_sw start;
287 ph10 664 } recurse_entry;
288    
/* Backtrack record that extends backtrack_common with one flag. */
289 zherczeg 970 typedef struct recurse_backtrack {
290     backtrack_common common; /* Must stay the first member. */
291 zherczeg 1249 BOOL inlined_pattern; /* NOTE(review): presumably TRUE when the recursed pattern was compiled inline - confirm */
292 zherczeg 970 } recurse_backtrack;
293 ph10 664
294 zherczeg 1282 #define OP_THEN_TRAP OP_TABLE_LENGTH /* NOTE(review): presumably a pseudo opcode numbered just past the regular opcodes - confirm */
295    
/* Backtrack record that extends backtrack_common with a link to another
then_trap_backtrack, a start value, a jump list and a frame size. */
296 zherczeg 1279 typedef struct then_trap_backtrack {
297     backtrack_common common; /* Must stay the first member. */
298 zherczeg 1282 /* If then_trap is not NULL, this structure contains the real
299     then_trap for the backtracking path. */
300 zherczeg 1279 struct then_trap_backtrack *then_trap;
301 zherczeg 1282 /* Points to the starting opcode. */
302     sljit_sw start;
303     /* Exit point for the then opcodes of this alternative. */
304 zherczeg 1279 jump_list *quit;
305 zherczeg 1282 /* Frame size of the current alternative. */
306 zherczeg 1279 int framesize;
307     } then_trap_backtrack;
308    
309 zherczeg 990 #define MAX_RANGE_SIZE 6 /* Sizes the digits[] member of compiler_common (2 + MAX_RANGE_SIZE ints). */
310    
/* State shared by the code generator functions in this file; they receive it
as their 'common' argument. The int members whose names end in _ptr (and
hit_start / first_line_end) keep the value 0 until a slot is assigned, as
check_opcode_types does for recursive_head_ptr, capture_last_ptr and
mark_ptr. */
311 ph10 664 typedef struct compiler_common {
312 zherczeg 1272 /* The sljit generic compiler. */
313 ph10 664 struct sljit_compiler *compiler;
314 zherczeg 1272 /* First byte code. */
315 ph10 836 pcre_uchar *start;
316 zherczeg 1002 /* Maps private data offset to each opcode. */
317 zherczeg 1306 sljit_si *private_data_ptrs;
318 zherczeg 1009 /* Tells whether the capturing bracket is optimized. */
319     pcre_uint8 *optimized_cbracket;
320 zherczeg 1279 /* Tells whether the starting offset is a target of then. */
321     pcre_uint8 *then_offsets;
322     /* Current position where a THEN must jump. */
323     then_trap_backtrack *then_trap;
324 zherczeg 1009 /* Starting offset of private data for capturing brackets. */
325 zherczeg 1272 int cbra_ptr;
326     /* Output vector starting point. Must be divisible by 2. */
327 zherczeg 920 int ovector_start;
328     /* Last known position of the requested byte. */
329     int req_char_ptr;
330     /* Head of the last recursion. */
331 zherczeg 1245 int recursive_head_ptr;
332 zherczeg 920 /* First inspected character for partial matching. */
333     int start_used_ptr;
334     /* Starting pointer for partial soft matches. */
335     int hit_start;
336     /* End pointer of the first line. */
337     int first_line_end;
338 zherczeg 929 /* Points to the marked string. */
339     int mark_ptr;
340 zherczeg 1275 /* Recursive control verb management chain. */
341     int control_head_ptr;
342 zherczeg 1245 /* Points to the last matched capture block index. */
343     int capture_last_ptr;
344 zherczeg 1272 /* Points to the starting position of the current match. */
345     int start_ptr;
346 zherczeg 920
347 zherczeg 990 /* Flipped and lower case tables. */
348 ph10 836 const pcre_uint8 *fcc;
349 zherczeg 1195 sljit_sw lcc;
350 zherczeg 990 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 zherczeg 914 int mode;
352 zherczeg 1279 /* \K is found in the pattern. */
353 zherczeg 1272 BOOL has_set_som;
354 zherczeg 1279 /* (*SKIP:arg) is found in the pattern. */
355 zherczeg 1278 BOOL has_skip_arg;
356 zherczeg 1279 /* (*THEN) is found in the pattern. */
357     BOOL has_then;
358 zherczeg 1272 /* Needs to know the start position anytime. */
359     BOOL needs_start_ptr;
360 zherczeg 1303 /* Currently in recurse or negative assert. */
361 zherczeg 1275 BOOL local_exit;
362 zherczeg 1303 /* Currently in a positive assert. */
363     BOOL positive_assert;
364 zherczeg 990 /* Newline control. */
365 ph10 664 int nltype;
366     int newline;
367     int bsr_nltype;
368 zherczeg 990 /* Dollar endonly. */
369 ph10 664 int endonly;
370 zherczeg 990 /* Tables. */
371 zherczeg 1195 sljit_sw ctypes;
372 zherczeg 990 int digits[2 + MAX_RANGE_SIZE];
373     /* Named capturing brackets. name_table is an address kept as an integer;
check_opcode_types casts it back to pcre_uchar * and steps through
name_count entries of name_entry_size code units each. */
374 zherczeg 741 sljit_uw name_table;
375 zherczeg 1195 sljit_sw name_count;
376     sljit_sw name_entry_size;
377 zherczeg 920
378     /* Labels and jump lists. */
379 zherczeg 914 struct sljit_label *partialmatchlabel;
380 zherczeg 1245 struct sljit_label *quit_label;
381     struct sljit_label *forced_quit_label;
382     struct sljit_label *accept_label;
383 ph10 664 stub_list *stubs;
384     recurse_entry *entries;
385     recurse_entry *currententry;
386 zherczeg 914 jump_list *partialmatch;
387 zherczeg 991 jump_list *quit;
388 zherczeg 1303 jump_list *positive_assert_quit;
389 zherczeg 1245 jump_list *forced_quit;
390 ph10 664 jump_list *accept;
391 ph10 677 jump_list *calllimit;
392 ph10 664 jump_list *stackalloc;
393     jump_list *revertframes;
394     jump_list *wordboundary;
395     jump_list *anynewline;
396     jump_list *hspace;
397     jump_list *vspace;
398     jump_list *casefulcmp;
399     jump_list *caselesscmp;
400 zherczeg 1272 jump_list *reset_match;
401 ph10 664 BOOL jscript_compat;
/* The members below exist only in builds with the named features enabled. */
402 ph10 836 #ifdef SUPPORT_UTF
403     BOOL utf;
404 ph10 664 #ifdef SUPPORT_UCP
405 ph10 836 BOOL use_ucp;
406 ph10 664 #endif
407 chpe 1055 #ifndef COMPILE_PCRE32
408 ph10 836 jump_list *utfreadchar;
409 chpe 1055 #endif
410 ph10 836 #ifdef COMPILE_PCRE8
411     jump_list *utfreadtype8;
412 ph10 664 #endif
413 ph10 836 #endif /* SUPPORT_UTF */
414 ph10 664 #ifdef SUPPORT_UCP
415     jump_list *getucd;
416     #endif
417     } compiler_common;
418    
419     /* For byte_sequence_compare. */
420    
/* 'length' and 'sourcereg' are always present. 'ucharptr' and the two
identically laid out unions 'c' and 'oc' exist only when SLJIT_UNALIGNED is
set. */
421     typedef struct compare_context {
422     int length;
423     int sourcereg;
424     #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
425 ph10 836 int ucharptr;
426 ph10 664 union {
427 zherczeg 1195 sljit_si asint;
428 zherczeg 847 sljit_uh asushort;
/* The asuchars element count depends on the code unit width: 4, 2 or 1. */
429 chpe 1055 #if defined COMPILE_PCRE8
430 ph10 664 sljit_ub asbyte;
431 ph10 836 sljit_ub asuchars[4];
432 chpe 1055 #elif defined COMPILE_PCRE16
433 ph10 836 sljit_uh asuchars[2];
434 chpe 1055 #elif defined COMPILE_PCRE32
435     sljit_ui asuchars[1];
436 ph10 836 #endif
437 ph10 664 } c;
/* NOTE(review): 'oc' presumably holds the other-case form of 'c' - confirm. */
438     union {
439 zherczeg 1195 sljit_si asint;
440 zherczeg 847 sljit_uh asushort;
441 chpe 1055 #if defined COMPILE_PCRE8
442 ph10 664 sljit_ub asbyte;
443 ph10 836 sljit_ub asuchars[4];
444 chpe 1055 #elif defined COMPILE_PCRE16
445 ph10 836 sljit_uh asuchars[2];
446 chpe 1055 #elif defined COMPILE_PCRE32
447     sljit_ui asuchars[1];
448 ph10 836 #endif
449 ph10 664 } oc;
450     #endif
451     } compare_context;
452    
453 zherczeg 883 /* Undefine sljit macros. CMP is defined again further down in this file as
a shortcut for sljit_emit_cmp. */
454     #undef CMP
455    
456 ph10 664 /* Used for accessing the elements of the stack. STACK(i) expands to the
negative byte offset -(i + 1) * sizeof(sljit_sw), so STACK(0) is one word
below the base. */
457 zherczeg 1195 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
458 ph10 664
/* Local names for the sljit registers. Each name maps to a different sljit
register. */
459 zherczeg 1215 #define TMP1 SLJIT_SCRATCH_REG1
460     #define TMP2 SLJIT_SCRATCH_REG3
461 ph10 664 #define TMP3 SLJIT_TEMPORARY_EREG2
462 zherczeg 880 #define STR_PTR SLJIT_SAVED_REG1
463     #define STR_END SLJIT_SAVED_REG2
464 zherczeg 1215 #define STACK_TOP SLJIT_SCRATCH_REG2
465 zherczeg 880 #define STACK_LIMIT SLJIT_SAVED_REG3
466     #define ARGUMENTS SLJIT_SAVED_EREG1
467 zherczeg 1316 #define COUNT_MATCH SLJIT_SAVED_EREG2
468 ph10 664 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
469    
470 zherczeg 1002 /* Local space layout. The fixed slots below are byte offsets of
consecutive sljit_sw sized words (word 0 to word 4). */
471 ph10 664 /* These two locals can be used by the current opcode. */
472 zherczeg 1195 #define LOCALS0 (0 * sizeof(sljit_sw))
473     #define LOCALS1 (1 * sizeof(sljit_sw))
474 ph10 664 /* Two local variables for possessive quantifiers (char1 cannot use them). */
475 zherczeg 1195 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
476     #define POSSESSIVE1 (3 * sizeof(sljit_sw))
477 ph10 677 /* Max limit of recursions. */
478 zherczeg 1316 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
479 ph10 664 /* The output vector is stored on the stack, and contains pointers
480     to characters. The vector data is divided into two groups: the first
481     group contains the start / end character pointers, and the second is
482     the start pointers when the end of the capturing group has not yet been reached.
The macros below need a variable named 'common' in scope. */
483 zherczeg 920 #define OVECTOR_START (common->ovector_start)
484 zherczeg 1195 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
485 zherczeg 1272 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
486 zherczeg 1002 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
487 ph10 664
/* Pick the sljit move opcodes according to which of COMPILE_PCRE8,
COMPILE_PCRE16 or COMPILE_PCRE32 is defined; it is a compile error if none
of them is. */
488 chpe 1055 #if defined COMPILE_PCRE8
489 ph10 836 #define MOV_UCHAR SLJIT_MOV_UB
490     #define MOVU_UCHAR SLJIT_MOVU_UB
491 chpe 1055 #elif defined COMPILE_PCRE16
492 ph10 836 #define MOV_UCHAR SLJIT_MOV_UH
493     #define MOVU_UCHAR SLJIT_MOVU_UH
494 chpe 1055 #elif defined COMPILE_PCRE32
495     #define MOV_UCHAR SLJIT_MOV_UI
496     #define MOVU_UCHAR SLJIT_MOVU_UI
497 ph10 836 #else
498     #error Unsupported compiling mode
499     #endif
500    
501 ph10 664 /* Shortcuts. DEFINE_COMPILER declares a local 'compiler' taken from
common->compiler; every other macro here passes that local 'compiler' to the
sljit function it wraps, so DEFINE_COMPILER (or an equivalent declaration)
must come first in any function using them. JUMPHERE binds a jump to a label
emitted at the current position; JUMPTO and CMPTO emit a jump / compare and
bind it to an existing label. */
502     #define DEFINE_COMPILER \
503     struct sljit_compiler *compiler = common->compiler
504     #define OP1(op, dst, dstw, src, srcw) \
505     sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
506     #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
507     sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
508     #define LABEL() \
509     sljit_emit_label(compiler)
510     #define JUMP(type) \
511     sljit_emit_jump(compiler, (type))
512     #define JUMPTO(type, label) \
513     sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
514     #define JUMPHERE(jump) \
515     sljit_set_label((jump), sljit_emit_label(compiler))
516 zherczeg 1246 #define SET_LABEL(jump, label) \
517     sljit_set_label((jump), (label))
518 ph10 664 #define CMP(type, src1, src1w, src2, src2w) \
519     sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
520     #define CMPTO(type, src1, src1w, src2, src2w, label) \
521     sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
522 zherczeg 1209 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
523     sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
524 zherczeg 955 #define GET_LOCAL_BASE(dst, dstw, offset) \
525     sljit_get_local_base(compiler, (dst), (dstw), (offset))
526 ph10 664
/* Returns the address just past the closing ket of the bracket that starts
at cc. cc must point at an opcode in the OP_ASSERT..OP_ASSERTBACK_NOT or
OP_ONCE..OP_SCOND range (asserted). */
527 ph10 836 static pcre_uchar* bracketend(pcre_uchar* cc)
528 ph10 664 {
529     SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
/* Follow the link stored after the opcode for as long as it lands on an
OP_ALT, i.e. step over every alternative. */
530     do cc += GET(cc, 1); while (*cc == OP_ALT);
531     SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
/* Skip the ket opcode itself and its link field. */
532     cc += 1 + LINK_SIZE;
533     return cc;
534     }
535    
536 ph10 691 /* Functions which might need modification for all new supported opcodes:
537 ph10 664 next_opcode
538 zherczeg 1306 check_opcode_types
539 zherczeg 1002 set_private_data_ptrs
540 ph10 664 get_framesize
541     init_frame
542 zherczeg 1276 get_private_data_copy_length
543 zherczeg 1002 copy_private_data
544 zherczeg 999 compile_matchingpath
545     compile_backtrackingpath
546 ph10 664 */
547    
/* Returns the address of the item that follows the opcode at cc.

common  Only read when SUPPORT_UTF is defined (common->utf).
cc      Points at the opcode to step over.

Returns NULL for OP_ANYBYTE when common->utf is set, and for any opcode
that is not listed in the switch (after SLJIT_ASSERT_STOP()). */
548 ph10 836 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
549 ph10 664 {
550     SLJIT_UNUSED_ARG(common);
551     switch(*cc)
552     {
553     case OP_SOD:
554     case OP_SOM:
555     case OP_SET_SOM:
556     case OP_NOT_WORD_BOUNDARY:
557     case OP_WORD_BOUNDARY:
558     case OP_NOT_DIGIT:
559     case OP_DIGIT:
560     case OP_NOT_WHITESPACE:
561     case OP_WHITESPACE:
562     case OP_NOT_WORDCHAR:
563     case OP_WORDCHAR:
564     case OP_ANY:
565     case OP_ALLANY:
566 zherczeg 1249 case OP_NOTPROP:
567     case OP_PROP:
568 ph10 664 case OP_ANYNL:
569     case OP_NOT_HSPACE:
570     case OP_HSPACE:
571     case OP_NOT_VSPACE:
572     case OP_VSPACE:
573     case OP_EXTUNI:
574     case OP_EODN:
575     case OP_EOD:
576     case OP_CIRC:
577     case OP_CIRCM:
578     case OP_DOLL:
579     case OP_DOLLM:
580     case OP_CRSTAR:
581     case OP_CRMINSTAR:
582     case OP_CRPLUS:
583     case OP_CRMINPLUS:
584     case OP_CRQUERY:
585     case OP_CRMINQUERY:
586 zherczeg 1249 case OP_CRRANGE:
587     case OP_CRMINRANGE:
588     case OP_CLASS:
589     case OP_NCLASS:
590     case OP_REF:
591     case OP_REFI:
592     case OP_RECURSE:
593     case OP_CALLOUT:
594     case OP_ALT:
595     case OP_KET:
596     case OP_KETRMAX:
597     case OP_KETRMIN:
598     case OP_KETRPOS:
599     case OP_REVERSE:
600     case OP_ASSERT:
601     case OP_ASSERT_NOT:
602     case OP_ASSERTBACK:
603     case OP_ASSERTBACK_NOT:
604     case OP_ONCE:
605     case OP_ONCE_NC:
606     case OP_BRA:
607     case OP_BRAPOS:
608     case OP_CBRA:
609     case OP_CBRAPOS:
610     case OP_COND:
611     case OP_SBRA:
612     case OP_SBRAPOS:
613     case OP_SCBRA:
614     case OP_SCBRAPOS:
615     case OP_SCOND:
616     case OP_CREF:
617     case OP_NCREF:
618     case OP_RREF:
619     case OP_NRREF:
620 ph10 664 case OP_DEF:
621     case OP_BRAZERO:
622     case OP_BRAMINZERO:
623     case OP_BRAPOSZERO:
624 zherczeg 1272 case OP_PRUNE:
625 zherczeg 1275 case OP_SKIP:
626 zherczeg 1279 case OP_THEN:
627 zherczeg 941 case OP_COMMIT:
628 ph10 664 case OP_FAIL:
629     case OP_ACCEPT:
630     case OP_ASSERT_ACCEPT:
631 zherczeg 1249 case OP_CLOSE:
632 ph10 664 case OP_SKIPZERO:
/* The step is exactly the entry of the opcode length table. */
633 zherczeg 1249 return cc + PRIV(OP_lengths)[*cc];
634 ph10 664
635     case OP_CHAR:
636     case OP_CHARI:
637     case OP_NOT:
638     case OP_NOTI:
639     case OP_STAR:
640     case OP_MINSTAR:
641     case OP_PLUS:
642     case OP_MINPLUS:
643     case OP_QUERY:
644     case OP_MINQUERY:
645 zherczeg 1249 case OP_UPTO:
646     case OP_MINUPTO:
647     case OP_EXACT:
648 ph10 664 case OP_POSSTAR:
649     case OP_POSPLUS:
650     case OP_POSQUERY:
651 zherczeg 1249 case OP_POSUPTO:
652 ph10 664 case OP_STARI:
653     case OP_MINSTARI:
654     case OP_PLUSI:
655     case OP_MINPLUSI:
656     case OP_QUERYI:
657     case OP_MINQUERYI:
658 zherczeg 1249 case OP_UPTOI:
659     case OP_MINUPTOI:
660     case OP_EXACTI:
661 ph10 664 case OP_POSSTARI:
662     case OP_POSPLUSI:
663     case OP_POSQUERYI:
664 zherczeg 1249 case OP_POSUPTOI:
665 ph10 664 case OP_NOTSTAR:
666     case OP_NOTMINSTAR:
667     case OP_NOTPLUS:
668     case OP_NOTMINPLUS:
669     case OP_NOTQUERY:
670     case OP_NOTMINQUERY:
671 zherczeg 1249 case OP_NOTUPTO:
672     case OP_NOTMINUPTO:
673     case OP_NOTEXACT:
674 ph10 664 case OP_NOTPOSSTAR:
675     case OP_NOTPOSPLUS:
676     case OP_NOTPOSQUERY:
677 zherczeg 1249 case OP_NOTPOSUPTO:
678 ph10 664 case OP_NOTSTARI:
679     case OP_NOTMINSTARI:
680     case OP_NOTPLUSI:
681     case OP_NOTMINPLUSI:
682     case OP_NOTQUERYI:
683     case OP_NOTMINQUERYI:
684 zherczeg 1249 case OP_NOTUPTOI:
685     case OP_NOTMINUPTOI:
686     case OP_NOTEXACTI:
687 ph10 664 case OP_NOTPOSSTARI:
688     case OP_NOTPOSPLUSI:
689     case OP_NOTPOSQUERYI:
690     case OP_NOTPOSUPTOI:
/* Table length first; in UTF mode the last code unit stepped over (cc[-1])
may call for additional code units, which are skipped as well. */
691 zherczeg 1249 cc += PRIV(OP_lengths)[*cc];
692 chpe 1114 #ifdef SUPPORT_UTF
693 ph10 836 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
694 ph10 664 #endif
695     return cc;
696    
697 zherczeg 1249 /* Special cases. */
698     case OP_TYPESTAR:
699     case OP_TYPEMINSTAR:
700     case OP_TYPEPLUS:
701     case OP_TYPEMINPLUS:
702     case OP_TYPEQUERY:
703     case OP_TYPEMINQUERY:
704 ph10 664 case OP_TYPEUPTO:
705     case OP_TYPEMINUPTO:
706     case OP_TYPEEXACT:
707 zherczeg 1249 case OP_TYPEPOSSTAR:
708     case OP_TYPEPOSPLUS:
709     case OP_TYPEPOSQUERY:
710 ph10 664 case OP_TYPEPOSUPTO:
/* One less than the table length. NOTE(review): presumably so that the
character type item which follows is visited as an opcode of its own -
confirm. */
711 zherczeg 1249 return cc + PRIV(OP_lengths)[*cc] - 1;
712 ph10 664
713 zherczeg 1249 case OP_ANYBYTE:
/* Rejected (NULL) in UTF mode, otherwise a single code unit. */
714     #ifdef SUPPORT_UTF
715     if (common->utf) return NULL;
716     #endif
717     return cc + 1;
718 ph10 664
719 ph10 836 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
720 ph10 664 case OP_XCLASS:
/* The step is read from the item itself, via GET at offset 1. */
721     return cc + GET(cc, 1);
722     #endif
723    
724 zherczeg 929 case OP_MARK:
725 zherczeg 1272 case OP_PRUNE_ARG:
726 zherczeg 1278 case OP_SKIP_ARG:
727 zherczeg 1279 case OP_THEN_ARG:
/* Variable length: cc[1] is added to a fixed 1 + 2. NOTE(review): the 2
presumably covers the length code unit and a terminator - confirm. */
728 zherczeg 929 return cc + 1 + 2 + cc[1];
729    
730 ph10 664 default:
731 zherczeg 1282 /* All opcodes are supported now! */
732     SLJIT_ASSERT_STOP();
733 ph10 664 return NULL;
734     }
735     }
736    
/* Walks the byte code from cc up to ccend once and records in 'common' what
the pattern uses.

common  Updated in place: flags, optimized_cbracket[] entries and slot
        offsets taken from ovector_start.
cc      First opcode to inspect.
ccend   End of the range; the walk stops when cc reaches it.

Returns FALSE when an OP_COND / OP_SCOND is directly followed by
OP_CALLOUT, or when next_opcode() returns NULL; TRUE otherwise. */
737 zherczeg 1306 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
738 zherczeg 986 {
739 zherczeg 1009 pcre_uchar *name;
740 zherczeg 1306 pcre_uchar *name2;
741 zherczeg 1310 unsigned int cbra_index;
742     int i;
743 zherczeg 986
744 ph10 664 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
745     while (cc < ccend)
746     {
747     switch(*cc)
748     {
749 zherczeg 929 case OP_SET_SOM:
750     common->has_set_som = TRUE;
751     cc += 1;
752     break;
753    
754 zherczeg 1009 case OP_REF:
755     case OP_REFI:
/* Clear the optimized flag of the bracket whose number follows the opcode. */
756     common->optimized_cbracket[GET2(cc, 1)] = 0;
757     cc += 1 + IMM2_SIZE;
758     break;
759    
760 ph10 664 case OP_CBRAPOS:
761     case OP_SCBRAPOS:
/* Clear the optimized flag of this bracket; its number is stored after the link. */
762 zherczeg 1009 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
763 zherczeg 1306 cc += 1 + LINK_SIZE + IMM2_SIZE;
764 ph10 664 break;
765    
766     case OP_COND:
767 zherczeg 1009 case OP_SCOND:
768 zherczeg 1245 /* Only AUTO_CALLOUT can insert this opcode. We do
769     not intend to support this case. */
770     if (cc[1 + LINK_SIZE] == OP_CALLOUT)
771 zherczeg 1306 return FALSE;
772     cc += 1 + LINK_SIZE;
773 ph10 664 break;
774    
775 zherczeg 1245 case OP_CREF:
776     i = GET2(cc, 1);
777     common->optimized_cbracket[i] = 0;
778     cc += 1 + IMM2_SIZE;
779     break;
780    
781     case OP_NCREF:
782 zherczeg 1306 cbra_index = GET2(cc, 1);
783 zherczeg 1245 name = (pcre_uchar *)common->name_table;
784 zherczeg 1306 name2 = name;
/* First pass: find the name table entry whose bracket number equals
cbra_index; 'name' is left pointing at it. */
785 zherczeg 1245 for (i = 0; i < common->name_count; i++)
786     {
787 zherczeg 1306 if (GET2(name, 0) == cbra_index) break;
788 zherczeg 1245 name += common->name_entry_size;
789     }
790     SLJIT_ASSERT(i != common->name_count);
791    
/* Second pass: clear the optimized flag of every entry that carries the same
name as the entry found above. */
792     for (i = 0; i < common->name_count; i++)
793     {
794 zherczeg 1306 if (STRCMP_UC_UC(name2 + IMM2_SIZE, name + IMM2_SIZE) == 0)
795     common->optimized_cbracket[GET2(name2, 0)] = 0;
796     name2 += common->name_entry_size;
797 zherczeg 1245 }
798     cc += 1 + IMM2_SIZE;
799     break;
800    
801 zherczeg 920 case OP_RECURSE:
802     /* Set its value only once. The slot is the current ovector_start, which
then moves up by one sljit_sw. */
803 zherczeg 1245 if (common->recursive_head_ptr == 0)
804 zherczeg 920 {
805 zherczeg 1245 common->recursive_head_ptr = common->ovector_start;
806 zherczeg 1195 common->ovector_start += sizeof(sljit_sw);
807 zherczeg 920 }
808     cc += 1 + LINK_SIZE;
809     break;
810    
811 zherczeg 1245 case OP_CALLOUT:
/* Same one-time slot reservation, for capture_last_ptr. */
812     if (common->capture_last_ptr == 0)
813     {
814     common->capture_last_ptr = common->ovector_start;
815     common->ovector_start += sizeof(sljit_sw);
816     }
817     cc += 2 + 2 * LINK_SIZE;
818     break;
819    
820 zherczeg 1279 case OP_THEN_ARG:
821     common->has_then = TRUE;
/* NOTE(review): 1 is presumably just a "needed" marker here, with the real
offset assigned elsewhere - confirm. */
822 zherczeg 1290 common->control_head_ptr = 1;
823 zherczeg 1279 /* Fall through. */
824    
825 zherczeg 1272 case OP_PRUNE_ARG:
826     common->needs_start_ptr = TRUE;
827     /* Fall through. */
828    
829 zherczeg 929 case OP_MARK:
/* Same one-time slot reservation, for mark_ptr. */
830     if (common->mark_ptr == 0)
831     {
832     common->mark_ptr = common->ovector_start;
833 zherczeg 1195 common->ovector_start += sizeof(sljit_sw);
834 zherczeg 929 }
835     cc += 1 + 2 + cc[1];
836     break;
837    
838 zherczeg 1279 case OP_THEN:
839     common->has_then = TRUE;
840 zherczeg 1290 common->control_head_ptr = 1;
841 zherczeg 1279 /* Fall through. */
842    
843 zherczeg 1272 case OP_PRUNE:
844 zherczeg 1275 case OP_SKIP:
845 zherczeg 1272 common->needs_start_ptr = TRUE;
846     cc += 1;
847     break;
848    
849 zherczeg 1278 case OP_SKIP_ARG:
850     common->control_head_ptr = 1;
851     common->has_skip_arg = TRUE;
852     cc += 1 + 2 + cc[1];
853     break;
854    
855 ph10 664 default:
/* Nothing to record: just step over the opcode, failing if it cannot be
stepped over. */
856     cc = next_opcode(common, cc);
857     if (cc == NULL)
858 zherczeg 1306 return FALSE;
859 ph10 664 break;
860     }
861 zherczeg 1306 }
862     return TRUE;
863     }
864 zherczeg 986
865 zherczeg 1306 static int get_class_iterator_size(pcre_uchar *cc)
866     {
867     switch(*cc)
868     {
869     case OP_CRSTAR:
870     case OP_CRPLUS:
871     return 2;
872 zherczeg 986
873 zherczeg 1306 case OP_CRMINSTAR:
874     case OP_CRMINPLUS:
875     case OP_CRQUERY:
876     case OP_CRMINQUERY:
877     return 1;
878    
879     case OP_CRRANGE:
880     case OP_CRMINRANGE:
881     if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
882     return 0;
883     return 2;
884    
885     default:
886     return 0;
887     }
888     }
889    
890     static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
891     {
892     pcre_uchar *end = bracketend(begin);
893     pcre_uchar *next;
894     pcre_uchar *next_end;
895     pcre_uchar *max_end;
896     pcre_uchar type;
897 zherczeg 1310 sljit_sw length = end - begin;
898 zherczeg 1306 int min, max, i;
899    
900     /* Detect fixed iterations first. */
901     if (end[-(1 + LINK_SIZE)] != OP_KET)
902     return FALSE;
903    
904     /* Already detected repeat. */
905     if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
906     return TRUE;
907    
908     next = end;
909     min = 1;
910     while (1)
911     {
912     if (*next != *begin)
913     break;
914     next_end = bracketend(next);
915     if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
916     break;
917     next = next_end;
918     min++;
919     }
920    
921     if (min == 2)
922     return FALSE;
923    
924     max = 0;
925     max_end = next;
926     if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
927     {
928     type = *next;
929     while (1)
930 zherczeg 986 {
931 zherczeg 1306 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
932     break;
933     next_end = bracketend(next + 2 + LINK_SIZE);
934     if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
935     break;
936     next = next_end;
937     max++;
938 zherczeg 986 }
939    
940 zherczeg 1306 if (next[0] == type && next[1] == *begin && max >= 1)
941 zherczeg 986 {
942 zherczeg 1306 next_end = bracketend(next + 1);
943     if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
944 zherczeg 986 {
945 zherczeg 1306 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
946     if (*next_end != OP_KET)
947     break;
948    
949     if (i == max)
950     {
951     common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
952     common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
953     /* +2 the original and the last. */
954     common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
955     if (min == 1)
956     return TRUE;
957     min--;
958     max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
959     }
960 zherczeg 986 }
961     }
962 ph10 664 }
963 zherczeg 1306
964     if (min >= 3)
965     {
966     common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
967     common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
968     common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
969     return TRUE;
970     }
971    
972     return FALSE;
973 ph10 664 }
974    
/* Case label groups for the single item repeat opcodes. The functions
which walk the byte code (set_private_data_ptrs and the private data
copy routines) use them as switch labels.

The digit is the number of private words set_private_data_ptrs reserves
for the opcode. The B groups carry an additional IMM2_SIZE operand after
the opcode, the A groups do not. */

/* Character repeats: one private word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINSTARI: \
    case OP_MINPLUS: \
    case OP_MINPLUSI: \
    case OP_QUERY: \
    case OP_QUERYI: \
    case OP_MINQUERY: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUS: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERY: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINQUERYI:

/* Character repeats: two private words, no count operand. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_STARI: \
    case OP_PLUS: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTSTARI: \
    case OP_NOTPLUS: \
    case OP_NOTPLUSI:

/* Character repeats: two private words, with a count operand. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_UPTOI: \
    case OP_MINUPTO: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTO: \
    case OP_NOTMINUPTOI:

/* Type repeats: one private word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Type repeats: two private words, no count operand. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Type repeats: two private words, with a count operand. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1026    
1027     static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1028 ph10 664 {
1029 ph10 836 pcre_uchar *cc = common->start;
1030     pcre_uchar *alternative;
1031 zherczeg 986 pcre_uchar *end = NULL;
1032 zherczeg 1306 int private_data_ptr = *private_data_start;
1033 zherczeg 986 int space, size, bracketlen;
1034    
1035 ph10 664 while (cc < ccend)
1036     {
1037 zherczeg 986 space = 0;
1038     size = 0;
1039     bracketlen = 0;
1040 zherczeg 1306 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1041     return;
1042    
1043 zherczeg 1308 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1044 zherczeg 1306 if (detect_repeat(common, cc))
1045     {
1046     /* These brackets are converted to repeats, so no global
1047     based single character repeat is allowed. */
1048     if (cc >= end)
1049     end = bracketend(cc);
1050     }
1051    
1052 ph10 664 switch(*cc)
1053     {
1054 zherczeg 1306 case OP_KET:
1055     if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1056     {
1057     common->private_data_ptrs[cc - common->start] = private_data_ptr;
1058     private_data_ptr += sizeof(sljit_sw);
1059     cc += common->private_data_ptrs[cc + 1 - common->start];
1060     }
1061     cc += 1 + LINK_SIZE;
1062     break;
1063    
1064 ph10 664 case OP_ASSERT:
1065     case OP_ASSERT_NOT:
1066     case OP_ASSERTBACK:
1067     case OP_ASSERTBACK_NOT:
1068     case OP_ONCE:
1069 zherczeg 726 case OP_ONCE_NC:
1070 ph10 664 case OP_BRAPOS:
1071     case OP_SBRA:
1072     case OP_SBRAPOS:
1073     case OP_SCOND:
1074 zherczeg 1002 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1075 zherczeg 1195 private_data_ptr += sizeof(sljit_sw);
1076 zherczeg 986 bracketlen = 1 + LINK_SIZE;
1077 ph10 664 break;
1078    
1079     case OP_CBRAPOS:
1080     case OP_SCBRAPOS:
1081 zherczeg 1002 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1082 zherczeg 1195 private_data_ptr += sizeof(sljit_sw);
1083 zherczeg 986 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1084 ph10 664 break;
1085    
1086     case OP_COND:
1087     /* Might be a hidden SCOND. */
1088     alternative = cc + GET(cc, 1);
1089     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1090     {
1091 zherczeg 1002 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1092 zherczeg 1195 private_data_ptr += sizeof(sljit_sw);
1093 ph10 664 }
1094 zherczeg 986 bracketlen = 1 + LINK_SIZE;
1095 ph10 664 break;
1096    
1097 zherczeg 986 case OP_BRA:
1098     bracketlen = 1 + LINK_SIZE;
1099     break;
1100    
1101     case OP_CBRA:
1102     case OP_SCBRA:
1103     bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1104     break;
1105    
1106 zherczeg 1002 CASE_ITERATOR_PRIVATE_DATA_1
1107 zherczeg 986 space = 1;
1108     size = -2;
1109     break;
1110    
1111 zherczeg 1002 CASE_ITERATOR_PRIVATE_DATA_2A
1112 zherczeg 986 space = 2;
1113     size = -2;
1114     break;
1115    
1116 zherczeg 1002 CASE_ITERATOR_PRIVATE_DATA_2B
1117 zherczeg 986 space = 2;
1118     size = -(2 + IMM2_SIZE);
1119     break;
1120    
1121 zherczeg 1002 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1122 zherczeg 986 space = 1;
1123     size = 1;
1124     break;
1125    
1126 zherczeg 1002 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1127 zherczeg 986 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1128     space = 2;
1129     size = 1;
1130     break;
1131    
1132 zherczeg 1002 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1133 zherczeg 986 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1134     space = 2;
1135     size = 1 + IMM2_SIZE;
1136     break;
1137    
1138     case OP_CLASS:
1139     case OP_NCLASS:
1140     size += 1 + 32 / sizeof(pcre_uchar);
1141     space = get_class_iterator_size(cc + size);
1142     break;
1143    
1144     #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1145     case OP_XCLASS:
1146     size = GET(cc, 1);
1147     space = get_class_iterator_size(cc + size);
1148     break;
1149     #endif
1150    
1151 ph10 664 default:
1152     cc = next_opcode(common, cc);
1153     SLJIT_ASSERT(cc != NULL);
1154     break;
1155     }
1156 zherczeg 986
1157 zherczeg 1306 /* Character iterators, which are not inside a repeated bracket,
1158     gets a private slot instead of allocating it on the stack. */
1159 zherczeg 986 if (space > 0 && cc >= end)
1160     {
1161 zherczeg 1002 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1162 zherczeg 1195 private_data_ptr += sizeof(sljit_sw) * space;
1163 zherczeg 986 }
1164    
1165     if (size != 0)
1166     {
1167     if (size < 0)
1168     {
1169     cc += -size;
1170 chpe 1114 #ifdef SUPPORT_UTF
1171 zherczeg 986 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1172     #endif
1173     }
1174     else
1175     cc += size;
1176     }
1177    
1178     if (bracketlen > 0)
1179     {
1180     if (cc >= end)
1181     {
1182     end = bracketend(cc);
1183     if (end[-1 - LINK_SIZE] == OP_KET)
1184     end = NULL;
1185     }
1186     cc += bracketlen;
1187     }
1188 ph10 664 }
1189 zherczeg 1306 *private_data_start = private_data_ptr;
1190 ph10 664 }
1191    
1192 zherczeg 1249 /* Returns with a frame_types (always < 0) if no need for frame. */
1193 zherczeg 1279 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1194 ph10 664 {
1195     int length = 0;
1196 zherczeg 1246 int possessive = 0;
1197 zherczeg 1249 BOOL stack_restore = FALSE;
1198 zherczeg 929 BOOL setsom_found = recursive;
1199     BOOL setmark_found = recursive;
1200 zherczeg 1247 /* The last capture is a local variable even for recursions. */
1201     BOOL capture_last_found = FALSE;
1202 ph10 664
1203 zherczeg 1277 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1204     SLJIT_ASSERT(common->control_head_ptr != 0);
1205     *needs_control_head = TRUE;
1206     #else
1207     *needs_control_head = FALSE;
1208     #endif
1209    
1210 zherczeg 1279 if (ccend == NULL)
1211 ph10 664 {
1212 zherczeg 1279 ccend = bracketend(cc) - (1 + LINK_SIZE);
1213     if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1214     {
1215     possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1216     /* This is correct regardless of common->capture_last_ptr. */
1217     capture_last_found = TRUE;
1218     }
1219     cc = next_opcode(common, cc);
1220 ph10 664 }
1221    
1222     SLJIT_ASSERT(cc != NULL);
1223     while (cc < ccend)
1224     switch(*cc)
1225     {
1226     case OP_SET_SOM:
1227 zherczeg 929 SLJIT_ASSERT(common->has_set_som);
1228 zherczeg 1249 stack_restore = TRUE;
1229 ph10 664 if (!setsom_found)
1230     {
1231     length += 2;
1232     setsom_found = TRUE;
1233     }
1234 zherczeg 929 cc += 1;
1235 ph10 664 break;
1236    
1237 zherczeg 929 case OP_MARK:
1238 zherczeg 1272 case OP_PRUNE_ARG:
1239 zherczeg 1279 case OP_THEN_ARG:
1240 zherczeg 929 SLJIT_ASSERT(common->mark_ptr != 0);
1241 zherczeg 1249 stack_restore = TRUE;
1242 zherczeg 929 if (!setmark_found)
1243     {
1244     length += 2;
1245     setmark_found = TRUE;
1246     }
1247 zherczeg 1277 if (common->control_head_ptr != 0)
1248     *needs_control_head = TRUE;
1249 zherczeg 929 cc += 1 + 2 + cc[1];
1250     break;
1251    
1252     case OP_RECURSE:
1253 zherczeg 1249 stack_restore = TRUE;
1254 zherczeg 929 if (common->has_set_som && !setsom_found)
1255     {
1256     length += 2;
1257     setsom_found = TRUE;
1258     }
1259     if (common->mark_ptr != 0 && !setmark_found)
1260     {
1261     length += 2;
1262     setmark_found = TRUE;
1263     }
1264 zherczeg 1246 if (common->capture_last_ptr != 0 && !capture_last_found)
1265     {
1266     length += 2;
1267     capture_last_found = TRUE;
1268     }
1269 zherczeg 929 cc += 1 + LINK_SIZE;
1270     break;
1271    
1272 ph10 664 case OP_CBRA:
1273     case OP_CBRAPOS:
1274     case OP_SCBRA:
1275     case OP_SCBRAPOS:
1276 zherczeg 1249 stack_restore = TRUE;
1277 zherczeg 1246 if (common->capture_last_ptr != 0 && !capture_last_found)
1278     {
1279     length += 2;
1280     capture_last_found = TRUE;
1281     }
1282 ph10 664 length += 3;
1283 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1284 ph10 664 break;
1285    
1286     default:
1287 zherczeg 1249 stack_restore = TRUE;
1288     /* Fall through. */
1289    
1290     case OP_NOT_WORD_BOUNDARY:
1291     case OP_WORD_BOUNDARY:
1292     case OP_NOT_DIGIT:
1293     case OP_DIGIT:
1294     case OP_NOT_WHITESPACE:
1295     case OP_WHITESPACE:
1296     case OP_NOT_WORDCHAR:
1297     case OP_WORDCHAR:
1298     case OP_ANY:
1299     case OP_ALLANY:
1300     case OP_ANYBYTE:
1301     case OP_NOTPROP:
1302     case OP_PROP:
1303     case OP_ANYNL:
1304     case OP_NOT_HSPACE:
1305     case OP_HSPACE:
1306     case OP_NOT_VSPACE:
1307     case OP_VSPACE:
1308     case OP_EXTUNI:
1309     case OP_EODN:
1310     case OP_EOD:
1311     case OP_CIRC:
1312     case OP_CIRCM:
1313     case OP_DOLL:
1314     case OP_DOLLM:
1315     case OP_CHAR:
1316     case OP_CHARI:
1317     case OP_NOT:
1318     case OP_NOTI:
1319    
1320     case OP_EXACT:
1321     case OP_POSSTAR:
1322     case OP_POSPLUS:
1323     case OP_POSQUERY:
1324     case OP_POSUPTO:
1325    
1326     case OP_EXACTI:
1327     case OP_POSSTARI:
1328     case OP_POSPLUSI:
1329     case OP_POSQUERYI:
1330     case OP_POSUPTOI:
1331    
1332     case OP_NOTEXACT:
1333     case OP_NOTPOSSTAR:
1334     case OP_NOTPOSPLUS:
1335     case OP_NOTPOSQUERY:
1336     case OP_NOTPOSUPTO:
1337    
1338     case OP_NOTEXACTI:
1339     case OP_NOTPOSSTARI:
1340     case OP_NOTPOSPLUSI:
1341     case OP_NOTPOSQUERYI:
1342     case OP_NOTPOSUPTOI:
1343    
1344     case OP_TYPEEXACT:
1345     case OP_TYPEPOSSTAR:
1346     case OP_TYPEPOSPLUS:
1347     case OP_TYPEPOSQUERY:
1348     case OP_TYPEPOSUPTO:
1349    
1350     case OP_CLASS:
1351     case OP_NCLASS:
1352     case OP_XCLASS:
1353    
1354 ph10 664 cc = next_opcode(common, cc);
1355     SLJIT_ASSERT(cc != NULL);
1356     break;
1357     }
1358    
1359     /* Possessive quantifiers can use a special case. */
1360 zherczeg 1246 if (SLJIT_UNLIKELY(possessive == length))
1361 zherczeg 1249 return stack_restore ? no_frame : no_stack;
1362 ph10 664
1363     if (length > 0)
1364 zherczeg 726 return length + 1;
1365 zherczeg 1249 return stack_restore ? no_frame : no_stack;
1366 ph10 664 }
1367    
1368 zherczeg 1279 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1369 ph10 664 {
1370     DEFINE_COMPILER;
1371 zherczeg 929 BOOL setsom_found = recursive;
1372     BOOL setmark_found = recursive;
1373 zherczeg 1247 /* The last capture is a local variable even for recursions. */
1374     BOOL capture_last_found = FALSE;
1375 ph10 664 int offset;
1376    
1377 zherczeg 726 /* >= 1 + shortest item size (2) */
1378 zherczeg 906 SLJIT_UNUSED_ARG(stacktop);
1379 zherczeg 726 SLJIT_ASSERT(stackpos >= stacktop + 2);
1380 ph10 664
1381     stackpos = STACK(stackpos);
1382 zherczeg 1279 if (ccend == NULL)
1383     {
1384     ccend = bracketend(cc) - (1 + LINK_SIZE);
1385     if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1386     cc = next_opcode(common, cc);
1387     }
1388    
1389 ph10 664 SLJIT_ASSERT(cc != NULL);
1390     while (cc < ccend)
1391     switch(*cc)
1392     {
1393     case OP_SET_SOM:
1394 zherczeg 929 SLJIT_ASSERT(common->has_set_som);
1395 ph10 664 if (!setsom_found)
1396     {
1397     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1398 zherczeg 1246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1399 zherczeg 1195 stackpos += (int)sizeof(sljit_sw);
1400 ph10 664 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1401 zherczeg 1195 stackpos += (int)sizeof(sljit_sw);
1402 ph10 664 setsom_found = TRUE;
1403     }
1404 zherczeg 929 cc += 1;
1405 ph10 664 break;
1406    
1407 zherczeg 929 case OP_MARK:
1408 zherczeg 1272 case OP_PRUNE_ARG:
1409 zherczeg 1279 case OP_THEN_ARG:
1410 zherczeg 929 SLJIT_ASSERT(common->mark_ptr != 0);
1411     if (!setmark_found)
1412     {
1413     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1414 zherczeg 1246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1415 zherczeg 1195 stackpos += (int)sizeof(sljit_sw);
1416 zherczeg 929 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1417 zherczeg 1195 stackpos += (int)sizeof(sljit_sw);
1418 zherczeg 929 setmark_found = TRUE;
1419     }
1420     cc += 1 + 2 + cc[1];
1421     break;
1422    
1423     case OP_RECURSE:
1424     if (common->has_set_som && !setsom_found)
1425     {
1426     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1427 zherczeg 1246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1428 zherczeg 1195 stackpos += (int)sizeof(sljit_sw);
1429 zherczeg 929 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1430 zherczeg 1195 stackpos += (int)sizeof(sljit_sw);
1431 zherczeg 929 setsom_found = TRUE;
1432     }
1433     if (common->mark_ptr != 0 && !setmark_found)
1434     {
1435     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1436 zherczeg 1246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1437 zherczeg 1195 stackpos += (int)sizeof(sljit_sw);
1438 zherczeg 929 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1439 zherczeg 1195 stackpos += (int)sizeof(sljit_sw);
1440 zherczeg 929 setmark_found = TRUE;
1441     }
1442 zherczeg 1246 if (common->capture_last_ptr != 0 && !capture_last_found)
1443     {
1444     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1445     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1446     stackpos += (int)sizeof(sljit_sw);
1447     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1448     stackpos += (int)sizeof(sljit_sw);
1449     capture_last_found = TRUE;
1450     }
1451 zherczeg 929 cc += 1 + LINK_SIZE;
1452     break;
1453    
1454 ph10 664 case OP_CBRA:
1455     case OP_CBRAPOS:
1456     case OP_SCBRA:
1457     case OP_SCBRAPOS:
1458 zherczeg 1246 if (common->capture_last_ptr != 0 && !capture_last_found)
1459     {
1460     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1461     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1462     stackpos += (int)sizeof(sljit_sw);
1463     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1464     stackpos += (int)sizeof(sljit_sw);
1465     capture_last_found = TRUE;
1466     }
1467 ph10 664 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1468     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1469 zherczeg 1195 stackpos += (int)sizeof(sljit_sw);
1470 ph10 664 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1471     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1472     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1473 zherczeg 1195 stackpos += (int)sizeof(sljit_sw);
1474 ph10 664 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1475 zherczeg 1195 stackpos += (int)sizeof(sljit_sw);
1476 ph10 664
1477 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1478 ph10 664 break;
1479    
1480     default:
1481     cc = next_opcode(common, cc);
1482     SLJIT_ASSERT(cc != NULL);
1483     break;
1484     }
1485    
1486 zherczeg 1246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1487 zherczeg 726 SLJIT_ASSERT(stackpos == STACK(stacktop));
1488 ph10 664 }
1489    
1490 zherczeg 1276 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1491 ph10 664 {
1492 zherczeg 1276 int private_data_length = needs_control_head ? 3 : 2;
1493 zherczeg 986 int size;
1494 ph10 836 pcre_uchar *alternative;
1495 zherczeg 1002 /* Calculate the sum of the private machine words. */
1496 ph10 664 while (cc < ccend)
1497     {
1498 zherczeg 986 size = 0;
1499 ph10 664 switch(*cc)
1500     {
1501 zherczeg 1308 case OP_KET:
1502     if (PRIVATE_DATA(cc) != 0)
1503     private_data_length++;
1504     cc += 1 + LINK_SIZE;
1505     break;
1506    
1507 ph10 664 case OP_ASSERT:
1508     case OP_ASSERT_NOT:
1509     case OP_ASSERTBACK:
1510     case OP_ASSERTBACK_NOT:
1511     case OP_ONCE:
1512 zherczeg 726 case OP_ONCE_NC:
1513 ph10 664 case OP_BRAPOS:
1514     case OP_SBRA:
1515     case OP_SBRAPOS:
1516     case OP_SCOND:
1517 zherczeg 1002 private_data_length++;
1518 ph10 664 cc += 1 + LINK_SIZE;
1519     break;
1520    
1521     case OP_CBRA:
1522     case OP_SCBRA:
1523 zherczeg 1009 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1524     private_data_length++;
1525 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1526 ph10 664 break;
1527    
1528     case OP_CBRAPOS:
1529     case OP_SCBRAPOS:
1530 zherczeg 1002 private_data_length += 2;
1531 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1532 ph10 664 break;
1533    
1534     case OP_COND:
1535     /* Might be a hidden SCOND. */
1536     alternative = cc + GET(cc, 1);
1537     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1538 zherczeg 1002 private_data_length++;
1539 ph10 664 cc += 1 + LINK_SIZE;
1540     break;
1541    
1542 zherczeg 1002 CASE_ITERATOR_PRIVATE_DATA_1
1543     if (PRIVATE_DATA(cc))
1544     private_data_length++;
1545 zherczeg 986 cc += 2;
1546 chpe 1114 #ifdef SUPPORT_UTF
1547 zherczeg 986 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1548     #endif
1549     break;
1550    
1551 zherczeg 1002 CASE_ITERATOR_PRIVATE_DATA_2A
1552     if (PRIVATE_DATA(cc))
1553     private_data_length += 2;
1554 zherczeg 986 cc += 2;
1555 chpe 1114 #ifdef SUPPORT_UTF
1556 zherczeg 986 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1557     #endif
1558     break;
1559    
1560 zherczeg 1002 CASE_ITERATOR_PRIVATE_DATA_2B
1561     if (PRIVATE_DATA(cc))
1562     private_data_length += 2;
1563 zherczeg 986 cc += 2 + IMM2_SIZE;
1564 chpe 1114 #ifdef SUPPORT_UTF
1565 zherczeg 986 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1566     #endif
1567     break;
1568    
1569 zherczeg 1002 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1570     if (PRIVATE_DATA(cc))
1571     private_data_length++;
1572 zherczeg 986 cc += 1;
1573     break;
1574    
1575 zherczeg 1002 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1576     if (PRIVATE_DATA(cc))
1577     private_data_length += 2;
1578 zherczeg 986 cc += 1;
1579     break;
1580    
1581 zherczeg 1002 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1582     if (PRIVATE_DATA(cc))
1583     private_data_length += 2;
1584 zherczeg 986 cc += 1 + IMM2_SIZE;
1585     break;
1586    
1587     case OP_CLASS:
1588     case OP_NCLASS:
1589     #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1590     case OP_XCLASS:
1591 zherczeg 989 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1592 zherczeg 986 #else
1593 zherczeg 989 size = 1 + 32 / (int)sizeof(pcre_uchar);
1594 zherczeg 986 #endif
1595 zherczeg 1002 if (PRIVATE_DATA(cc))
1596     private_data_length += get_class_iterator_size(cc + size);
1597 zherczeg 986 cc += size;
1598     break;
1599    
1600 ph10 664 default:
1601     cc = next_opcode(common, cc);
1602     SLJIT_ASSERT(cc != NULL);
1603     break;
1604     }
1605     }
1606     SLJIT_ASSERT(cc == ccend);
1607 zherczeg 1002 return private_data_length;
1608 ph10 664 }
1609    
1610 zherczeg 1002 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1611 zherczeg 1276 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1612 ph10 664 {
1613     DEFINE_COMPILER;
1614     int srcw[2];
1615 zherczeg 986 int count, size;
1616 ph10 664 BOOL tmp1next = TRUE;
1617     BOOL tmp1empty = TRUE;
1618     BOOL tmp2empty = TRUE;
1619 ph10 836 pcre_uchar *alternative;
1620 ph10 664 enum {
1621     start,
1622     loop,
1623     end
1624     } status;
1625    
1626     status = save ? start : loop;
1627     stackptr = STACK(stackptr - 2);
1628     stacktop = STACK(stacktop - 1);
1629    
1630     if (!save)
1631     {
1632 zherczeg 1276 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1633 ph10 664 if (stackptr < stacktop)
1634     {
1635     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1636 zherczeg 1195 stackptr += sizeof(sljit_sw);
1637 ph10 664 tmp1empty = FALSE;
1638     }
1639     if (stackptr < stacktop)
1640     {
1641     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1642 zherczeg 1195 stackptr += sizeof(sljit_sw);
1643 ph10 664 tmp2empty = FALSE;
1644     }
1645     /* The tmp1next must be TRUE in either way. */
1646     }
1647    
1648 zherczeg 1275 do
1649 ph10 664 {
1650     count = 0;
1651     switch(status)
1652     {
1653     case start:
1654 zherczeg 1245 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1655 ph10 664 count = 1;
1656 zherczeg 1245 srcw[0] = common->recursive_head_ptr;
1657 zherczeg 1276 if (needs_control_head)
1658 zherczeg 1275 {
1659 zherczeg 1276 SLJIT_ASSERT(common->control_head_ptr != 0);
1660 zherczeg 1275 count = 2;
1661     srcw[1] = common->control_head_ptr;
1662     }
1663 ph10 664 status = loop;
1664     break;
1665    
1666     case loop:
1667     if (cc >= ccend)
1668     {
1669     status = end;
1670     break;
1671     }
1672    
1673     switch(*cc)
1674     {
1675 zherczeg 1308 case OP_KET:
1676     if (PRIVATE_DATA(cc) != 0)
1677     {
1678     count = 1;
1679     srcw[0] = PRIVATE_DATA(cc);
1680     }
1681     cc += 1 + LINK_SIZE;
1682     break;
1683    
1684 ph10 664 case OP_ASSERT:
1685     case OP_ASSERT_NOT:
1686     case OP_ASSERTBACK:
1687     case OP_ASSERTBACK_NOT:
1688     case OP_ONCE:
1689 zherczeg 726 case OP_ONCE_NC:
1690 ph10 664 case OP_BRAPOS:
1691     case OP_SBRA:
1692     case OP_SBRAPOS:
1693     case OP_SCOND:
1694     count = 1;
1695 zherczeg 1002 srcw[0] = PRIVATE_DATA(cc);
1696 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1697     cc += 1 + LINK_SIZE;
1698     break;
1699    
1700     case OP_CBRA:
1701     case OP_SCBRA:
1702 zherczeg 1009 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1703     {
1704     count = 1;
1705     srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1706     }
1707 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1708 ph10 664 break;
1709    
1710     case OP_CBRAPOS:
1711     case OP_SCBRAPOS:
1712     count = 2;
1713 zherczeg 1009 srcw[0] = PRIVATE_DATA(cc);
1714     srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1715     SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1716 ph10 836 cc += 1 + LINK_SIZE + IMM2_SIZE;
1717 ph10 664 break;
1718    
1719     case OP_COND:
1720     /* Might be a hidden SCOND. */
1721     alternative = cc + GET(cc, 1);
1722     if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1723     {
1724     count = 1;
1725 zherczeg 1002 srcw[0] = PRIVATE_DATA(cc);
1726 ph10 664 SLJIT_ASSERT(srcw[0] != 0);
1727     }
1728     cc += 1 + LINK_SIZE;
1729     break;
1730    
1731 zherczeg 1002 CASE_ITERATOR_PRIVATE_DATA_1
1732     if (PRIVATE_DATA(cc))
1733 zherczeg 986 {
1734     count = 1;
1735 zherczeg 1002 srcw[0] = PRIVATE_DATA(cc);
1736 zherczeg 986 }
1737     cc += 2;
1738 chpe 1114 #ifdef SUPPORT_UTF
1739 zherczeg 986 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1740     #endif
1741     break;
1742    
1743 zherczeg 1002 CASE_ITERATOR_PRIVATE_DATA_2A
1744     if (PRIVATE_DATA(cc))
1745 zherczeg 986 {
1746     count = 2;
1747 zherczeg 1002 srcw[0] = PRIVATE_DATA(cc);
1748 zherczeg 1195 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1749 zherczeg 986 }
1750     cc += 2;
1751 chpe 1114 #ifdef SUPPORT_UTF
1752 zherczeg 986 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1753     #endif
1754     break;
1755    
1756 zherczeg 1002 CASE_ITERATOR_PRIVATE_DATA_2B
1757     if (PRIVATE_DATA(cc))
1758 zherczeg 986 {
1759     count = 2;
1760 zherczeg 1002 srcw[0] = PRIVATE_DATA(cc);
1761 zherczeg 1195 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1762 zherczeg 986 }
1763     cc += 2 + IMM2_SIZE;
1764 chpe 1114 #ifdef SUPPORT_UTF
1765 zherczeg 986 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1766     #endif
1767     break;
1768    
1769 zherczeg 1002 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1770     if (PRIVATE_DATA(cc))
1771 zherczeg 986 {
1772     count = 1;
1773 zherczeg 1002 srcw[0] = PRIVATE_DATA(cc);
1774 zherczeg 986 }
1775     cc += 1;
1776     break;
1777    
1778 zherczeg 1002 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1779     if (PRIVATE_DATA(cc))
1780 zherczeg 986 {
1781     count = 2;
1782 zherczeg 1002 srcw[0] = PRIVATE_DATA(cc);
1783 zherczeg 1195 srcw[1] = srcw[0] + sizeof(sljit_sw);
1784 zherczeg 986 }
1785     cc += 1;
1786     break;
1787    
1788 zherczeg 1002 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1789     if (PRIVATE_DATA(cc))
1790 zherczeg 986 {
1791     count = 2;
1792 zherczeg 1002 srcw[0] = PRIVATE_DATA(cc);
1793 zherczeg 1195 srcw[1] = srcw[0] + sizeof(sljit_sw);
1794 zherczeg 986 }
1795     cc += 1 + IMM2_SIZE;
1796     break;
1797    
1798     case OP_CLASS:
1799     case OP_NCLASS:
1800     #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1801     case OP_XCLASS:
1802 zherczeg 989 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1803 zherczeg 986 #else
1804 zherczeg 989 size = 1 + 32 / (int)sizeof(pcre_uchar);
1805 zherczeg 986 #endif
1806 zherczeg 1002 if (PRIVATE_DATA(cc))
1807 zherczeg 986 switch(get_class_iterator_size(cc + size))
1808     {
1809     case 1:
1810     count = 1;
1811 zherczeg 1002 srcw[0] = PRIVATE_DATA(cc);
1812 zherczeg 986 break;
1813    
1814     case 2:
1815     count = 2;
1816 zherczeg 1002 srcw[0] = PRIVATE_DATA(cc);
1817 zherczeg 1195 srcw[1] = srcw[0] + sizeof(sljit_sw);
1818 zherczeg 986 break;
1819    
1820     default:
1821     SLJIT_ASSERT_STOP();
1822     break;
1823     }
1824     cc += size;
1825     break;
1826    
1827 ph10 664 default:
1828     cc = next_opcode(common, cc);
1829     SLJIT_ASSERT(cc != NULL);
1830     break;
1831     }
1832     break;
1833    
1834     case end:
1835     SLJIT_ASSERT_STOP();
1836     break;
1837     }
1838    
1839     while (count > 0)
1840     {
1841     count--;
1842     if (save)
1843     {
1844     if (tmp1next)
1845     {
1846     if (!tmp1empty)
1847     {
1848     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1849 zherczeg 1195 stackptr += sizeof(sljit_sw);
1850 ph10 664 }
1851     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1852     tmp1empty = FALSE;
1853     tmp1next = FALSE;
1854     }
1855     else
1856     {
1857     if (!tmp2empty)
1858     {
1859     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1860 zherczeg 1195 stackptr += sizeof(sljit_sw);
1861 ph10 664 }
1862     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1863     tmp2empty = FALSE;
1864     tmp1next = TRUE;
1865     }
1866     }
1867     else
1868     {
1869     if (tmp1next)
1870     {
1871     SLJIT_ASSERT(!tmp1empty);
1872     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1873     tmp1empty = stackptr >= stacktop;
1874     if (!tmp1empty)
1875     {
1876     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1877 zherczeg 1195 stackptr += sizeof(sljit_sw);
1878 ph10 664 }
1879     tmp1next = FALSE;
1880     }
1881     else
1882     {
1883     SLJIT_ASSERT(!tmp2empty);
1884     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1885     tmp2empty = stackptr >= stacktop;
1886     if (!tmp2empty)
1887     {
1888     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1889 zherczeg 1195 stackptr += sizeof(sljit_sw);
1890 ph10 664 }
1891     tmp1next = TRUE;
1892     }
1893     }
1894     }
1895     }
1896 zherczeg 1275 while (status != end);
1897 ph10 664
1898     if (save)
1899     {
1900     if (tmp1next)
1901     {
1902     if (!tmp1empty)
1903     {
1904     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1905 zherczeg 1195 stackptr += sizeof(sljit_sw);
1906 ph10 664 }
1907     if (!tmp2empty)
1908     {
1909     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1910 zherczeg 1195 stackptr += sizeof(sljit_sw);
1911 ph10 664 }
1912     }
1913     else
1914     {
1915     if (!tmp2empty)
1916     {
1917     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1918 zherczeg 1195 stackptr += sizeof(sljit_sw);
1919 ph10 664 }
1920     if (!tmp1empty)
1921     {
1922     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1923 zherczeg 1195 stackptr += sizeof(sljit_sw);
1924 ph10 664 }
1925     }
1926     }
1927     SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1928     }
1929    
1930 zherczeg 1279 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1931     {
1932     pcre_uchar *end = bracketend(cc);
1933     BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1934    
1935     /* Assert captures then. */
1936     if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1937     current_offset = NULL;
1938     /* Conditional block does not. */
1939     if (*cc == OP_COND || *cc == OP_SCOND)
1940     has_alternatives = FALSE;
1941    
1942     cc = next_opcode(common, cc);
1943     if (has_alternatives)
1944     current_offset = common->then_offsets + (cc - common->start);
1945    
1946     while (cc < end)
1947     {
1948     if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1949     cc = set_then_offsets(common, cc, current_offset);
1950     else
1951     {
1952     if (*cc == OP_ALT && has_alternatives)
1953     current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1954     if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1955     *current_offset = 1;
1956     cc = next_opcode(common, cc);
1957     }
1958     }
1959    
1960     return end;
1961     }
1962    
1963 zherczeg 1002 #undef CASE_ITERATOR_PRIVATE_DATA_1
1964     #undef CASE_ITERATOR_PRIVATE_DATA_2A
1965     #undef CASE_ITERATOR_PRIVATE_DATA_2B
1966     #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1967     #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1968     #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1969 zherczeg 986
1970 zherczeg 1052 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1971 ph10 664 {
1972     return (value & (value - 1)) == 0;
1973     }
1974    
1975     static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1976     {
1977     while (list)
1978     {
1979     /* sljit_set_label is clever enough to do nothing
1980 zherczeg 996 if either the jump or the label is NULL. */
1981 zherczeg 1246 SET_LABEL(list->jump, label);
1982 ph10 664 list = list->next;
1983     }
1984     }
1985    
1986     static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1987     {
1988     jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1989     if (list_item)
1990     {
1991     list_item->next = *list;
1992     list_item->jump = jump;
1993     *list = list_item;
1994     }
1995     }
1996    
1997 zherczeg 1249 static void add_stub(compiler_common *common, struct sljit_jump *start)
1998 ph10 664 {
1999     DEFINE_COMPILER;
2000     stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2001    
2002     if (list_item)
2003     {
2004     list_item->start = start;
2005 zherczeg 991 list_item->quit = LABEL();
2006 ph10 664 list_item->next = common->stubs;
2007     common->stubs = list_item;
2008     }
2009     }
2010    
2011     static void flush_stubs(compiler_common *common)
2012     {
2013     DEFINE_COMPILER;
2014     stub_list* list_item = common->stubs;
2015    
2016     while (list_item)
2017     {
2018     JUMPHERE(list_item->start);
2019 zherczeg 1249 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2020 zherczeg 991 JUMPTO(SLJIT_JUMP, list_item->quit);
2021 ph10 664 list_item = list_item->next;
2022     }
2023     common->stubs = NULL;
2024     }
2025    
2026 zherczeg 1316 static SLJIT_INLINE void count_match(compiler_common *common)
2027 ph10 677 {
2028     DEFINE_COMPILER;
2029    
2030 zherczeg 1316 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2031 ph10 677 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2032     }
2033    
2034 ph10 664 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2035     {
2036     /* May destroy all locals and registers except TMP2. */
2037     DEFINE_COMPILER;
2038    
2039 zherczeg 1195 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2040 ph10 664 #ifdef DESTROY_REGISTERS
2041     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2042     OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2043     OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2044     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2045     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2046     #endif
2047 zherczeg 1249 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2048 ph10 664 }
2049    
2050     static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2051     {
2052     DEFINE_COMPILER;
2053 zherczeg 1195 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2054 ph10 664 }
2055    
2056     static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2057     {
2058     DEFINE_COMPILER;
2059     struct sljit_label *loop;
2060     int i;
2061 zherczeg 1272
2062 ph10 664 /* At this point we can freely use all temporary registers. */
2063 zherczeg 1272 SLJIT_ASSERT(length > 1);
2064 ph10 664 /* TMP1 returns with begin - 1. */
2065 zherczeg 1215 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2066 ph10 664 if (length < 8)
2067     {
2068 zherczeg 1272 for (i = 1; i < length; i++)
2069 zherczeg 1215 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2070 ph10 664 }
2071     else
2072     {
2073 zherczeg 1272 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2074     OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2075 ph10 664 loop = LABEL();
2076 zherczeg 1215 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2077     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2078 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2079     }
2080     }
2081    
/* Emits code that restores the match state before another attempt.
   OVECTOR(2) .. OVECTOR(length - 1) are refilled with the value stored in
   OVECTOR(1), the mark and control-head locals are zeroed when they are in
   use, TMP1 is loaded from the start_ptr local, and STACK_TOP is reloaded
   from arguments->stack->base. length must be greater than 1.
   Overwrites TMP1 and STACK_TOP; the loop form also overwrites TMP2. */
2082 zherczeg 1275 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2083 zherczeg 1272 {
2084     DEFINE_COMPILER;
2085     struct sljit_label *loop;
2086     int i;
2087    
2088     SLJIT_ASSERT(length > 1);
2089     /* OVECTOR(1) contains the "string begin - 1" constant. */
/* The fill value is only needed when there is a slot beyond OVECTOR(1). */
2090     if (length > 2)
2091     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2092     if (length < 8)
2093     {
/* Short form: one unrolled store per slot. */
2094     for (i = 2; i < length; i++)
2095     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2096     }
2097     else
2098     {
/* Long form: TMP2 walks the slots and STACK_TOP is borrowed as the loop
   counter (it is reloaded below). SLJIT_MOVU presumably advances TMP2 to
   the address it stores to - confirm against the sljit documentation. */
2099     GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2100 zherczeg 1275 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2101 zherczeg 1272 loop = LABEL();
2102     OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2103 zherczeg 1275 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2104 zherczeg 1272 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2105     }
2106 zherczeg 1275
/* STACK_TOP is used as a scratch pointer here: arguments, then
   arguments->stack, and finally arguments->stack->base. */
2107     OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2108     if (common->mark_ptr != 0)
2109     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2110 zherczeg 1290 if (common->control_head_ptr != 0)
2111     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2112 zherczeg 1275 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2113     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2114 zherczeg 1272 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2115     }
2116    
2117 zherczeg 1290 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2118 zherczeg 1275 {
2119 zherczeg 1290 while (current != NULL)
2120 zherczeg 1275 {
2121     switch (current[-2])
2122     {
2123 zherczeg 1279 case type_then_trap:
2124 zherczeg 1275 break;
2125    
2126 zherczeg 1278 case type_mark:
2127 zherczeg 1290 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2128     return current[-4];
2129 zherczeg 1278 break;
2130    
2131 zherczeg 1275 default:
2132     SLJIT_ASSERT_STOP();
2133     break;
2134     }
2135     current = (sljit_sw*)current[-1];
2136     }
2137 zherczeg 1290 return -1;
2138 zherczeg 1275 }
2139    
/* Emits the code that publishes a successful match. The pointer-sized
   ovector values kept in the locals are converted to offsets relative to
   arguments->begin (shifted right by UCHAR_SHIFT in the 16 and 32 bit
   builds) and stored as ints into arguments->offsets; offset_count values
   are written. The current mark is copied to arguments->mark_ptr when marks
   are in use. Finally SLJIT_RETURN_REG receives the match return value
   (see the loop near the end of the function). */
2140 zherczeg 696 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2141 ph10 664 {
2142     DEFINE_COMPILER;
2143     struct sljit_label *loop;
2144 zherczeg 1245 struct sljit_jump *early_quit;
2145 ph10 664
2146     /* At this point we can freely use all registers. */
/* Keep the old OVECTOR(1) value in SLJIT_SAVED_REG3 (it is compared against
   the other slots below), then store the current STR_PTR into OVECTOR(1). */
2147 zherczeg 880 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2148 zherczeg 696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2149    
/* SLJIT_SCRATCH_REG1 = arguments, SLJIT_SCRATCH_REG2 = offset_count. */
2150 zherczeg 1215 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2151 zherczeg 929 if (common->mark_ptr != 0)
2152 zherczeg 1215 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2153 zherczeg 1245 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2154 zherczeg 929 if (common->mark_ptr != 0)
2155 zherczeg 1215 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* SLJIT_SCRATCH_REG3 = offsets - sizeof(int): the store in the loop addresses
   one int ahead of this pointer. SLJIT_SCRATCH_REG1 is then reused for
   arguments->begin and SLJIT_SAVED_REG1 for the address of the first ovector
   local. */
2156     OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2157     OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2158 zherczeg 955 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2159 ph10 664 /* Unlikely, but possible */
/* Skip the copy loop entirely when offset_count is zero. */
2160 zherczeg 1245 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2161 ph10 664 loop = LABEL();
2162 zherczeg 1215 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2163 zherczeg 1195 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2164 ph10 664 /* Copy the integer value to the output buffer */
2165 chpe 1055 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2166     OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2167 ph10 836 #endif
2168 zherczeg 1215 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2169     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2170 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2171 zherczeg 1245 JUMPHERE(early_quit);
2172 zherczeg 696
2173     /* Calculate the return value, which is the maximum ovector value. */
2174     if (topbracket > 1)
2175     {
/* Start at slot 2 * topbracket with the counter at topbracket + 1. Each
   iteration reads the slot two words below the current position and
   decrements the counter; the loop repeats while the slot still equals the
   value saved in SLJIT_SAVED_REG3. */
2176 zherczeg 1215 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2177     OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2178 zherczeg 696
2179 zherczeg 880 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2180 zherczeg 696 loop = LABEL();
2181 zherczeg 1215 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2182     OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2183     CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2184     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2185 zherczeg 696 }
2186     else
/* With topbracket <= 1 the return value is the constant 1. */
2187     OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2188 ph10 664 }
2189    
/* Emits the exit sequence for a partial match. SLJIT_RETURN_REG is set to
   PCRE_ERROR_PARTIAL and, when arguments->real_offset_count allows it, the
   first output offsets are filled in (all relative to arguments->begin and
   shifted right by UCHAR_SHIFT in the 16 and 32 bit builds):
     offsets[0]  start_used_ptr local (hard mode) or hit_start local (soft mode)
     offsets[1]  STR_END
     offsets[2]  start_ptr local (hard mode) or the word following hit_start
                 (soft mode); written only when real_offset_count >= 3
   Control always ends with a jump to the quit label. */
2190 zherczeg 991 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2191 zherczeg 914 {
2192     DEFINE_COMPILER;
2193 zherczeg 1252 struct sljit_jump *jump;
2194 zherczeg 914
/* The code below overwrites SLJIT_SAVED_REG2 and relies on it being STR_END. */
2195     SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2196 zherczeg 1272 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2197     && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2198 zherczeg 914
2199 zherczeg 1215 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2200 zherczeg 914 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
/* With fewer than two output slots nothing is stored: leave immediately. */
2201 zherczeg 1252 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2202     CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2203 zherczeg 914
2204     /* Store match begin and end. */
/* SLJIT_SAVED_REG1 = begin; SLJIT_SCRATCH_REG2 becomes the offsets pointer. */
2205 zherczeg 1215 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2206     OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2207 zherczeg 1252
/* offsets[2] is written only when at least three slots are available. */
2208     jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2209 zherczeg 1276 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2210 zherczeg 1252 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2211     OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2212     #endif
2213     OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2214     JUMPHERE(jump);
2215    
/* Load the match start before STR_END (SLJIT_SAVED_REG2) is overwritten by
   the offsets[1] computation. */
2216 zherczeg 1215 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2217 zherczeg 914 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2218 chpe 1055 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2219     OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2220 zherczeg 914 #endif
2221 zherczeg 1215 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2222 zherczeg 914
/* offsets[0] = match start - begin. */
2223 zherczeg 1215 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2224 chpe 1055 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2225 zherczeg 1215 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2226 zherczeg 914 #endif
2227 zherczeg 1215 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2228 zherczeg 914
2229 zherczeg 991 JUMPTO(SLJIT_JUMP, quit);
2230 zherczeg 914 }
2231    
2232     static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2233     {
2234     /* May destroy TMP1. */
2235     DEFINE_COMPILER;
2236     struct sljit_jump *jump;
2237    
2238     if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2239     {
2240 zherczeg 920 /* The value of -1 must be kept for start_used_ptr! */
2241     OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2242     /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2243     is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2244 zherczeg 914 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2245 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2246 zherczeg 914 JUMPHERE(jump);
2247     }
2248     else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2249     {
2250 zherczeg 920 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2251     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2252 zherczeg 914 JUMPHERE(jump);
2253     }
2254     }
2255    
2256 ph10 836 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2257 ph10 664 {
2258     /* Detects if the character has an othercase. */
2259     unsigned int c;
2260    
2261 ph10 836 #ifdef SUPPORT_UTF
2262     if (common->utf)
2263 ph10 664 {
2264     GETCHAR(c, cc);
2265     if (c > 127)
2266     {
2267     #ifdef SUPPORT_UCP
2268     return c != UCD_OTHERCASE(c);
2269     #else
2270     return FALSE;
2271     #endif
2272     }
2273 ph10 836 #ifndef COMPILE_PCRE8
2274     return common->fcc[c] != c;
2275     #endif
2276 ph10 664 }
2277     else
2278     #endif
2279     c = *cc;
2280 ph10 836 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2281 ph10 664 }
2282    
2283     static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2284     {
2285     /* Returns with the othercase. */
2286 ph10 836 #ifdef SUPPORT_UTF
2287     if (common->utf && c > 127)
2288 ph10 664 {
2289     #ifdef SUPPORT_UCP
2290     return UCD_OTHERCASE(c);
2291     #else
2292     return c;
2293     #endif
2294     }
2295     #endif
2296 ph10 836 return TABLE_GET(c, common->fcc, c);
2297 ph10 664 }
2298    
/* Computes how the pattern character at cc differs from its other case.
   Returns 0 when the two differ in more than one bit. Otherwise the result
   is (index << 8) | mask, where mask is the single differing bit reduced to
   fit in the low byte and index is a small position value (presumably the
   byte position inside the encoded character where the mask applies -
   confirm against the callers). The character must have an other case
   (asserted below). */
2299 ph10 836 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2300 ph10 664 {
2301     /* Detects if the character and its othercase has only 1 bit difference. */
2302     unsigned int c, oc, bit;
2303 ph10 836 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2304 ph10 664 int n;
2305     #endif
2306    
/* Fetch the character (c) and its other case (oc), using the same lookup
   rules as char_othercase. */
2307 ph10 836 #ifdef SUPPORT_UTF
2308     if (common->utf)
2309 ph10 664 {
2310     GETCHAR(c, cc);
2311     if (c <= 127)
2312     oc = common->fcc[c];
2313     else
2314     {
2315     #ifdef SUPPORT_UCP
2316     oc = UCD_OTHERCASE(c);
2317     #else
2318     oc = c;
2319     #endif
2320     }
2321     }
2322     else
2323     {
2324     c = *cc;
2325 ph10 836 oc = TABLE_GET(c, common->fcc, c);
2326 ph10 664 }
2327     #else
2328     c = *cc;
2329 ph10 836 oc = TABLE_GET(c, common->fcc, c);
2330 ph10 664 #endif
2331    
2332     SLJIT_ASSERT(c != oc);
2333    
2334     bit = c ^ oc;
2335     /* Optimized for English alphabet. */
2336     if (c <= 127 && bit == 0x20)
2337     return (0 << 8) | 0x20;
2338    
2339     /* Since c != oc, they must have at least 1 bit difference. */
/* bit is non-zero here, so is_powerof2 is true exactly when a single bit
   differs. */
2340 zherczeg 1052 if (!is_powerof2(bit))
2341 ph10 664 return 0;
2342    
2343 chpe 1055 #if defined COMPILE_PCRE8
2344 ph10 836
2345     #ifdef SUPPORT_UTF
2346     if (common->utf && c > 127)
2347 ph10 664 {
/* Multi-byte UTF-8 character: start with n = GET_EXTRALEN of the first byte
   and step n down once for every 6 bits shifted out, until the differing bit
   lies in the low 6 bits. */
2348 ph10 836 n = GET_EXTRALEN(*cc);
2349 ph10 664 while ((bit & 0x3f) == 0)
2350     {
2351     n--;
2352     bit >>= 6;
2353     }
2354     return (n << 8) | bit;
2355     }
2356 ph10 836 #endif /* SUPPORT_UTF */
2357 ph10 664 return (0 << 8) | bit;
2358 ph10 836
2359 chpe 1055 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2360 ph10 836
2361     #ifdef SUPPORT_UTF
2362     if (common->utf && c > 65535)
2363     {
/* Characters above 0xffff: a difference at bit 10 or higher is shifted down
   by 10 and handled by the common return below (index 0 or 1); a lower
   difference is reported with index 2 or 3.
   NOTE(review): this branch is also compiled for COMPILE_PCRE32, while
   read_char in this file emits no UTF decoding for COMPILE_PCRE32 - confirm
   that indices 2/3 and the shift by 10 are intended in the 32 bit build. */
2364     if (bit >= (1 << 10))
2365     bit >>= 10;
2366     else
2367     return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2368     }
2369     #endif /* SUPPORT_UTF */
/* Index 0 when the bit fits in the low byte, otherwise index 1 with the
   mask shifted down by 8. */
2370     return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2371    
2372 chpe 1055 #endif /* COMPILE_PCRE[8|16|32] */
2373 ph10 664 }
2374    
2375 zherczeg 918 static void check_partial(compiler_common *common, BOOL force)
2376 ph10 664 {
2377 ph10 1325 /* Checks whether a partial matching is occurred. Does not modify registers. */
2378 ph10 664 DEFINE_COMPILER;
2379 zherczeg 918 struct sljit_jump *jump = NULL;
2380 zherczeg 914
2381 zherczeg 918 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2382    
2383 zherczeg 914 if (common->mode == JIT_COMPILE)
2384     return;
2385    
2386 zherczeg 920 if (!force)
2387     jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2388     else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2389     jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2390 zherczeg 918
2391 zherczeg 914 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2392 zherczeg 1269 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2393 zherczeg 914 else
2394     {
2395     if (common->partialmatchlabel != NULL)
2396     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2397     else
2398     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2399     }
2400 zherczeg 918
2401     if (jump != NULL)
2402     JUMPHERE(jump);
2403 ph10 664 }
2404    
2405 zherczeg 1269 static void check_str_end(compiler_common *common, jump_list **end_reached)
2406 zherczeg 914 {
2407     /* Does not affect registers. Usually used in a tight spot. */
2408     DEFINE_COMPILER;
2409     struct sljit_jump *jump;
2410    
2411     if (common->mode == JIT_COMPILE)
2412 zherczeg 1269 {
2413     add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2414     return;
2415     }
2416 zherczeg 914
2417     jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2418     if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2419     {
2420 zherczeg 1269 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2421     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2422     add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2423 zherczeg 914 }
2424     else
2425     {
2426 zherczeg 1269 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2427 zherczeg 914 if (common->partialmatchlabel != NULL)
2428     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2429     else
2430     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2431     }
2432     JUMPHERE(jump);
2433     }
2434    
2435 zherczeg 970 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2436 zherczeg 914 {
2437     DEFINE_COMPILER;
2438     struct sljit_jump *jump;
2439    
2440     if (common->mode == JIT_COMPILE)
2441     {
2442 zherczeg 970 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2443 zherczeg 914 return;
2444     }
2445    
2446     /* Partial matching mode. */
2447     jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2448 zherczeg 970 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2449 zherczeg 914 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2450     {
2451 zherczeg 1269 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2452 zherczeg 970 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2453 zherczeg 914 }
2454     else
2455     {
2456     if (common->partialmatchlabel != NULL)
2457     JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2458     else
2459     add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2460     }
2461     JUMPHERE(jump);
2462     }
2463    
/* Emits code that reads the character at STR_PTR into TMP1 and advances
   STR_PTR past it. In the 8 and 16 bit UTF builds, a first unit at or above
   the threshold below is completed by the shared utfreadchar routine. */
2464 ph10 664 static void read_char(compiler_common *common)
2465     {
2466     /* Reads the character into TMP1, updates STR_PTR.
2467     Does not check STR_END. TMP2 Destroyed. */
2468     DEFINE_COMPILER;
2469 chpe 1055 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2470 ph10 664 struct sljit_jump *jump;
2471     #endif
2472    
2473 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2474 chpe 1055 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2475 ph10 836 if (common->utf)
2476 ph10 664 {
/* Units below 0xc0 (8 bit) or 0xd800 (16 bit) skip the helper call. */
2477 chpe 1055 #if defined COMPILE_PCRE8
2478 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2479 chpe 1055 #elif defined COMPILE_PCRE16
2480 ph10 836 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2481 chpe 1055 #endif /* COMPILE_PCRE[8|16] */
/* Fast call into the routine collected on common->utfreadchar. */
2482 ph10 836 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2483 ph10 664 JUMPHERE(jump);
2484     }
2485 chpe 1055 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Step over the first (or only) code unit on every path. */
2486 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2487 ph10 664 }
2488    
/* Emits code that reads the character at STR_PTR into TMP1 while leaving
   STR_PTR where it was. Same decoding path as read_char, except that the
   movement done by the utfreadchar routine is undone afterwards. */
2489     static void peek_char(compiler_common *common)
2490     {
2491     /* Reads the character into TMP1, keeps STR_PTR.
2492     Does not check STR_END. TMP2 Destroyed. */
2493     DEFINE_COMPILER;
2494 chpe 1055 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2495 ph10 664 struct sljit_jump *jump;
2496     #endif
2497    
2498 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2499 chpe 1055 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2500 ph10 836 if (common->utf)
2501 ph10 664 {
/* Units below 0xc0 (8 bit) or 0xd800 (16 bit) skip the helper call. */
2502 chpe 1055 #if defined COMPILE_PCRE8
2503 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2504 chpe 1055 #elif defined COMPILE_PCRE16
2505 ph10 836 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2506 chpe 1055 #endif /* COMPILE_PCRE[8|16] */
2507 ph10 836 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
/* The helper leaves a length value in TMP2 (documented as "length - 1" on
   do_utfreadchar); subtracting it puts STR_PTR back on the first unit. */
2508 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2509     JUMPHERE(jump);
2510     }
2511 chpe 1055 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2512 ph10 664 }
2513    
/* Emits code that loads the ctypes table entry of the next character into
   TMP1 and advances STR_PTR past the character. The code unit itself is read
   into TMP2. Units above 255 have no table entry: in the 16 and 32 bit builds
   TMP1 is set to 0 for them, and in the 8 bit UTF build units of 0xc0 and
   above are handed to the shared utfreadtype8 routine. */
2514     static void read_char8_type(compiler_common *common)
2515     {
2516     /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2517     DEFINE_COMPILER;
2518 chpe 1055 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2519 ph10 664 struct sljit_jump *jump;
2520     #endif
2521    
2522 ph10 836 #ifdef SUPPORT_UTF
2523     if (common->utf)
2524 ph10 664 {
2525 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2526     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2527 chpe 1055 #if defined COMPILE_PCRE8
2528 ph10 664 /* This can be an extra read in some situations, but hopefully
2529 ph10 836 it is needed in most cases. */
/* The table load is done before the range test; for units of 0xc0 and above
   the fast call below takes over. */
2530 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2531 zherczeg 736 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2532 ph10 836 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2533 ph10 664 JUMPHERE(jump);
2534 chpe 1055 #elif defined COMPILE_PCRE16
/* Default type 0; the table is consulted only for units up to 255. */
2535 ph10 836 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2536     jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2537     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2538     JUMPHERE(jump);
2539     /* Skip low surrogate if necessary. */
/* TMP2 becomes 1 when (unit & 0xfc00) == 0xd800 and 0 otherwise; shifted
   left by one it is the number of extra bytes to add to STR_PTR. */
2540     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2541     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2542 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2543 ph10 836 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2544     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2545 chpe 1055 #elif defined COMPILE_PCRE32
/* Default type 0; the table is consulted only for units up to 255. */
2546     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2547     jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2548     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2549     JUMPHERE(jump);
2550     #endif /* COMPILE_PCRE[8|16|32] */
2551 ph10 664 return;
2552     }
2553 chpe 1055 #endif /* SUPPORT_UTF */
/* Non-UTF path: one code unit is one character. */
2554 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2555     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2556 chpe 1055 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2557 ph10 836 /* The ctypes array contains only 256 values. */
2558     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2559     jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2560     #endif
2561     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2562 chpe 1055 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2563 ph10 836 JUMPHERE(jump);
2564     #endif
2565 ph10 664 }
2566    
/* Emits code that moves STR_PTR back by one character. Outside UTF mode, and
   in the 32 bit build, that is a single code unit. The UTF-8 and UTF-16
   variants inspect the units they step over, as described below. */
2567     static void skip_char_back(compiler_common *common)
2568     {
2569 ph10 836 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2570 ph10 664 DEFINE_COMPILER;
2571 chpe 1055 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2572     #if defined COMPILE_PCRE8
2573 ph10 664 struct sljit_label *label;
2574    
2575 ph10 836 if (common->utf)
2576 ph10 664 {
/* UTF-8: step back one byte at a time, repeating while the byte just
   stepped over has the form 10xxxxxx ((byte & 0xc0) == 0x80). */
2577     label = LABEL();
2578 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2579     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2580 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2581     CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2582     return;
2583     }
2584 chpe 1055 #elif defined COMPILE_PCRE16
2585 ph10 836 if (common->utf)
2586     {
/* UTF-16: step back one unit; when that unit satisfies
   (unit & 0xfc00) == 0xdc00, step back two more bytes (TMP1 is 1 or 0,
   shifted left by one). */
2587     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2588     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2589     /* Skip low surrogate if necessary. */
2590     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2591     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2592 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2593 ph10 836 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2594     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2595     return;
2596     }
2597 chpe 1055 #endif /* COMPILE_PCRE[8|16] */
2598     #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Fixed-width case: one code unit back. */
2599 ph10 836 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2600 ph10 664 }
2601    
2602 zherczeg 970 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2603 ph10 664 {
2604     /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed.
         The emitted conditional jump is appended to *backtracks. When jumpiftrue
         is set the jump is taken if the character is a newline, otherwise it is
         taken if the character is not a newline. nltype selects which of the
         three code sequences below is generated. */
2605     DEFINE_COMPILER;
2606    
2607     if (nltype == NLTYPE_ANY)
2608     {
         /* Delegate to the shared anynewline helper (reached by a fast call). The
         jump below tests the flags that helper leaves behind; presumably "not
         zero" means the character matched - confirm in the helper itself. */
2609     add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2610 zherczeg 970 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2611 ph10 664 }
2612     else if (nltype == NLTYPE_ANYCRLF)
2613     {
         /* TMP2 = (char == CR) | (char == NL). The final OR also updates the
         zero flag, which the jump tests. */
2614     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2615 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2616 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2617 zherczeg 1209 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2618 zherczeg 970 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2619 ph10 664 }
2620     else
2621     {
         /* Fixed newline: only values below 256 are accepted here (see the
         assert), so one compare against common->newline is enough. */
2622 ph10 836 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2623 zherczeg 970 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2624 ph10 664 }
2625     }
2626    
2627 ph10 836 #ifdef SUPPORT_UTF
2628    
2629 chpe 1055 #if defined COMPILE_PCRE8
2630 ph10 836 static void do_utfreadchar(compiler_common *common)
2631 ph10 664 {
2632 ph10 836 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2633 zherczeg 736 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2.
         The continuation bytes are loaded from offsets 1..3 relative to STR_PTR
         and STR_PTR is advanced by the same amount that is returned in TMP2.
         Continuation bytes are only masked with 0x3f; their top bits are not
         checked. The routine is entered and left with sljit fast enter/return,
         using RETURN_ADDR for the return address. */
2634 ph10 664 DEFINE_COMPILER;
2635     struct sljit_jump *jump;
2636    
2637 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2638 ph10 664 /* Searching for the first zero. */
         /* Bit 0x20 clear in the lead byte: 110xxxxx, a two byte sequence. */
2639     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2640     jump = JUMP(SLJIT_C_NOT_ZERO);
2641 ph10 836 /* Two byte sequence. */
2642     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2643     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2644 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2645     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2646     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2647     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2648 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2649 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2650     JUMPHERE(jump);
2651    
         /* Bit 0x10 clear in the lead byte: 1110xxxx, a three byte sequence. */
2652     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2653     jump = JUMP(SLJIT_C_NOT_ZERO);
2654 ph10 836 /* Three byte sequence. */
2655     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2656 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2657     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2658     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2659     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2660     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2661 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2662     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2663 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2664     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2665 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2666 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2667     JUMPHERE(jump);
2668    
         /* Everything else is decoded as four bytes; longer lead bytes are not
         distinguished here. */
2669 ph10 836 /* Four byte sequence. */
2670     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2671 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2672     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2673     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2674     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2675     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2676 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2677 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2678     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2679     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2680 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2681     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2682 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2683     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2684 ph10 836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2685 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2686     }
2687    
2688 ph10 836 static void do_utfreadtype8(compiler_common *common)
2689 ph10 664 {
2690 ph10 836 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2691     of the character (>= 0xc0). Return value in TMP1.
         For a two byte sequence the code point is assembled in TMP2 and, when it
         is at most 255, the byte at common->ctypes + code point is returned;
         every other character yields 0. STR_PTR is advanced over the remaining
         bytes of the character. The second byte is read at offset 0 from
         STR_PTR, so the caller has presumably already stepped over the lead
         byte - confirm at the call site. */
2692 ph10 664 DEFINE_COMPILER;
2693     struct sljit_jump *jump;
2694     struct sljit_jump *compare;
2695    
2696 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2697 ph10 664
         /* Bit 0x20 set in the lead byte: three or more bytes, handled at the
         end of the function. */
2698     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2699     jump = JUMP(SLJIT_C_NOT_ZERO);
2700 ph10 836 /* Two byte sequence. */
2701     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2702     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2703 ph10 664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2704     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2705     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2706     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
         /* Code points above 255 have no entry in the ctypes table. */
2707     compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2708     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2709     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2710    
2711     JUMPHERE(compare);
2712     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2713     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2714     JUMPHERE(jump);
2715    
2716     /* We only have types for characters less than 256. */
         /* Skip the rest of the character: the number of bytes to add is loaded
         from utf8_table4, indexed by (lead byte - 0xc0). */
2717 zherczeg 1195 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2718 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2719     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2720     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2721     }
2722    
2723 chpe 1055 #elif defined COMPILE_PCRE16
2724 ph10 664
2725 ph10 836 static void do_utfreadchar(compiler_common *common)
2726     {
2727     /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2728     of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2.
         When a pair is combined, the second unit is read from offset 1 relative
         to STR_PTR and STR_PTR is advanced by one code unit. */
2729     DEFINE_COMPILER;
2730     struct sljit_jump *jump;
2731    
2732 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
         /* Values below 0xdc00 are treated as the first half of a surrogate pair. */
2733 ph10 836 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2734     /* Do nothing, only return. */
         /* NOTE(review): TMP2 is not written on this path, so the "length - 1"
         result is only produced by the surrogate pair path below - confirm that
         callers do not rely on TMP2 here. */
2735     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2736    
2737     JUMPHERE(jump);
2738     /* Combine two 16 bit characters. */
2739     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2740     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2741     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2742     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2743     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2744     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2745     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
         /* The 20 bit payload is offset by 0x10000 to give the code point. */
2746     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2747     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2748     }
2749    
2750 chpe 1055 #endif /* COMPILE_PCRE[8|16] */
2751 ph10 836
2752     #endif /* SUPPORT_UTF */
2753    
2754 ph10 664 #ifdef SUPPORT_UCP
2755    
2756     /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2757     #define UCD_BLOCK_MASK 127
2758     #define UCD_BLOCK_SHIFT 7
2759    
2760     static void do_getucd(compiler_common *common)
2761     {
2762     /* Search the UCD record for the character that comes in TMP1.
2763     Returns chartype in TMP1 and UCD offset in TMP2.
         The lookup is two-staged: ucd_stage1 is indexed by (char >> 7), and its
         byte selects a 128 entry block of ucd_stage2; the 16 bit value found
         there (left in TMP2) indexes ucd_records. */
2764     DEFINE_COMPILER;
2765    
         /* The shift amounts used below (7 for blocks, 3 for records) depend on
         these sizes. */
2766     SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2767    
2768 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
         /* TMP2 = ucd_stage1[char >> 7] (one byte per block). */
2769 ph10 664 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2770 zherczeg 1195 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
         /* TMP1 = (block << 7) + (char & 127), the index into ucd_stage2. */
2771 ph10 664 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2772     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2773     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
         /* TMP2 = ucd_stage2[TMP1]; the index is scaled by 2 (shift 1) because
         the entries are loaded as unsigned 16 bit values. */
2774 zherczeg 1195 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2775 ph10 664 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
         /* TMP1 = ucd_records[TMP2].chartype; the index is scaled by 8 (shift 3),
         the record size asserted above. */
2776 zherczeg 1195 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2777 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2778     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2779     }
2780     #endif
2781    
2782     static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2783     {
         /* Emits the code that advances STR_PTR to the next start position and
         returns the label of that code. Execution first jumps over the advancing
         code (the "start" jump lands at the end of this function), so nothing is
         skipped on the first pass.
         With firstline set, the end of the first line is located beforehand and
         stored in the local slot common->first_line_end; STR_PTR is preserved in
         TMP3 during that search.
         newlinecheck is enabled only when neither hascrorlf nor firstline is set
         and the newline type is ANY, ANYCRLF or a fixed value above 255. In that
         case a character equal to the high byte of common->newline followed by
         its low byte is stepped over as one unit. */
2784     DEFINE_COMPILER;
2785     struct sljit_label *mainloop;
2786     struct sljit_label *newlinelabel = NULL;
2787     struct sljit_jump *start;
2788     struct sljit_jump *end = NULL;
2789     struct sljit_jump *nl = NULL;
2790 chpe 1120 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2791 ph10 836 struct sljit_jump *singlechar;
2792 zherczeg 736 #endif
2793 ph10 664 jump_list *newline = NULL;
2794     BOOL newlinecheck = FALSE;
2795 ph10 836 BOOL readuchar = FALSE;
2796 ph10 664
2797     if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2798     common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2799     newlinecheck = TRUE;
2800    
2801     if (firstline)
2802     {
2803     /* Search for the end of the first line. */
2804 zherczeg 920 SLJIT_ASSERT(common->first_line_end != 0);
2805 zherczeg 993 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2806 ph10 664
2807     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2808     {
         /* Two code unit newline: compare the units at STR_PTR - 1 and STR_PTR
         with the high and low byte of common->newline. */
2809     mainloop = LABEL();
2810 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2811 ph10 664 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2812 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2813     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2814 ph10 664 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2815     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2816 zherczeg 995 JUMPHERE(end);
         /* STR_PTR is one unit past the first newline unit (or past the end). */
2817 zherczeg 920 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2818 ph10 664 }
2819     else
2820     {
2821     end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2822     mainloop = LABEL();
2823     /* Continual stores do not cause a data dependency. */
         /* The position before each character is stored, so when a newline is
         found the slot already holds the address of that newline. */
2824 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2825 ph10 664 read_char(common);
2826     check_newlinechar(common, common->nltype, &newline, TRUE);
2827     CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2828 zherczeg 995 JUMPHERE(end);
2829 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2830 ph10 664 set_jumps(newline, LABEL());
2831     }
2832    
2833 zherczeg 993 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2834 ph10 664 }
2835    
         /* Skip the advancing code on the first pass. */
2836     start = JUMP(SLJIT_JUMP);
2837    
2838     if (newlinecheck)
2839     {
         /* Reached from the main loop when the current unit equals the high byte
         of common->newline: step over it and, unless the end was reached, also
         over the next unit if it equals the low byte. */
2840     newlinelabel = LABEL();
2841 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2842 ph10 664 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2843 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2844 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2845 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
         /* Convert the 0/1 flag into a byte offset of one code unit. */
2846 chpe 1055 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2847     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2848 ph10 836 #endif
2849 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2850     nl = JUMP(SLJIT_JUMP);
2851     }
2852    
2853     mainloop = LABEL();
2854    
2855     /* Increasing the STR_PTR here requires one less jump in the most common case. */
         /* The current unit is only loaded when it is needed: for the UTF length
         computation or for the newline check. */
2856 ph10 836 #ifdef SUPPORT_UTF
2857     if (common->utf) readuchar = TRUE;
2858 ph10 664 #endif
2859 ph10 836 if (newlinecheck) readuchar = TRUE;
2860 ph10 664
2861 ph10 836 if (readuchar)
2862     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2863 ph10 664
2864     if (newlinecheck)
2865     CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2866    
2867 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2868 chpe 1055 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2869     #if defined COMPILE_PCRE8
2870 ph10 836 if (common->utf)
2871 ph10 664 {
         /* Lead bytes >= 0xc0: add the extra length taken from utf8_table4. */
2872 ph10 836 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2873 zherczeg 1195 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2874 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2875 ph10 836 JUMPHERE(singlechar);
2876 ph10 664 }
2877 chpe 1055 #elif defined COMPILE_PCRE16
2878 ph10 836 if (common->utf)
2879     {
         /* Units in 0xd800..0xdbff (masked value equal to 0xd800) start a pair:
         the 0/1 flag shifted left by one gives the two extra bytes to skip. */
2880     singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2881     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2882     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2883 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2884 ph10 836 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2885     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2886     JUMPHERE(singlechar);
2887     }
2888 chpe 1055 #endif /* COMPILE_PCRE[8|16] */
2889     #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2890 ph10 664 JUMPHERE(start);
2891    
2892     if (newlinecheck)
2893     {
2894     JUMPHERE(end);
2895     JUMPHERE(nl);
2896     }
2897    
2898     return mainloop;
2899     }
2900    
2901 zherczeg 1040 #define MAX_N_CHARS 3
2902    
2903     static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2904 zherczeg 986 {
         /* Tries to emit a scan loop that moves STR_PTR forward to the first place
         where the first two or three literal code units of the pattern occur.
         The compiled pattern is walked from common->start + 1 + LINK_SIZE and up
         to MAX_N_CHARS code units are collected in chars[] as pairs:
         chars[2 * i] is the unit to compare with (case bit already ORed in) and
         chars[2 * i + 1] is the case bit that must be ORed into the subject unit
         (0 for a case sensitive unit).
         Returns FALSE, without emitting anything, when the pattern has
         alternatives, when char_get_othercase_bit() returns 0 for a caseless
         character, or when fewer than two units were collected. Returns TRUE
         after the loop has been emitted. */
2905     DEFINE_COMPILER;
2906     struct sljit_label *start;
2907 zherczeg 991 struct sljit_jump *quit;
2908 chpe 1055 pcre_uint32 chars[MAX_N_CHARS * 2];
2909 zherczeg 1239 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2910 zherczeg 989 int location = 0;
2911     pcre_int32 len, c, bit, caseless;
2912 zherczeg 1040 int must_stop;
2913 zherczeg 986
2914 zherczeg 1040 /* We do not support alternatives now. */
2915 zherczeg 986 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2916     return FALSE;
2917    
2918     while (TRUE)
2919     {
         /* must_stop: 0 = collect this character and continue with the next
         opcode, 1 = collect this character and stop, 2 = stop immediately. */
2920     caseless = 0;
2921 zherczeg 1040 must_stop = 1;
2922 zherczeg 986 switch(*cc)
2923     {
2924     case OP_CHAR:
2925 zherczeg 1040 must_stop = 0;
2926 zherczeg 986 cc++;
2927     break;
2928    
2929     case OP_CHARI:
2930     caseless = 1;
2931 zherczeg 1040 must_stop = 0;
2932 zherczeg 986 cc++;
2933     break;
2934    
2935     case OP_SOD:
2936     case OP_SOM:
2937     case OP_SET_SOM:
2938     case OP_NOT_WORD_BOUNDARY:
2939     case OP_WORD_BOUNDARY:
2940     case OP_EODN:
2941     case OP_EOD:
2942     case OP_CIRC:
2943     case OP_CIRCM:
2944     case OP_DOLL:
2945     case OP_DOLLM:
2946     /* Zero width assertions. */
2947     cc++;
2948     continue;
2949    
         /* Repeats that match the character at least once: the first occurrence
         is collected, then collecting stops (must_stop stays 1). */
2950     case OP_PLUS:
2951     case OP_MINPLUS:
2952     case OP_POSPLUS:
2953     cc++;
2954     break;
2955    
2956     case OP_EXACT:
2957     cc += 1 + IMM2_SIZE;
2958     break;
2959    
2960     case OP_PLUSI:
2961     case OP_MINPLUSI:
2962     case OP_POSPLUSI:
2963     caseless = 1;
2964     cc++;
2965     break;
2966    
2967     case OP_EXACTI:
2968     caseless = 1;
2969     cc += 1 + IMM2_SIZE;
2970     break;
2971    
2972     default:
2973 zherczeg 1040 must_stop = 2;
2974     break;
2975 zherczeg 986 }
2976    
2977 zherczeg 1040 if (must_stop == 2)
2978     break;
2979    
         /* len = number of code units of the character at cc. */
2980 zherczeg 986 len = 1;
2981 chpe 1114 #ifdef SUPPORT_UTF
2982 zherczeg 986 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2983     #endif
2984    
2985     if (caseless && char_has_othercase(common, cc))
2986     {
         /* Repack the result of char_get_othercase_bit(): afterwards the low
         byte of caseless holds the value of len at which the case bit applies,
         and the bits above it hold the bit mask itself. The input layout is
         presumably (unit offset << 8 | bit), with an extra 0x100 flag in the
         non 8 bit builds - confirm in char_get_othercase_bit(). */
2987     caseless = char_get_othercase_bit(common, cc);
2988     if (caseless == 0)
2989     return FALSE;
2990     #ifdef COMPILE_PCRE8
2991     caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2992     #else
2993     if ((caseless & 0x100) != 0)
2994     caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2995     else
2996     caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2997     #endif
2998     }
2999     else
3000     caseless = 0;
3001    
         /* Copy the code units of this character; stops early when chars[] is
         full. */
3002 zherczeg 1040 while (len > 0 && location < MAX_N_CHARS * 2)
3003 zherczeg 986 {
3004     c = *cc;
3005     bit = 0;
3006     if (len == (caseless & 0xff))
3007     {
3008     bit = caseless >> 8;
3009     c |= bit;
3010     }
3011    
3012 zherczeg 989 chars[location] = c;
3013     chars[location + 1] = bit;
3014 zherczeg 986
3015     len--;
3016 zherczeg 989 location += 2;
3017 zherczeg 986 cc++;
3018     }
3019    
3020 zherczeg 1040 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3021 zherczeg 986 break;
3022     }
3023    
3024 zherczeg 1040 /* At least two characters are required. */
3025     if (location < 2 * 2)
3026     return FALSE;
3027    
         /* STR_END is temporarily lowered by (number of units - 1) so that all
         collected units can be read without a separate bounds check. With
         firstline the limit is derived from the first_line_end slot and the real
         STR_END is kept in TMP3. */
3028 zherczeg 986 if (firstline)
3029     {
3030 zherczeg 993 SLJIT_ASSERT(common->first_line_end != 0);
3031     OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3032 zherczeg 1242 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3033 zherczeg 986 }
3034     else
3035 zherczeg 1242 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3036 zherczeg 986
3037     start = LABEL();
3038 zherczeg 991 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3039 zherczeg 986
         /* TMP1 and TMP2 receive the first two units; STR_PTR is advanced before
         the compares so a mismatch can jump straight back to start. */
3040 zherczeg 1040 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3041 zherczeg 986 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3042 zherczeg 1040 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3043     if (chars[1] != 0)
3044     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3045     CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
         /* TMP1 is free again: reuse it for the third unit, if there is one. */
3046     if (location > 2 * 2)
3047     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3048     if (chars[3] != 0)
3049     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3050     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3051     if (location > 2 * 2)
3052 zherczeg 986 {
3053 zherczeg 1040 if (chars[5] != 0)
3054     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3055     CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3056 zherczeg 986 }
         /* All units matched: undo the early increment so that STR_PTR points at
         the first matching unit. */
3057 zherczeg 1040 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3058 zherczeg 986
3059 zherczeg 991 JUMPHERE(quit);
3060 zherczeg 986
         /* Restore the real STR_END. */
3061     if (firstline)
3062 zherczeg 993 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3063     else
3064 zherczeg 1242 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3065 zherczeg 986 return TRUE;
3066     }
3067    
3068 zherczeg 1040 #undef MAX_N_CHARS
3069    
3070 ph10 836 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3071 ph10 664 {
         /* Emits a loop that advances STR_PTR one code unit at a time until the
         unit at STR_PTR equals first_char (or, when caseless is set, its other
         case) or STR_PTR reaches STR_END. With firstline set, STR_END is replaced
         by the value of the first_line_end slot for the duration of the loop and
         restored from TMP3 afterwards. TMP1 and TMP2 are overwritten. */
3072     DEFINE_COMPILER;
3073     struct sljit_label *start;
3074 zherczeg 991 struct sljit_jump *quit;
3075 ph10 664 struct sljit_jump *found;
3076 ph10 836 pcre_uchar oc, bit;
3077 ph10 664
3078     if (firstline)
3079     {
3080 zherczeg 993 SLJIT_ASSERT(common->first_line_end != 0);
3081     OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3082 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3083 ph10 664 }
3084    
3085     start = LABEL();
3086 zherczeg 991 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3087 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3088 ph10 664
         /* oc = other case of first_char, or first_char itself when the compare
         is case sensitive or no other case exists. */
3089 ph10 836 oc = first_char;
3090     if (caseless)
3091     {
3092     oc = TABLE_GET(first_char, common->fcc, first_char);
3093     #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3094     if (first_char > 127 && common->utf)
3095     oc = UCD_OTHERCASE(first_char);
3096     #endif
3097     }
3098     if (first_char == oc)
3099     found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3100 ph10 664 else
3101     {
3102 ph10 836 bit = first_char ^ oc;
3103 zherczeg 1052 if (is_powerof2(bit))
3104 ph10 664 {
         /* The two cases differ in a single bit: force that bit on in the
         subject unit and compare once. */
3105     OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3106 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3107 ph10 664 }
3108     else
3109     {
         /* General case: TMP2 = (unit == first_char) | (unit == oc). */
3110 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3111 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3112 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3113 zherczeg 1209 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3114 ph10 664 found = JUMP(SLJIT_C_NOT_ZERO);
3115     }
3116     }
3117    
         /* No match at this position: step one unit and retry. */
3118 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3119 ph10 664 JUMPTO(SLJIT_JUMP, start);
3120     JUMPHERE(found);
3121 zherczeg 991 JUMPHERE(quit);
3122 ph10 664
3123     if (firstline)
3124 zherczeg 993 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3125 ph10 664 }
3126    
3127     static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3128     {
         /* Emits code that moves STR_PTR forward to the position just after the
         next newline, unless STR_PTR is already at the start of the subject
         (jit_arguments.str) or at STR_END. With firstline set, STR_END is replaced
         by the value of the first_line_end slot while scanning; the real STR_END
         is kept in TMP3 and must be restored from TMP3 on every exit path.
         TMP1 and TMP2 are overwritten. */
3129     DEFINE_COMPILER;
3130     struct sljit_label *loop;
3131     struct sljit_jump *lastchar;
3132     struct sljit_jump *firstchar;
3133 zherczeg 991 struct sljit_jump *quit;
3134 ph10 664 struct sljit_jump *foundcr = NULL;
3135     struct sljit_jump *notfoundnl;
3136     jump_list *newline = NULL;
3137    
3138     if (firstline)
3139     {
3140 zherczeg 993 SLJIT_ASSERT(common->first_line_end != 0);
3141     OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3142 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3143 ph10 664 }
3144    
3145     if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3146     {
         /* Fixed newline made of two code units. */
3147     lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3148     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3149     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3150     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3151     firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3152    
         /* Step back one code unit when at least two units precede STR_PTR
         (STR_PTR >= begin + 2), so that the loop below, which inspects the units
         at offsets -2 and -1 after its increment, starts with the pair that ends
         just before the original STR_PTR. */
3153 ph10 836 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3154 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3155 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3156 chpe 1055 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3157     OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3158 ph10 836 #endif
3159 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3160    
3161     loop = LABEL();
3162 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3163 zherczeg 991 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3164 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3165     OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3166 ph10 664 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3167     CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3168    
3169 zherczeg 991 JUMPHERE(quit);
3170 ph10 664 JUMPHERE(firstchar);
3171     JUMPHERE(lastchar);
3172    
         /* Restore the real STR_END from TMP3, where it was saved on entry. TMP3
         is not modified in this branch. (This used to reload STR_END from the
         POSSESSIVE0 local slot, which is never written by this function.) */
3173     if (firstline)
3174     OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3175     return;
3176     }
3177    
         /* Other newline types: nothing to do at the start of the subject,
         otherwise re-read the previous character first. */
3178     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3179     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3180     firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3181     skip_char_back(common);
3182    
3183     loop = LABEL();
3184     read_char(common);
3185     lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
         /* CR needs special handling because an LF that follows it must be
         skipped as well. */
3186     if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3187     foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
         /* jumpiftrue is FALSE: the collected jumps are taken for characters that
         are not newlines, and they continue the loop. */
3188     check_newlinechar(common, common->nltype, &newline, FALSE);
3189     set_jumps(newline, loop);
3190    
3191     if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3192     {
3193 zherczeg 991 quit = JUMP(SLJIT_JUMP);
3194 ph10 664 JUMPHERE(foundcr);
         /* After a CR: if not at the end, advance one more unit when the next
         unit is NL (the 0/1 compare result is scaled to a byte offset). */
3195     notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3196 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3197 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3198 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3199 chpe 1055 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3200     OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3201 ph10 836 #endif
3202 ph10 664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3203     JUMPHERE(notfoundnl);
3204 zherczeg 991 JUMPHERE(quit);
3205 ph10 664 }
3206     JUMPHERE(lastchar);
3207     JUMPHERE(firstchar);
3208    
3209     if (firstline)
3210 zherczeg 993 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3211 ph10 664 }
3212    
3213 zherczeg 1244 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3214    
3215 ph10 664 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3216     {
         /* Emits a loop that advances STR_PTR, one character at a time, until the
         character at STR_PTR has its bit set in the start bitmap or STR_PTR
         reaches STR_END. start_bits is the address of that bitmap passed as an
         integer; 32 bytes (256 bits) are read from it here.
         With firstline set, STR_END is replaced by the value of the
         first_line_end slot while scanning. The real STR_END is kept in
         RETURN_ADDR rather than TMP3, because TMP3 holds the current code unit in
         UTF mode. */
3217     DEFINE_COMPILER;
3218     struct sljit_label *start;
3219 zherczeg 991 struct sljit_jump *quit;
3220 zherczeg 1244 struct sljit_jump *found = NULL;
3221     jump_list *matches = NULL;
3222     pcre_uint8 inverted_start_bits[32];
3223     int i;
3224 ph10 836 #ifndef COMPILE_PCRE8
3225     struct sljit_jump *jump;
3226     #endif
3227 ph10 664
         /* check_class_ranges() is given the complement of the bitmap; see its
         definition for why the inverted form is expected. */
3228 zherczeg 1244 for (i = 0; i < 32; ++i)
3229     inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3230    
3231 ph10 664 if (firstline)
3232     {
3233 zherczeg 993 SLJIT_ASSERT(common->first_line_end != 0);
3234 zherczeg 995 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3235 zherczeg 920 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3236 ph10 664 }
3237    
3238     start = LABEL();
3239 zherczeg 991 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3240 ph10 836 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
         /* Keep a copy of the code unit: TMP1 is destroyed by the test below but
         is needed again to compute the character length. */
3241     #ifdef SUPPORT_UTF
3242     if (common->utf)
3243 zherczeg 736 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3244 ph10 664 #endif
3245 zherczeg 1244
         /* When check_class_ranges() returns FALSE, fall back to a generic
         bitmap lookup. */
3246     if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3247     {
         /* Wide builds: values of 255 and above are all tested as 255. */
3248 ph10 836 #ifndef COMPILE_PCRE8
3249 zherczeg 1244 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3250     OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3251     JUMPHERE(jump);
3252 ph10 836 #endif
         /* Test bit (c & 7) of byte start_bits[c >> 3]. */
3253 zherczeg 1244 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3254     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3255     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3256     OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3257     OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3258     found = JUMP(SLJIT_C_NOT_ZERO);
3259     }
3260 ph10 664
         /* Not a possible start: advance over the whole character and retry. */
3261 ph10 836 #ifdef SUPPORT_UTF
3262     if (common->utf)
3263 zherczeg 736 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3264     #endif
3265 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3266 chpe 1055 #ifdef SUPPORT_UTF
3267     #if defined COMPILE_PCRE8
3268 ph10 836 if (common->utf)
3269 zherczeg 736 {
         /* Lead bytes >= 0xc0: add the extra length taken from utf8_table4. */
3270     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3271 zherczeg 1195 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3272 zherczeg 736 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3273     }
3274 chpe 1055 #elif defined COMPILE_PCRE16
3275 ph10 836 if (common->utf)
3276     {
         /* Units in 0xd800..0xdbff start a surrogate pair: skip two more bytes. */
3277     CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3278     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3279     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3280 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3281 ph10 836 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3282     OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3283     }
3284 chpe 1055 #endif /* COMPILE_PCRE[8|16] */
3285     #endif /* SUPPORT_UTF */
3286 ph10 664 JUMPTO(SLJIT_JUMP, start);
         /* Every exit of the loop lands here. */
3287 zherczeg 1244 if (found != NULL)
3288     JUMPHERE(found);
3289     if (matches != NULL)
3290     set_jumps(matches, LABEL());
3291 zherczeg 991 JUMPHERE(quit);
3292 ph10 664
3293     if (firstline)
3294 zherczeg 995 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3295 ph10 664 }
3296    
3297 ph10 836 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3298 ph10 664 {
         /* Emits a forward search for req_char (or, when caseless is set, its
         other case) between STR_PTR and STR_END. The position where it was found
         is cached in the local slot common->req_char_ptr. Returns the jump that
         is taken when the search reaches STR_END without finding the character;
         the caller decides where that jump goes. STR_PTR is not modified; TMP1
         and TMP2 are overwritten. */
3299     DEFINE_COMPILER;
3300     struct sljit_label *loop;
3301     struct sljit_jump *toolong;
3302     struct sljit_jump *alreadyfound;
3303     struct sljit_jump *found;
3304     struct sljit_jump *foundoc = NULL;
3305     struct sljit_jump *notfound;
3306 chpe 1084 pcre_uint32 oc, bit;
3307 ph10 664
3308 zherczeg 920 SLJIT_ASSERT(common->req_char_ptr != 0);
3309     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
         /* The search is skipped when STR_PTR + REQ_BYTE_MAX is still before
         STR_END, and when STR_PTR is before the cached position of an earlier
         hit. */
3310 ph10 664 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3311     toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3312     alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3313    
         /* TMP1 is the scan pointer; with a known first character the search
         starts one code unit after STR_PTR. */
3314 ph10 836 if (has_firstchar)
3315     OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3316 ph10 664 else
3317     OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3318    
3319     loop = LABEL();
3320     notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3321    
3322 ph10 836 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
         /* oc = other case of req_char, or req_char itself when the compare is
         case sensitive or no other case exists. */
3323     oc = req_char;
3324     if (caseless)
3325     {
3326     oc = TABLE_GET(req_char, common->fcc, req_char);
3327     #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3328     if (req_char > 127 && common->utf)
3329     oc = UCD_OTHERCASE(req_char);
3330     #endif
3331     }
3332     if (req_char == oc)
3333     found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3334 ph10 664 else
3335     {
3336 ph10 836 bit = req_char ^ oc;
3337 zherczeg 1052 if (is_powerof2(bit))
3338 ph10 664 {
         /* The two cases differ in a single bit: force it on and compare once. */
3339     OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3340 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3341 ph10 664 }
3342     else
3343     {
3344 ph10 836 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3345 ph10 664 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3346     }
3347     }
3348 ph10 836 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3349 ph10 664 JUMPTO(SLJIT_JUMP, loop);
3350    
3351     JUMPHERE(found);
3352     if (foundoc)
3353     JUMPHERE(foundoc);
         /* Remember where the character was found. */
3354 zherczeg 920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3355 ph10 664 JUMPHERE(alreadyfound);
3356     JUMPHERE(toolong);
3357     return notfound;
3358     }
3359    
3360     static void do_revertframes(compiler_common *common)
3361     {
         /* Fast-call helper that replays saved frame records. TMP1 walks upwards
         from STACK_TOP and TMP3 holds the base address of the locals. Each record
         starts with a signed word:
           > 0  offset of a local: the next two words are copied to that local and
                the one after it, and the record is three words long;
           < 0  negated offset of a local: the next word is copied to that local,
                and the record is two words long;
           = 0  end of the frame: return to the caller.
         STACK_TOP itself is not modified; TMP1, TMP2 and TMP3 are overwritten. */
3362     DEFINE_COMPILER;
3363     struct sljit_jump *jump;
3364     struct sljit_label *mainloop;
3365    
3366 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3367 zherczeg 726 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3368 zherczeg 955 GET_LOCAL_BASE(TMP3, 0, 0);
3369 ph10 664
3370     /* Drop frames until we reach STACK_TOP. */
3371     mainloop = LABEL();
3372     OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
         /* Signed compare against zero; the flags are tested again after the
         first jump, so nothing may change them in between. */
3373 zherczeg 1246 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3374     jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3375    
         /* Positive offset: restore two consecutive locals. */
3376 zherczeg 955 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3377 zherczeg 1195 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3378     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3379     OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3380 ph10 664 JUMPTO(SLJIT_JUMP, mainloop);
3381    
3382     JUMPHERE(jump);
         /* Still using the flags of the compare above: zero falls through to the
         return, negative goes on to the single word case. */
3383 zherczeg 1246 jump = JUMP(SLJIT_C_SIG_LESS);
3384 ph10 664 /* End of dropping frames. */
3385     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3386    
3387     JUMPHERE(jump);
         /* Negative offset: restore a single local at (locals base - value). */
3388 zherczeg 1246 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3389     OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3390     OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3391 zherczeg 1195 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3392 ph10 664 JUMPTO(SLJIT_JUMP, mainloop);
3393     }
3394    
/* Emits the word boundary helper (entered with a fast call; the return
address is kept in LOCALS0). The emitted code computes a 0/1 "word
character" value for the character before STR_PTR (stored in LOCALS1) and
for the character at STR_PTR (left in TMP2), then XORs the two with
SLJIT_SET_E, so the flags tell whether the two values are equal. A value of
0 is used when there is no previous character, when a jump collected by
check_str_end is taken, and (in the non-UCP path) for characters above 255.
TMP1 and TMP2 are overwritten. */
3395     static void check_wordboundary(compiler_common *common)
3396     {
3397     DEFINE_COMPILER;
3398 zherczeg 914 struct sljit_jump *skipread;
3399 zherczeg 1269 jump_list *skipread_list = NULL;
3400 ph10 836 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3401 ph10 664 struct sljit_jump *jump;
3402 ph10 670 #endif
3403 ph10 664
/* The ">> 4" shifts below rely on this value. */
3404 zherczeg 741 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3405 ph10 664
3406 zherczeg 955 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3407 ph10 664 /* Get type of the previous char, and put it to LOCALS1. */
3408     OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3409     OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3410     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* When STR_PTR <= arguments->begin the previous character is not read and
LOCALS1 keeps the 0 stored above. */
3411 zherczeg 914 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3412 ph10 664 skip_char_back(common);
3413 zherczeg 914 check_start_used_ptr(common);
3414 ph10 664 read_char(common);
3415    
3416     /* Testing char type. */
3417     #ifdef SUPPORT_UCP
3418 ph10 836 if (common->use_ucp)
3419 ph10 664 {
/* TMP2 = 1 is the result for an underscore. Otherwise getucd is called
(presumably it leaves the Unicode character type in TMP1 - confirm against
its definition) and TMP2 is set when that value is within [ucp_Ll, ucp_Lu]
or [ucp_Nd, ucp_No]; each range is tested with one unsigned comparison
after subtracting its lower bound. */
3420     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3421     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3422     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3423     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3424     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3425 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3426 ph10 664 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3427     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3428 zherczeg 1209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3429 ph10 664 JUMPHERE(jump);
3430     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3431     }
3432     else
3433     #endif
3434     {
/* Characters above 255 skip the table lookup (always in the non 8 bit
libraries, only in UTF mode in the 8 bit library). */
3435 ph10 836 #ifndef COMPILE_PCRE8
3436     jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3437     #elif defined SUPPORT_UTF
3438 ph10 664 /* Here LOCALS1 has already been zeroed. */
3439     jump = NULL;
3440 ph10 836 if (common->utf)
3441 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3442 ph10 836 #endif /* COMPILE_PCRE8 */
/* LOCALS1 = (ctypes[TMP1] >> 4) & 1, which is the ctype_word bit. */
3443 ph10 664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3444     OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3445     OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3446     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3447 ph10 836 #ifndef COMPILE_PCRE8
3448     JUMPHERE(jump);
3449     #elif defined SUPPORT_UTF
3450 ph10 664 if (jump != NULL)
3451     JUMPHERE(jump);
3452 ph10 836 #endif /* COMPILE_PCRE8 */
3453 ph10 664 }
3454 zherczeg 914 JUMPHERE(skipread);
3455 ph10 664
/* Second half: the character at STR_PTR. TMP2 = 0 is the result when one of
the jumps that check_str_end adds to skipread_list is taken; those jumps
land right before the final XOR. */
3456     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3457 zherczeg 1269 check_str_end(common, &skipread_list);
3458 ph10 664 peek_char(common);
3459    
3460     /* Testing char type. This is a code duplication. */
3461     #ifdef SUPPORT_UCP
3462 ph10 836 if (common->use_ucp)
3463 ph10 664 {
3464     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3465     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3466     add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3467     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3468     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3469 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3470 ph10 664 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3471     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3472 zherczeg 1209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3473 ph10 664 JUMPHERE(jump);
3474     }
3475     else
3476     #endif
3477     {
3478 ph10 836 #ifndef COMPILE_PCRE8
3479     /* TMP2 may be destroyed by peek_char. */
3480 ph10 664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3481 ph10 836 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3482     #elif defined SUPPORT_UTF
3483     OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3484 ph10 664 jump = NULL;
3485 ph10 836 if (common->utf)
3486 ph10 664 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3487     #endif
/* TMP2 = (ctypes[TMP1] >> 4) & 1, the same test as for the previous char. */
3488     OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3489     OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3490     OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3491 ph10 836 #ifndef COMPILE_PCRE8
3492     JUMPHERE(jump);
3493     #elif defined SUPPORT_UTF
3494 ph10 664 if (jump != NULL)
3495     JUMPHERE(jump);
3496 ph10 836 #endif /* COMPILE_PCRE8 */
3497 ph10 664 }
3498 zherczeg 1269 set_jumps(skipread_list, LABEL());
3499 ph10 664
/* Only the flags are produced (the destination is SLJIT_UNUSED): the XOR
result is zero when both characters have the same 0/1 value. */
3500     OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3501     sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3502     }
3503    
3504 zherczeg 990 /*
3505     Range format:
3506    
3507     ranges[0] = number of bit change positions stored (max MAX_RANGE_SIZE, -1 means invalid range).
3508     ranges[1] = first bit, i.e. the bit of character 0 (0 or 1).
3509     ranges[2] .. ranges[1 + ranges[0]] = positions of the bit changes (the characters whose bit is not equal to the bit of the previous character).
3510     */
3511    
3512     static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3513     {
3514     DEFINE_COMPILER;
3515     struct sljit_jump *jump;
3516    
3517     if (ranges[0] < 0)
3518     return FALSE;
3519    
3520     switch(ranges[0])
3521     {
3522     case 1:
3523     if (readch)
3524     read_char(common);
3525     add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3526     return TRUE;
3527    
3528     case 2:
3529     if (readch)
3530     read_char(common);
3531     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3532     add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3533     return TRUE;
3534    
3535     case 4:
3536     if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3537     {
3538     if (readch)
3539     read_char(common);
3540     if (ranges[1] != 0)
3541     {
3542     add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3543     add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3544     }
3545     else
3546     {
3547     jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3548     add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3549     JUMPHERE(jump);
3550     }
3551     return TRUE;
3552     }
3553 zherczeg 1052 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3554 zherczeg 992 {
3555     if (readch)
3556     read_char(common);
3557     OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3558     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3559     add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3560     return TRUE;
3561     }
3562 zherczeg 990 return FALSE;
3563    
3564     default:
3565     return FALSE;
3566     }
3567     }
3568    
3569     static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3570     {
3571     int i, bit, length;
3572     const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3573    
3574     bit = ctypes[0] & flag;
3575 zherczeg 992 ranges[0] = -1;
3576 zherczeg 990 ranges[1] = bit != 0 ? 1 : 0;
3577     length = 0;
3578    
3579     for (i = 1; i < 256; i++)
3580     if ((ctypes[i] & flag) != bit)
3581     {
3582     if (length >= MAX_RANGE_SIZE)
3583     return;
3584     ranges[2 + length] = i;
3585     length++;
3586     bit ^= flag;
3587     }
3588    
3589     if (bit != 0)
3590     {
3591     if (length >= MAX_RANGE_SIZE)
3592     return;
3593     ranges[2 + length] = 256;
3594     length++;
3595     }
3596     ranges[0] = length;
3597     }
3598    
3599     static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3600     {
3601     int ranges[2 + MAX_RANGE_SIZE];
3602     pcre_uint8 bit, cbit, all;
3603     int i, byte, length = 0;
3604    
3605     bit = bits[0] & 0x1;
3606     ranges[1] = bit;
3607     /* Can be 0 or 255. */
3608     all = -bit;
3609    
3610     for (i = 0; i < 256; )
3611     {
3612     byte = i >> 3;
3613     if ((i & 0x7) == 0 && bits[byte] == all)
3614     i += 8;
3615     else
3616     {
3617     cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3618     if (cbit != bit)
3619     {
3620     if (length >= MAX_RANGE_SIZE)
3621     return FALSE;
3622     ranges[2 + length] = i;
3623     length++;
3624     bit = cbit;
3625     all = -cbit;
3626     }
3627     i++;
3628     }
3629     }
3630    
3631     if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3632     {
3633     if (length >= MAX_RANGE_SIZE)
3634     return FALSE;
3635     ranges[2 + length] = 256;
3636     length++;
3637     }
3638     ranges[0] = length;
3639    
3640     return check_ranges(common, ranges, backtracks, FALSE);
3641     }
3642    
/* Emits the "any newline" helper (entered with a fast call; the return
address is kept in RETURN_ADDR). */
3643 ph10 664 static void check_anynewline(compiler_common *common)
3644     {
3645     /* Check whether TMP1 contains a newline character. TMP2 destroyed.
The characters accepted are 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in
the 16 and 32 bit libraries and in UTF mode of the 8 bit library. The 0/1
result is left in TMP2 and the last operation also sets the flags from it.
TMP1 is modified as well. */
3646     DEFINE_COMPILER;
3647    
3648 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3649 ph10 664
/* From here TMP1 holds the character minus 0x0a; one unsigned comparison
covers the whole 0x0a-0x0d interval. */
3650     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3651     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3652 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3653 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3654 chpe 1055 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3655 ph10 836 #ifdef COMPILE_PCRE8
3656     if (common->utf)
3657 ph10 664 {
3658 ph10 836 #endif
/* Setting the lowest bit makes 0x2028 - 0x0a equal to 0x2029 - 0x0a, so one
comparison tests both characters. */
3659 zherczeg 1209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3660 ph10 664 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3661     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3662 ph10 836 #ifdef COMPILE_PCRE8
3663 ph10 664 }
3664     #endif
3665 chpe 1055 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last comparison emitted above. */
3666 zherczeg 1209 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3667 ph10 664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3668     }
3669    
/* Emits the horizontal space helper (entered with a fast call; the return
address is kept in RETURN_ADDR). */
3670     static void check_hspace(compiler_common *common)
3671     {
3672     /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
The characters accepted are 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
0x2000-0x200a, 0x202f, 0x205f and 0x3000 in the 16 and 32 bit libraries and
in UTF mode of the 8 bit library. The 0/1 result is left in TMP2 and the
last operation also sets the flags from it. TMP1 is modified when the
extended list is tested. */
3673     DEFINE_COMPILER;
3674    
3675 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3676 ph10 664
/* Each comparison result is merged into TMP2 by the OP_FLAGS that follows
it; the result of the last comparison is merged at the end of the function. */
3677     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3678 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3679 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3680 zherczeg 1209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3681 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3682 chpe 1055 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3683 ph10 836 #ifdef COMPILE_PCRE8
3684     if (common->utf)
3685 ph10 664 {
3686 ph10 836 #endif
3687 zherczeg 1209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3688 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3689 zherczeg 1209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3690 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3691 zherczeg 1209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here TMP1 holds the character minus 0x2000; one unsigned comparison
covers the whole 0x2000-0x200a interval. */
3692 ph10 664 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3693     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3694 zherczeg 1209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3695 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3696 zherczeg 1209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3697 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3698 zherczeg 1209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3699 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3700 ph10 836 #ifdef COMPILE_PCRE8
3701 ph10 664 }
3702     #endif
3703 chpe 1055 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3704 zherczeg 1209 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3705 ph10 664
3706     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3707     }
3708    
/* Emits the vertical space helper (entered with a fast call; the return
address is kept in RETURN_ADDR). */
3709     static void check_vspace(compiler_common *common)
3710     {
3711     /* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
The characters accepted are 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in
the 16 and 32 bit libraries and in UTF mode of the 8 bit library. The 0/1
result is left in TMP2 and the last operation also sets the flags from it.
TMP1 is modified as well. */
3712     DEFINE_COMPILER;
3713    
3714 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3715 ph10 664
/* From here TMP1 holds the character minus 0x0a; one unsigned comparison
covers the whole 0x0a-0x0d interval. */
3716     OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3717     OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3718 zherczeg 1209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3719 ph10 664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3720 chpe 1055 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3721 ph10 836 #ifdef COMPILE_PCRE8
3722     if (common->utf)
3723 ph10 664 {
3724 ph10 836 #endif
/* Setting the lowest bit makes 0x2028 - 0x0a equal to 0x2029 - 0x0a, so one
comparison tests both characters. */
3725 zherczeg 1209 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3726 ph10 664 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3727     OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3728 ph10 836 #ifdef COMPILE_PCRE8
3729 ph10 664 }
3730     #endif
3731 chpe 1055 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last comparison emitted above. */
3732 zherczeg 1209 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3733 ph10 664
3734     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3735     }
3736    
/* The comparison helpers below use the STR_END and STACK_TOP registers as
character registers; the original values are saved on entry and restored
before returning. */
3737     #define CHAR1 STR_END
3738     #define CHAR2 STACK_TOP
3739    
/* Emits the caseful comparison helper (entered with a fast call; the return
address is kept in RETURN_ADDR). The emitted code first moves STR_PTR back
by TMP2, then compares the characters read through TMP1 with those read
through STR_PTR, decreasing TMP2 by IN_UCHARS(1) per character. The loop
ends at the first different character pair or when TMP2 reaches zero, so
TMP2 is zero on return only if no difference was found (assuming TMP2 is a
positive multiple of IN_UCHARS(1) on entry - confirm against the callers).
TMP1, TMP2, TMP3, STR_PTR and LOCALS0 are modified. */
3740     static void do_casefulcmp(compiler_common *common)
3741     {
3742     DEFINE_COMPILER;
3743     struct sljit_jump *jump;
3744     struct sljit_label *label;
3745    
3746 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3747 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers used as CHAR1 and CHAR2. */
3748     OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3749     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one character because the loads below
address IN_UCHARS(1) past the base register (MOVU_UCHAR is presumably a
load that also updates the base register - confirm against its definition). */
3750 ph10 836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3751     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3752 ph10 664
3753     label = LABEL();
3754 ph10 836 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3755     OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3756 ph10 664 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3757 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3758 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, label);
3759    
3760     JUMPHERE(jump);
/* Undo the initial adjustment of STR_PTR and restore the saved registers. */
3761 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3762 ph10 664 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3763     OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3764     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3765     }
3766    
/* The STACK_LIMIT register holds the address of the lower case table inside
do_caselesscmp; its original value is saved and restored there. */
3767     #define LCC_TABLE STACK_LIMIT
3768    
/* Emits the caseless comparison helper (entered with a fast call; the
return address is kept in RETURN_ADDR). It works like the caseful helper:
STR_PTR is moved back by TMP2, then the characters read through TMP1 and
through STR_PTR are compared while TMP2 is decreased by IN_UCHARS(1) per
character. Before the comparison each character is replaced by its entry in
the common->lcc table; in the non 8 bit libraries characters above 255 are
compared unchanged. TMP2 is zero on return only if no difference was found
(assuming TMP2 is a positive multiple of IN_UCHARS(1) on entry - confirm
against the callers). TMP1, TMP2, TMP3, STR_PTR, LOCALS0 and LOCALS1 are
modified. */
3769     static void do_caselesscmp(compiler_common *common)
3770     {
3771     DEFINE_COMPILER;
3772     struct sljit_jump *jump;
3773     struct sljit_label *label;
3774    
3775 zherczeg 955 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3776 ph10 664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3777    
/* Save the registers used as LCC_TABLE, CHAR1 and CHAR2. */
3778     OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3779     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3780     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3781     OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one character because the loads below
address IN_UCHARS(1) past the base register. */
3782 ph10 836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3783     OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3784 ph10 664
3785     label = LABEL();
3786 ph10 836 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3787     OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for characters above 255. */
3788     #ifndef COMPILE_PCRE8
3789     jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3790     #endif
3791 ph10 664 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3792 ph10 836 #ifndef COMPILE_PCRE8
3793     JUMPHERE(jump);
3794     jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3795     #endif
3796 ph10 664 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3797 ph10 836 #ifndef COMPILE_PCRE8
3798     JUMPHERE(jump);
3799     #endif
3800 ph10 664 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3801 ph10 836 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3802 ph10 664 JUMPTO(SLJIT_C_NOT_ZERO, label);
3803    
3804     JUMPHERE(jump);
/* Undo the initial adjustment of STR_PTR and restore the saved registers. */
3805 ph10 836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3806 ph10 664 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3807     OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3808     OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3809     sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3810     }
3811    
3812     #undef LCC_TABLE
3813     #undef CHAR1
3814     #undef CHAR2
3815    
3816 ph10 836 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3817 ph10 664
3818 zherczeg 1245 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3819 ph10 664 {
3820     /* This function would be ineffective to do in JIT level. */
3821 zherczeg 1047 pcre_uint32 c1, c2;
3822 zherczeg 929 const pcre_uchar *src2 = args->uchar_ptr;
3823 ph10 836 const pcre_uchar *end2 = args->end;
3824 zherczeg 1047 const ucd_record *ur;
3825     const pcre_uint32 *pp;
3826 ph10 664
3827     while (src1 < end1)
3828     {
3829     if (src2 >= end2)
3830 zherczeg 915 return (pcre_uchar*)1;
3831 ph10 664 GETCHARINC(c1, src1);
3832     GETCHARINC(c2, src2);
3833 zherczeg 1047 ur = GET_UCD(c2);
3834     if (c1 != c2 && c1 != c2 + ur->other_case)
3835     {
3836     pp = PRIV(ucd_caseless_sets) + ur->caseset;
3837     for (;;)
3838     {
3839     if (c1 < *pp) return NULL;
3840     if (c1 == *pp++) break;
3841     }
3842     }
3843 ph10 664 }
3844     return src2;
3845     }
3846    
3847 ph10 836 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3848 ph10 664
3849 ph10 836 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3850 zherczeg 970 compare_context* context, jump_list **backtracks)
3851 ph10 664 {
3852     DEFINE_COMPILER;
3853     unsigned int othercasebit = 0;
3854 ph10 836 pcre_uchar *othercasechar = NULL;
3855     #ifdef SUPPORT_UTF
3856     int utflength;
3857 ph10 664 #endif
3858    
3859     if (caseless && char_has_othercase(common, cc))
3860     {
3861     othercasebit = char_get_othercase_bit(common, cc);
3862     SLJIT_ASSERT(othercasebit);
3863     /* Extracting bit difference info. */
3864 chpe 1055 #if defined COMPILE_PCRE8
3865 ph10 836 othercasechar = cc + (othercasebit >> 8);
3866 ph10 664 othercasebit &= 0xff;
3867 chpe 1055 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3868 chpe 1121 /* Note that this code only handles characters in the BMP. If there
3869 ph10 1221 ever are characters outside the BMP whose othercase differs in only one
3870 chpe 1121 bit from itself (there currently are none), this code will need to be
3871     revised for COMPILE_PCRE32. */
3872 ph10 836 othercasechar = cc + (othercasebit >> 9);
3873     if ((othercasebit & 0x100) != 0)
3874     othercasebit = (othercasebit & 0xff) << 8;
3875     else
3876     othercasebit &= 0xff;
3877 chpe 1055 #endif /* COMPILE_PCRE[8|16|32] */
3878 ph10 664 }
3879    
3880     if (context->sourcereg == -1)
3881     {
3882 chpe 1055 #if defined COMPILE_PCRE8
3883 ph10 664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3884     if (context->length >= 4)
3885     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3886     else if (context->length >= 2)
3887 zherczeg 847 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3888 ph10 664 else
3889     #endif
3890     OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3891 chpe 1055 #elif defined COMPILE_PCRE16
3892 ph10 836 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3893     if (context->length >= 4)
3894     OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3895     else
3896     #endif
3897 chpe 1055 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3898     #elif defined COMPILE_PCRE32
3899     OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3900     #endif /* COMPILE_PCRE[8|16|32] */
3901 ph10 664 context->sourcereg = TMP2;
3902     }
3903    
3904 ph10 836 #ifdef SUPPORT_UTF
3905     utflength = 1;
3906     if (common->utf && HAS_EXTRALEN(*cc))
3907     utflength += GET_EXTRALEN(*cc);
3908 ph10 664
3909     do
3910     {
3911     #endif
3912