/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 551 - (hide annotations) (download)
Sun Oct 10 17:33:07 2010 UTC (2 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 186694 byte(s)
Make (*COMMIT) override (*THEN) and similar.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74 ph10 511 #define MATCH_ACCEPT (-999)
75     #define MATCH_COMMIT (-998)
76     #define MATCH_PRUNE (-997)
77     #define MATCH_SKIP (-996)
78     #define MATCH_SKIP_ARG (-995)
79     #define MATCH_THEN (-994)
80 ph10 210
81 ph10 510 /* This is a convenience macro for code that occurs many times. */
82    
83     #define MRRETURN(ra) \
84     { \
85     md->mark = markptr; \
86     RRETURN(ra); \
87     }
88    
89 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
90     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91     because the offset vector is always a multiple of 3 long. */
92    
93     #define REC_STACK_SAVE_MAX 30
94    
95     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96    
97     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
98     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
99    
100    
101    
102 ph10 475 #ifdef PCRE_DEBUG
103 nigel 77 /*************************************************
104     * Debugging function to print chars *
105     *************************************************/
106    
107     /* Print a sequence of chars in printable format, stopping at the end of the
108     subject if the requested.
109    
110     Arguments:
111     p points to characters
112     length number to print
113     is_subject TRUE if printing from within md->start_subject
114     md pointer to matching data block, if is_subject is TRUE
115    
116     Returns: nothing
117     */
118    
119     static void
120     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121     {
122 nigel 93 unsigned int c;
123 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124     while (length-- > 0)
125     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126     }
127     #endif
128    
129    
130    
131     /*************************************************
132     * Match a back-reference *
133     *************************************************/
134    
135     /* If a back reference hasn't been set, the length that is passed is greater
136     than the number of characters left in the string, so the match fails.
137    
138     Arguments:
139     offset index into the offset vector
140     eptr points into the subject
141     length length to be matched
142     md points to match data block
143     ims the ims flags
144    
145     Returns: TRUE if matched
146     */
147    
148     static BOOL
149 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
150 nigel 77 unsigned long int ims)
151     {
152 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
153 nigel 77
154 ph10 475 #ifdef PCRE_DEBUG
155 nigel 77 if (eptr >= md->end_subject)
156     printf("matching subject <null>");
157     else
158     {
159     printf("matching subject ");
160     pchars(eptr, length, TRUE, md);
161     }
162     printf(" against backref ");
163     pchars(p, length, FALSE, md);
164     printf("\n");
165     #endif
166    
167     /* Always fail if not enough characters left */
168    
169     if (length > md->end_subject - eptr) return FALSE;
170    
171 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172     properly if Unicode properties are supported. Otherwise, we can check only
173     ASCII characters. */
174 nigel 77
175     if ((ims & PCRE_CASELESS) != 0)
176     {
177 ph10 354 #ifdef SUPPORT_UTF8
178     #ifdef SUPPORT_UCP
179     if (md->utf8)
180     {
181 ph10 358 USPTR endptr = eptr + length;
182 ph10 354 while (eptr < endptr)
183     {
184 ph10 358 int c, d;
185 ph10 354 GETCHARINC(c, eptr);
186     GETCHARINC(d, p);
187     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188 ph10 358 }
189     }
190 ph10 354 else
191     #endif
192     #endif
193    
194     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195     is no UCP support. */
196 ph10 358
197 nigel 77 while (length-- > 0)
198 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199 nigel 77 }
200 ph10 358
201 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
202     are in UTF-8 mode. */
203 ph10 358
204 nigel 77 else
205     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
207     return TRUE;
208     }
209    
210    
211    
212     /***************************************************************************
213     ****************************************************************************
214     RECURSION IN THE match() FUNCTION
215    
216 nigel 87 The match() function is highly recursive, though not every recursive call
217     increases the recursive depth. Nevertheless, some regular expressions can cause
218     it to recurse to a great depth. I was writing for Unix, so I just let it call
219     itself recursively. This uses the stack for saving everything that has to be
220     saved for a recursive call. On Unix, the stack can be large, and this works
221     fine.
222 nigel 77
223 nigel 87 It turns out that on some non-Unix-like systems there are problems with
224     programs that use a lot of stack. (This despite the fact that every last chip
225     has oodles of memory these days, and techniques for extending the stack have
226     been known for decades.) So....
227 nigel 77
228     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229     calls by keeping local variables that need to be preserved in blocks of memory
230 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
231 nigel 77 achieve this so that the actual code doesn't look very different to what it
232     always used to.
233 ph10 164
234 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
235 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
236     Switzer, the use of longjmp() has been abolished, at the cost of having to
237     provide a unique number for each call to RMATCH. There is no way of generating
238     a sequence of numbers at compile time in C. I have given them names, to make
239     them stand out more clearly.
240    
241     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
244     don't have indeterminate values; this has meant that the frame size can be
245 ph10 164 reduced because the result can be "passed back" by straight setting of the
246     variable instead of being passed in the frame.
247 nigel 77 ****************************************************************************
248     ***************************************************************************/
249    
250 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251     below must be updated in sync. */
252 nigel 77
253 ph10 164 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
254     RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255     RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256     RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257 ph10 210 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258 ph10 527 RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259     RM61, RM62 };
260 ph10 164
261 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
262 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
263 ph10 501 actually used in this definition. */
264 nigel 77
265     #ifndef NO_RECURSE
266     #define REGISTER register
267 ph10 164
268 ph10 475 #ifdef PCRE_DEBUG
269 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
270 nigel 87 { \
271     printf("match() called in line %d\n", __LINE__); \
272 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273 nigel 87 printf("to line %d\n", __LINE__); \
274     }
275     #define RRETURN(ra) \
276     { \
277     printf("match() returned %d from line %d ", ra, __LINE__); \
278     return ra; \
279     }
280     #else
281 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283 nigel 77 #define RRETURN(ra) return ra
284 nigel 87 #endif
285    
286 nigel 77 #else
287    
288    
289 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
290     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291     argument of match(), which never changes. */
292 nigel 77
293     #define REGISTER
294    
295 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296 nigel 77 {\
297     heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
298 ph10 534 if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299 ph10 164 frame->Xwhere = rw; \
300     newframe->Xeptr = ra;\
301     newframe->Xecode = rb;\
302 ph10 168 newframe->Xmstart = mstart;\
303 ph10 501 newframe->Xmarkptr = markptr;\
304 ph10 164 newframe->Xoffset_top = rc;\
305     newframe->Xims = re;\
306     newframe->Xeptrb = rf;\
307     newframe->Xflags = rg;\
308     newframe->Xrdepth = frame->Xrdepth + 1;\
309     newframe->Xprevframe = frame;\
310     frame = newframe;\
311     DPRINTF(("restarting from line %d\n", __LINE__));\
312     goto HEAP_RECURSE;\
313     L_##rw:\
314     DPRINTF(("jumped back to line %d\n", __LINE__));\
315 nigel 77 }
316    
317     #define RRETURN(ra)\
318     {\
319 ph10 527 heapframe *oldframe = frame;\
320     frame = oldframe->Xprevframe;\
321     (pcre_stack_free)(oldframe);\
322 nigel 77 if (frame != NULL)\
323     {\
324 ph10 164 rrc = ra;\
325     goto HEAP_RETURN;\
326 nigel 77 }\
327     return ra;\
328     }
329    
330    
331     /* Structure for remembering the local variables in a private frame. When
/* NO_RECURSE is defined, match() obtains one of these with pcre_stack_malloc
   for its top level and the RMATCH macro obtains another for each simulated
   recursive call; RRETURN releases the current frame with pcre_stack_free and
   steps back to Xprevframe. */
332    
333     typedef struct heapframe {
334     struct heapframe *Xprevframe; /* Frame of the caller; NULL marks the top level */
335    
336     /* Function arguments that may change */
/* These mirror the parameters of match(); inside match() they are reached
   through macros with the same names minus the leading X. */
337    
338 ph10 409 USPTR Xeptr;
339 nigel 77 const uschar *Xecode;
340 ph10 409 USPTR Xmstart;
341 ph10 501 USPTR Xmarkptr;
342 nigel 77 int Xoffset_top;
343     long int Xims; /* NOTE(review): signed here, but match() takes ims as unsigned long int and Xoriginal_ims below is unsigned - confirm intentional */
344     eptrblock *Xeptrb;
345     int Xflags;
346 nigel 91 unsigned int Xrdepth; /* RMATCH sets this to the calling frame's Xrdepth + 1 */
347 nigel 77
348     /* Function local variables */
/* Likewise aliased inside match() by macros named without the X prefix. */
349    
350 ph10 409 USPTR Xcallpat;
351 ph10 406 #ifdef SUPPORT_UTF8
352 ph10 409 USPTR Xcharptr;
353 ph10 406 #endif
354 ph10 409 USPTR Xdata;
355     USPTR Xnext;
356     USPTR Xpp;
357     USPTR Xprev;
358     USPTR Xsaved_eptr;
359 nigel 77
360     recursion_info Xnew_recursive;
361    
362     BOOL Xcur_is_word;
363     BOOL Xcondition;
364     BOOL Xprev_is_word;
365    
366     unsigned long int Xoriginal_ims;
367    
368     #ifdef SUPPORT_UCP
369     int Xprop_type;
370 nigel 87 int Xprop_value;
371 nigel 77 int Xprop_fail_result;
372     int Xprop_category;
373     int Xprop_chartype;
374 nigel 87 int Xprop_script;
375 ph10 123 int Xoclength;
376     uschar Xocchars[8];
377 nigel 77 #endif
378    
379 ph10 403 int Xcodelink;
380 nigel 77 int Xctype;
381 nigel 93 unsigned int Xfc;
382 nigel 77 int Xfi;
383     int Xlength;
384     int Xmax;
385     int Xmin;
386     int Xnumber;
387     int Xoffset;
388     int Xop;
389     int Xsave_capture_last;
390     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
391     int Xstacksave[REC_STACK_SAVE_MAX];
392    
393     eptrblock Xnewptrb;
394    
395 ph10 164 /* Where to jump back to */
/* RMATCH stores its "rw" argument (one of the RMn numbers) here in the calling
   frame before switching to the new frame, and defines the label L_<rw> as the
   resume point. The dispatch at HEAP_RETURN is outside this view - presumably
   it switches on this value. */
396 nigel 77
397 ph10 164 int Xwhere;
398 ph10 165
399 nigel 77 } heapframe;
400    
401     #endif
402    
403    
404     /***************************************************************************
405     ***************************************************************************/
406    
407    
408    
409     /*************************************************
410     * Match from current position *
411     *************************************************/
412    
413 nigel 93 /* This function is called recursively in many circumstances. Whenever it
414 nigel 77 returns a negative (error) response, the outer incarnation must also return the
415 ph10 426 same response. */
416 nigel 77
417 ph10 426 /* These macros pack up tests that are used for partial matching, and which
418     appear several times in the code. We set the "hit end" flag if the pointer is
419     at the end of the subject and also past the start of the subject (i.e.
420 ph10 427 something has been matched). For hard partial matching, we then return
421     immediately. The second one is used when we already know we are past the end of
422     the subject. */
423 ph10 426
424     #define CHECK_PARTIAL()\
425 ph10 435 if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
426 ph10 427 {\
427     md->hitend = TRUE;\
428 ph10 510 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
429 ph10 427 }
430 ph10 426
431     #define SCHECK_PARTIAL()\
432 ph10 462 if (md->partial != 0 && eptr > mstart)\
433 ph10 427 {\
434     md->hitend = TRUE;\
435 ph10 510 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
436 ph10 427 }
437 ph10 426
438 ph10 427
439 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
440     the md structure (e.g. utf8, end_subject) into individual variables to improve
441 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
442     made performance worse.
443    
444     Arguments:
445 nigel 93 eptr pointer to current character in subject
446     ecode pointer to current position in compiled code
447 ph10 168 mstart pointer to the current match start position (can be modified
448 ph10 172 by encountering \K)
449 ph10 501 markptr pointer to the most recent MARK name, or NULL
450 nigel 77 offset_top current top pointer
451     md pointer to "static" info for the match
452     ims current /i, /m, and /s options
453     eptrb pointer to chain of blocks containing eptr at start of
454     brackets - for testing for empty matches
455     flags can contain
456     match_condassert - this is an assertion condition
457 nigel 93 match_cbegroup - this is the start of an unlimited repeat
458     group that can match an empty string
459 nigel 87 rdepth the recursion depth
460 nigel 77
461     Returns: MATCH_MATCH if matched ) these values are >= 0
462     MATCH_NOMATCH if failed to match )
463 ph10 510 a negative MATCH_xxx value for PRUNE, SKIP, etc
464 nigel 77 a negative PCRE_ERROR_xxx value if aborted by an error condition
465 nigel 87 (e.g. stopped by repeated call or recursion limit)
466 nigel 77 */
467    
468     static int
469 ph10 510 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
470     const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
471 ph10 501 eptrblock *eptrb, int flags, unsigned int rdepth)
472 nigel 77 {
473     /* These variables do not need to be preserved over recursion in this function,
474 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
475     "register" because they are used a lot in loops. */
476 nigel 77
477 nigel 91 register int rrc; /* Returns from recursive calls */
478     register int i; /* Used for loops not involving calls to RMATCH() */
479 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
480 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
481 nigel 77
482 nigel 93 BOOL minimize, possessive; /* Quantifier options */
483 ph10 403 int condcode;
484 nigel 93
485 nigel 77 /* When recursion is not being used, all "local" variables that have to be
486     preserved over calls to RMATCH() are part of a "frame" which is obtained from
487     heap storage. Set up the top-level frame here; others are obtained from the
488     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
489    
490     #ifdef NO_RECURSE
491     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
492 ph10 531 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
493 nigel 77 frame->Xprevframe = NULL; /* Marks the top level */
494    
495     /* Copy in the original argument variables */
496    
497     frame->Xeptr = eptr;
498     frame->Xecode = ecode;
499 ph10 168 frame->Xmstart = mstart;
500 ph10 501 frame->Xmarkptr = markptr;
501 nigel 77 frame->Xoffset_top = offset_top;
502     frame->Xims = ims;
503     frame->Xeptrb = eptrb;
504     frame->Xflags = flags;
505 nigel 87 frame->Xrdepth = rdepth;
506 nigel 77
507     /* This is where control jumps back to to effect "recursion" */
508    
509     HEAP_RECURSE:
510    
511     /* Macros make the argument variables come from the current frame */
512    
513     #define eptr frame->Xeptr
514     #define ecode frame->Xecode
515 ph10 168 #define mstart frame->Xmstart
516 ph10 501 #define markptr frame->Xmarkptr
517 nigel 77 #define offset_top frame->Xoffset_top
518     #define ims frame->Xims
519     #define eptrb frame->Xeptrb
520     #define flags frame->Xflags
521 nigel 87 #define rdepth frame->Xrdepth
522 nigel 77
523     /* Ditto for the local variables */
524    
525     #ifdef SUPPORT_UTF8
526     #define charptr frame->Xcharptr
527     #endif
528     #define callpat frame->Xcallpat
529 ph10 403 #define codelink frame->Xcodelink
530 nigel 77 #define data frame->Xdata
531     #define next frame->Xnext
532     #define pp frame->Xpp
533     #define prev frame->Xprev
534     #define saved_eptr frame->Xsaved_eptr
535    
536     #define new_recursive frame->Xnew_recursive
537    
538     #define cur_is_word frame->Xcur_is_word
539     #define condition frame->Xcondition
540     #define prev_is_word frame->Xprev_is_word
541    
542     #define original_ims frame->Xoriginal_ims
543    
544     #ifdef SUPPORT_UCP
545     #define prop_type frame->Xprop_type
546 nigel 87 #define prop_value frame->Xprop_value
547 nigel 77 #define prop_fail_result frame->Xprop_fail_result
548     #define prop_category frame->Xprop_category
549     #define prop_chartype frame->Xprop_chartype
550 nigel 87 #define prop_script frame->Xprop_script
551 ph10 115 #define oclength frame->Xoclength
552     #define occhars frame->Xocchars
553 nigel 77 #endif
554    
555     #define ctype frame->Xctype
556     #define fc frame->Xfc
557     #define fi frame->Xfi
558     #define length frame->Xlength
559     #define max frame->Xmax
560     #define min frame->Xmin
561     #define number frame->Xnumber
562     #define offset frame->Xoffset
563     #define op frame->Xop
564     #define save_capture_last frame->Xsave_capture_last
565     #define save_offset1 frame->Xsave_offset1
566     #define save_offset2 frame->Xsave_offset2
567     #define save_offset3 frame->Xsave_offset3
568     #define stacksave frame->Xstacksave
569    
570     #define newptrb frame->Xnewptrb
571    
572     /* When recursion is being used, local variables are allocated on the stack and
573     get preserved during recursion in the normal way. In this environment, fi and
574     i, and fc and c, can be the same variables. */
575    
576 nigel 93 #else /* NO_RECURSE not defined */
577 nigel 77 #define fi i
578     #define fc c
579    
580    
581 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
582     const uschar *charptr; /* in small blocks of the code. My normal */
583     #endif /* style of coding would have declared */
584     const uschar *callpat; /* them within each of those blocks. */
585     const uschar *data; /* However, in order to accommodate the */
586     const uschar *next; /* version of this code that uses an */
587     USPTR pp; /* external "stack" implemented on the */
588     const uschar *prev; /* heap, it is easier to declare them all */
589     USPTR saved_eptr; /* here, so the declarations can be cut */
590     /* out in a block. The only declarations */
591     recursion_info new_recursive; /* within blocks below are for variables */
592     /* that do not have to be preserved over */
593     BOOL cur_is_word; /* a recursive call to RMATCH(). */
594     BOOL condition;
595 nigel 77 BOOL prev_is_word;
596    
597     unsigned long int original_ims;
598    
599     #ifdef SUPPORT_UCP
600     int prop_type;
601 nigel 87 int prop_value;
602 nigel 77 int prop_fail_result;
603     int prop_category;
604     int prop_chartype;
605 nigel 87 int prop_script;
606 ph10 115 int oclength;
607     uschar occhars[8];
608 nigel 77 #endif
609    
610 ph10 399 int codelink;
611 nigel 77 int ctype;
612     int length;
613     int max;
614     int min;
615     int number;
616     int offset;
617     int op;
618     int save_capture_last;
619     int save_offset1, save_offset2, save_offset3;
620     int stacksave[REC_STACK_SAVE_MAX];
621    
622     eptrblock newptrb;
623 nigel 93 #endif /* NO_RECURSE */
624 nigel 77
625     /* These statements are here to stop the compiler complaining about unitialized
626     variables. */
627    
628     #ifdef SUPPORT_UCP
629 nigel 87 prop_value = 0;
630 nigel 77 prop_fail_result = 0;
631     #endif
632    
633 nigel 93
634 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
635     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
636     used. Thanks to Ian Taylor for noticing this possibility and sending the
637     original patch. */
638    
639     TAIL_RECURSE:
640    
641 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
642     are specified by the macro RMATCH and RRETURN is used to return. When
643     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
644 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
645 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
646     complicated macro. It has to be used in one particular way. This shouldn't,
647     however, impact performance when true recursion is being used. */
648 nigel 77
649 ph10 164 #ifdef SUPPORT_UTF8
650     utf8 = md->utf8; /* Local copy of the flag */
651     #else
652     utf8 = FALSE;
653     #endif
654    
655 nigel 87 /* First check that we haven't called match() too many times, or that we
656     haven't exceeded the recursive call limit. */
657    
658 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
659 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
660 nigel 77
661     original_ims = ims; /* Save for resetting on ')' */
662 nigel 91
663 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
664     string, the match_cbegroup flag is set. When this is the case, add the current
665     subject pointer to the chain of such remembered pointers, to be checked when we
666     hit the closing ket, in order to break infinite loops that match no characters.
667 ph10 197 When match() is called in other circumstances, don't add to the chain. The
668     match_cbegroup flag must NOT be used with tail recursion, because the memory
669     block that is used is on the stack, so a new one may be required for each
670     match(). */
671 nigel 77
672 nigel 93 if ((flags & match_cbegroup) != 0)
673 nigel 77 {
674 ph10 197 newptrb.epb_saved_eptr = eptr;
675     newptrb.epb_prev = eptrb;
676     eptrb = &newptrb;
677 nigel 77 }
678    
679 nigel 93 /* Now start processing the opcodes. */
680 nigel 77
681     for (;;)
682     {
683 nigel 93 minimize = possessive = FALSE;
684 nigel 77 op = *ecode;
685 ph10 443
686 nigel 93 switch(op)
687     {
688 ph10 510 case OP_MARK:
689     markptr = ecode + 2;
690     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
691 ph10 512 ims, eptrb, flags, RM55);
692    
693     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
694     argument, and we must check whether that argument matches this MARK's
695     argument. It is passed back in md->start_match_ptr (an overloading of that
696     variable). If it does match, we reset that variable to the current subject
697     position and return MATCH_SKIP. Otherwise, pass back the return code
698 ph10 510 unaltered. */
699 ph10 512
700     if (rrc == MATCH_SKIP_ARG &&
701 ph10 510 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
702     {
703     md->start_match_ptr = eptr;
704     RRETURN(MATCH_SKIP);
705     }
706    
707 ph10 512 if (md->mark == NULL) md->mark = markptr;
708 ph10 510 RRETURN(rrc);
709    
710 ph10 210 case OP_FAIL:
711 ph10 510 MRRETURN(MATCH_NOMATCH);
712 ph10 211
713 ph10 551 /* COMMIT overrides PRUNE, SKIP, and THEN */
714    
715 ph10 510 case OP_COMMIT:
716     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
717     ims, eptrb, flags, RM52);
718 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
719     rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
720     rrc != MATCH_THEN)
721     RRETURN(rrc);
722 ph10 510 MRRETURN(MATCH_COMMIT);
723    
724 ph10 551 /* PRUNE overrides THEN */
725    
726 ph10 210 case OP_PRUNE:
727     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
728     ims, eptrb, flags, RM51);
729 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
730 ph10 510 MRRETURN(MATCH_PRUNE);
731 ph10 211
732 ph10 510 case OP_PRUNE_ARG:
733     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
734 ph10 512 ims, eptrb, flags, RM56);
735 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
736 ph10 510 md->mark = ecode + 2;
737     RRETURN(MATCH_PRUNE);
738 ph10 211
739 ph10 551 /* SKIP overrides PRUNE and THEN */
740    
741 ph10 210 case OP_SKIP:
742     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
743     ims, eptrb, flags, RM53);
744 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
745     RRETURN(rrc);
746 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
747 ph10 510 MRRETURN(MATCH_SKIP);
748 ph10 211
749 ph10 510 case OP_SKIP_ARG:
750     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
751 ph10 512 ims, eptrb, flags, RM57);
752 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
753     RRETURN(rrc);
754 ph10 512
755     /* Pass back the current skip name by overloading md->start_match_ptr and
756     returning the special MATCH_SKIP_ARG return code. This will either be
757     caught by a matching MARK, or get to the top, where it is treated the same
758 ph10 510 as PRUNE. */
759 ph10 512
760 ph10 510 md->start_match_ptr = ecode + 2;
761 ph10 512 RRETURN(MATCH_SKIP_ARG);
762 ph10 550
763     /* For THEN (and THEN_ARG) we pass back the address of the bracket or
764     the alt that is at the start of the current branch. This makes it possible
765     to skip back past alternatives that precede the THEN within the current
766     branch. */
767 ph10 512
768 ph10 210 case OP_THEN:
769     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
770 ph10 212 ims, eptrb, flags, RM54);
771 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
772 ph10 550 md->start_match_ptr = ecode - GET(ecode, 1);
773 ph10 510 MRRETURN(MATCH_THEN);
774    
775     case OP_THEN_ARG:
776 ph10 550 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
777     offset_top, md, ims, eptrb, flags, RM58);
778 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
779 ph10 550 md->start_match_ptr = ecode - GET(ecode, 1);
780     md->mark = ecode + LINK_SIZE + 2;
781 ph10 212 RRETURN(MATCH_THEN);
782 ph10 211
783 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
784     the current subject position in the working slot at the top of the vector.
785     We mustn't change the current values of the data slot, because they may be
786     set from a previous iteration of this group, and be referred to by a
787     reference inside the group.
788 nigel 77
789 nigel 93 If the bracket fails to match, we need to restore this value and also the
790     values of the final offsets, in case they were set by a previous iteration
791     of the same bracket.
792 nigel 77
793 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
794     a non-capturing bracket. Don't worry about setting the flag for the error
795     case here; that is handled in the code for KET. */
796 nigel 77
797 nigel 93 case OP_CBRA:
798     case OP_SCBRA:
799     number = GET2(ecode, 1+LINK_SIZE);
800 nigel 77 offset = number << 1;
801    
802 ph10 475 #ifdef PCRE_DEBUG
803 nigel 93 printf("start bracket %d\n", number);
804     printf("subject=");
805 nigel 77 pchars(eptr, 16, TRUE, md);
806     printf("\n");
807     #endif
808    
809     if (offset < md->offset_max)
810     {
811     save_offset1 = md->offset_vector[offset];
812     save_offset2 = md->offset_vector[offset+1];
813     save_offset3 = md->offset_vector[md->offset_end - number];
814     save_capture_last = md->capture_last;
815    
816     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
817 ph10 531 md->offset_vector[md->offset_end - number] =
818 ph10 530 (int)(eptr - md->start_subject);
819 nigel 77
820 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
821 nigel 77 do
822     {
823 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
824     ims, eptrb, flags, RM1);
825 ph10 550 if (rrc != MATCH_NOMATCH &&
826     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
827     RRETURN(rrc);
828 nigel 77 md->capture_last = save_capture_last;
829     ecode += GET(ecode, 1);
830     }
831     while (*ecode == OP_ALT);
832    
833     DPRINTF(("bracket %d failed\n", number));
834    
835     md->offset_vector[offset] = save_offset1;
836     md->offset_vector[offset+1] = save_offset2;
837     md->offset_vector[md->offset_end - number] = save_offset3;
838    
839 ph10 510 if (rrc != MATCH_THEN) md->mark = markptr;
840 nigel 77 RRETURN(MATCH_NOMATCH);
841     }
842    
843 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
844     as a non-capturing bracket. */
845 nigel 77
846 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
847     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
848    
849 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
850 nigel 77
851 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
852     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
853    
854 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
855     final alternative within the brackets, we would return the result of a
856     recursive call to match() whatever happened. We can reduce stack usage by
857 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
858     is set.*/
859 nigel 77
860 nigel 93 case OP_BRA:
861     case OP_SBRA:
862     DPRINTF(("start non-capturing bracket\n"));
863     flags = (op >= OP_SBRA)? match_cbegroup : 0;
864 nigel 91 for (;;)
865 nigel 77 {
866 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
867 nigel 93 {
868 ph10 197 if (flags == 0) /* Not a possibly empty group */
869     {
870     ecode += _pcre_OP_lengths[*ecode];
871     DPRINTF(("bracket 0 tail recursion\n"));
872     goto TAIL_RECURSE;
873     }
874    
875     /* Possibly empty group; can't use tail recursion. */
876    
877     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
878     eptrb, flags, RM48);
879 ph10 512 if (rrc == MATCH_NOMATCH) md->mark = markptr;
880     RRETURN(rrc);
881 nigel 93 }
882 nigel 91
883     /* For non-final alternatives, continue the loop for a NOMATCH result;
884     otherwise return. */
885    
886 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
887     eptrb, flags, RM2);
888 ph10 550 if (rrc != MATCH_NOMATCH &&
889     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
890     RRETURN(rrc);
891 nigel 77 ecode += GET(ecode, 1);
892     }
893 nigel 91 /* Control never reaches here. */
894 nigel 77
895     /* Conditional group: compilation checked that there are no more than
896     two branches. If the condition is false, skipping the first branch takes us
897     past the end if there is only one branch, but that's OK because that is
898 nigel 91 exactly what going to the ket would do. As there is only one branch to be
899     obeyed, we can use tail recursion to avoid using another stack frame. */
900 nigel 77
901     case OP_COND:
902 nigel 93 case OP_SCOND:
903 ph10 399 codelink= GET(ecode, 1);
904 ph10 406
905 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
906     inserted between OP_COND and an assertion condition. */
907 ph10 392
908 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
909     {
910     if (pcre_callout != NULL)
911     {
912     pcre_callout_block cb;
913     cb.version = 1; /* Version 1 of the callout block */
914     cb.callout_number = ecode[LINK_SIZE+2];
915     cb.offset_vector = md->offset_vector;
916     cb.subject = (PCRE_SPTR)md->start_subject;
917 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
918     cb.start_match = (int)(mstart - md->start_subject);
919     cb.current_position = (int)(eptr - md->start_subject);
920 ph10 381 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
921     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
922     cb.capture_top = offset_top/2;
923     cb.capture_last = md->capture_last;
924     cb.callout_data = md->callout_data;
925 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
926 ph10 381 if (rrc < 0) RRETURN(rrc);
927     }
928     ecode += _pcre_OP_lengths[OP_CALLOUT];
929     }
930 ph10 392
931 ph10 399 condcode = ecode[LINK_SIZE+1];
932 ph10 406
933 ph10 381 /* Now see what the actual condition is */
934 ph10 392
935 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
936 nigel 77 {
937 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
938     {
939 ph10 461 condition = FALSE;
940     ecode += GET(ecode, 1);
941     }
942 ph10 459 else
943 ph10 461 {
944 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
945     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
946 ph10 461
947 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
948     false, but the test was set up by name, scan the table to see if the
949     name refers to any other numbers, and test them. The condition is true
950     if any one is set. */
951 ph10 461
952 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
953     {
954     uschar *slotA = md->name_table;
955     for (i = 0; i < md->name_count; i++)
956 ph10 461 {
957     if (GET2(slotA, 0) == recno) break;
958 ph10 459 slotA += md->name_entry_size;
959     }
960 ph10 461
961 ph10 459 /* Found a name for the number - there can be only one; duplicate
962     names for different numbers are allowed, but not vice versa. First
963     scan down for duplicates. */
964 ph10 461
965 ph10 459 if (i < md->name_count)
966 ph10 461 {
967 ph10 459 uschar *slotB = slotA;
968     while (slotB > md->name_table)
969     {
970     slotB -= md->name_entry_size;
971     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
972     {
973     condition = GET2(slotB, 0) == md->recursive->group_num;
974 ph10 461 if (condition) break;
975     }
976 ph10 459 else break;
977 ph10 461 }
978    
979 ph10 459 /* Scan up for duplicates */
980 ph10 461
981 ph10 459 if (!condition)
982 ph10 461 {
983 ph10 459 slotB = slotA;
984     for (i++; i < md->name_count; i++)
985     {
986     slotB += md->name_entry_size;
987     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
988     {
989     condition = GET2(slotB, 0) == md->recursive->group_num;
990     if (condition) break;
991 ph10 461 }
992 ph10 459 else break;
993 ph10 461 }
994     }
995 ph10 459 }
996 ph10 461 }
997    
998 ph10 459 /* Choose the branch according to the condition */
999 ph10 461
1000 ph10 459 ecode += condition? 3 : GET(ecode, 1);
1001     }
1002 ph10 461 }
1003 nigel 93
1004 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1005 nigel 93 {
1006 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1007 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1008 ph10 461
1009 ph10 459 /* If the numbered capture is unset, but the reference was by name,
1010 ph10 461 scan the table to see if the name refers to any other numbers, and test
1011     them. The condition is true if any one is set. This is tediously similar
1012     to the code above, but not close enough to try to amalgamate. */
1013    
1014 ph10 459 if (!condition && condcode == OP_NCREF)
1015     {
1016 ph10 461 int refno = offset >> 1;
1017 ph10 459 uschar *slotA = md->name_table;
1018 ph10 461
1019 ph10 459 for (i = 0; i < md->name_count; i++)
1020 ph10 461 {
1021     if (GET2(slotA, 0) == refno) break;
1022 ph10 459 slotA += md->name_entry_size;
1023     }
1024 ph10 461
1025     /* Found a name for the number - there can be only one; duplicate names
1026     for different numbers are allowed, but not vice versa. First scan down
1027 ph10 459 for duplicates. */
1028 ph10 461
1029 ph10 459 if (i < md->name_count)
1030 ph10 461 {
1031 ph10 459 uschar *slotB = slotA;
1032     while (slotB > md->name_table)
1033     {
1034     slotB -= md->name_entry_size;
1035     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1036     {
1037     offset = GET2(slotB, 0) << 1;
1038 ph10 461 condition = offset < offset_top &&
1039 ph10 459 md->offset_vector[offset] >= 0;
1040 ph10 461 if (condition) break;
1041     }
1042 ph10 459 else break;
1043 ph10 461 }
1044    
1045 ph10 459 /* Scan up for duplicates */
1046 ph10 461
1047 ph10 459 if (!condition)
1048 ph10 461 {
1049 ph10 459 slotB = slotA;
1050     for (i++; i < md->name_count; i++)
1051     {
1052     slotB += md->name_entry_size;
1053     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1054     {
1055     offset = GET2(slotB, 0) << 1;
1056 ph10 461 condition = offset < offset_top &&
1057 ph10 459 md->offset_vector[offset] >= 0;
1058 ph10 461 if (condition) break;
1059     }
1060 ph10 459 else break;
1061 ph10 461 }
1062     }
1063 ph10 459 }
1064 ph10 461 }
1065    
1066 ph10 459 /* Choose the branch according to the condition */
1067    
1068 nigel 93 ecode += condition? 3 : GET(ecode, 1);
1069 nigel 77 }
1070    
1071 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
1072 nigel 93 {
1073     condition = FALSE;
1074     ecode += GET(ecode, 1);
1075     }
1076    
1077 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
1078 nigel 93 the final argument match_condassert causes it to stop at the end of an
1079     assertion. */
1080 nigel 77
1081     else
1082     {
1083 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1084     match_condassert, RM3);
1085 nigel 77 if (rrc == MATCH_MATCH)
1086     {
1087 nigel 93 condition = TRUE;
1088     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1089 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1090     }
1091 ph10 550 else if (rrc != MATCH_NOMATCH &&
1092     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1093 nigel 77 {
1094     RRETURN(rrc); /* Need braces because of following else */
1095     }
1096 nigel 93 else
1097     {
1098     condition = FALSE;
1099 ph10 399 ecode += codelink;
1100 nigel 93 }
1101     }
1102 nigel 91
1103 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1104 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1105     match_cbegroup is required for an unlimited repeat of a possibly empty
1106     group. If the second alternative doesn't exist, we can just plough on. */
1107 nigel 91
1108 nigel 93 if (condition || *ecode == OP_ALT)
1109     {
1110 nigel 91 ecode += 1 + LINK_SIZE;
1111 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1112     {
1113     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1114     RRETURN(rrc);
1115     }
1116     else /* Group must match something */
1117     {
1118     flags = 0;
1119     goto TAIL_RECURSE;
1120     }
1121 nigel 77 }
1122 ph10 395 else /* Condition false & no alternative */
1123 nigel 93 {
1124     ecode += 1 + LINK_SIZE;
1125     }
1126     break;
1127 nigel 77
1128 ph10 461
1129 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1130     to close any currently open capturing brackets. */
1131 ph10 461
1132 ph10 447 case OP_CLOSE:
1133 ph10 461 number = GET2(ecode, 1);
1134 ph10 447 offset = number << 1;
1135 ph10 461
1136 ph10 475 #ifdef PCRE_DEBUG
1137 ph10 447 printf("end bracket %d at *ACCEPT", number);
1138     printf("\n");
1139     #endif
1140 nigel 77
1141 ph10 447 md->capture_last = number;
1142     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1143     {
1144     md->offset_vector[offset] =
1145     md->offset_vector[md->offset_end - number];
1146 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1147 ph10 447 if (offset_top <= offset) offset_top = offset + 2;
1148     }
1149     ecode += 3;
1150 ph10 461 break;
1151 ph10 447
1152    
1153 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1154     recursion, we should restore the offsets appropriately and continue from
1155     after the call. */
1156 nigel 77
1157 ph10 210 case OP_ACCEPT:
1158 nigel 77 case OP_END:
1159     if (md->recursive != NULL && md->recursive->group_num == 0)
1160     {
1161     recursion_info *rec = md->recursive;
1162 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1163 nigel 77 md->recursive = rec->prevrec;
1164     memmove(md->offset_vector, rec->offset_save,
1165     rec->saved_max * sizeof(int));
1166 ph10 461 offset_top = rec->save_offset_top;
1167 nigel 77 ims = original_ims;
1168     ecode = rec->after_call;
1169     break;
1170     }
1171    
1172 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1173     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1174     the subject. In both cases, backtracking will then try other alternatives,
1175     if any. */
1176 ph10 443
1177 ph10 442 if (eptr == mstart &&
1178     (md->notempty ||
1179 ph10 443 (md->notempty_atstart &&
1180 ph10 442 mstart == md->start_subject + md->start_offset)))
1181 ph10 510 MRRETURN(MATCH_NOMATCH);
1182 ph10 443
1183 ph10 442 /* Otherwise, we have a match. */
1184 nigel 77
1185 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1186     md->end_offset_top = offset_top; /* and how many extracts were taken */
1187 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1188 nigel 77
1189 ph10 512 /* For some reason, the macros don't work properly if an expression is
1190     given as the argument to MRRETURN when the heap is in use. */
1191    
1192     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1193     MRRETURN(rrc);
1194    
1195 nigel 77 /* Change option settings */
1196    
1197     case OP_OPT:
1198     ims = ecode[1];
1199     ecode += 2;
1200     DPRINTF(("ims set to %02lx\n", ims));
1201     break;
1202    
1203     /* Assertion brackets. Check the alternative branches in turn - the
1204     matching won't pass the KET for an assertion. If any one branch matches,
1205     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1206     start of each branch to move the current point backwards, so the code at
1207     this level is identical to the lookahead case. */
1208    
1209     case OP_ASSERT:
1210     case OP_ASSERTBACK:
1211     do
1212     {
1213 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1214     RM4);
1215 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1216 ph10 500 {
1217     mstart = md->start_match_ptr; /* In case \K reset it */
1218     break;
1219 ph10 501 }
1220 ph10 550 if (rrc != MATCH_NOMATCH &&
1221     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1222     RRETURN(rrc);
1223 nigel 77 ecode += GET(ecode, 1);
1224     }
1225     while (*ecode == OP_ALT);
1226 ph10 510 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1227 nigel 77
1228     /* If checking an assertion for a condition, return MATCH_MATCH. */
1229    
1230     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1231    
1232     /* Continue from after the assertion, updating the offsets high water
1233     mark, since extracts may have been taken during the assertion. */
1234    
1235     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1236     ecode += 1 + LINK_SIZE;
1237     offset_top = md->end_offset_top;
1238     continue;
1239    
1240 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1241 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1242 ph10 473 branches. */
1243 nigel 77
1244     case OP_ASSERT_NOT:
1245     case OP_ASSERTBACK_NOT:
1246     do
1247     {
1248 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1249     RM5);
1250 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1251 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1252     {
1253     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1254 ph10 482 break;
1255     }
1256 ph10 550 if (rrc != MATCH_NOMATCH &&
1257     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1258     RRETURN(rrc);
1259 nigel 77 ecode += GET(ecode,1);
1260     }
1261     while (*ecode == OP_ALT);
1262    
1263     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1264    
1265     ecode += 1 + LINK_SIZE;
1266     continue;
1267    
1268     /* Move the subject pointer back. This occurs only at the start of
1269     each branch of a lookbehind assertion. If we are too close to the start to
1270     move back, this match function fails. When working with UTF-8 we move
1271     back a number of characters, not bytes. */
1272    
1273     case OP_REVERSE:
1274     #ifdef SUPPORT_UTF8
1275     if (utf8)
1276     {
1277 nigel 93 i = GET(ecode, 1);
1278     while (i-- > 0)
1279 nigel 77 {
1280     eptr--;
1281 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1282 ph10 207 BACKCHAR(eptr);
1283 nigel 77 }
1284     }
1285     else
1286     #endif
1287    
1288     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1289    
1290     {
1291 nigel 93 eptr -= GET(ecode, 1);
1292 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1293 nigel 77 }
1294    
1295 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1296 nigel 77
1297 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1298 nigel 77 ecode += 1 + LINK_SIZE;
1299     break;
1300    
1301     /* The callout item calls an external function, if one is provided, passing
1302     details of the match so far. This is mainly for debugging, though the
1303     function is able to force a failure. */
1304    
1305     case OP_CALLOUT:
1306     if (pcre_callout != NULL)
1307     {
1308     pcre_callout_block cb;
1309     cb.version = 1; /* Version 1 of the callout block */
1310     cb.callout_number = ecode[1];
1311     cb.offset_vector = md->offset_vector;
1312 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1313 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
1314     cb.start_match = (int)(mstart - md->start_subject);
1315     cb.current_position = (int)(eptr - md->start_subject);
1316 nigel 77 cb.pattern_position = GET(ecode, 2);
1317     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1318     cb.capture_top = offset_top/2;
1319     cb.capture_last = md->capture_last;
1320     cb.callout_data = md->callout_data;
1321 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1322 nigel 77 if (rrc < 0) RRETURN(rrc);
1323     }
1324     ecode += 2 + 2*LINK_SIZE;
1325     break;
1326    
1327     /* Recursion either matches the current regex, or some subexpression. The
1328     offset data is the offset to the starting bracket from the start of the
1329     whole pattern. (This is so that it works from duplicated subpatterns.)
1330    
1331     If there are any capturing brackets started but not finished, we have to
1332     save their starting points and reinstate them after the recursion. However,
1333     we don't know how many such there are (offset_top records the completed
1334     total) so we just have to save all the potential data. There may be up to
1335     65535 such values, which is too large to put on the stack, but using malloc
1336     for small numbers seems expensive. As a compromise, the stack is used when
1337     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1338     is used. If the malloc fails, the data cannot be saved, so the recursion is
1339     not attempted and PCRE_ERROR_NOMEMORY is returned from this call of match()
1340     instead.
1341    
1342     There are also other values that have to be saved. We use a chained
1343     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1344     for the original version of this logic. */
1345    
1346     case OP_RECURSE:
1347     {
1348     callpat = md->start_code + GET(ecode, 1);
1349 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1350     GET2(callpat, 1 + LINK_SIZE);
1351 nigel 77
1352     /* Add to "recursing stack" */
1353    
1354     new_recursive.prevrec = md->recursive;
1355     md->recursive = &new_recursive;
1356    
1357     /* Find where to continue from afterwards */
1358    
1359     ecode += 1 + LINK_SIZE;
1360     new_recursive.after_call = ecode;
1361    
1362     /* Now save the offset data. */
1363    
1364     new_recursive.saved_max = md->offset_end;
1365     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1366     new_recursive.offset_save = stacksave;
1367     else
1368     {
1369     new_recursive.offset_save =
1370     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1371     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1372     }
1373    
1374     memcpy(new_recursive.offset_save, md->offset_vector,
1375     new_recursive.saved_max * sizeof(int));
1376 ph10 461 new_recursive.save_offset_top = offset_top;
1377 nigel 77
1378     /* OK, now we can do the recursion. For each top-level alternative we
1379     restore the offset and recursion data. */
1380    
1381     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1382 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1383 nigel 77 do
1384     {
1385 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1386     md, ims, eptrb, flags, RM6);
1387 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1388 nigel 77 {
1389 nigel 87 DPRINTF(("Recursion matched\n"));
1390 nigel 77 md->recursive = new_recursive.prevrec;
1391     if (new_recursive.offset_save != stacksave)
1392     (pcre_free)(new_recursive.offset_save);
1393 ph10 510 MRRETURN(MATCH_MATCH);
1394 nigel 77 }
1395 ph10 550 else if (rrc != MATCH_NOMATCH &&
1396     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1397 nigel 87 {
1398     DPRINTF(("Recursion gave error %d\n", rrc));
1399 ph10 400 if (new_recursive.offset_save != stacksave)
1400     (pcre_free)(new_recursive.offset_save);
1401 nigel 87 RRETURN(rrc);
1402     }
1403 nigel 77
1404     md->recursive = &new_recursive;
1405     memcpy(md->offset_vector, new_recursive.offset_save,
1406     new_recursive.saved_max * sizeof(int));
1407     callpat += GET(callpat, 1);
1408     }
1409     while (*callpat == OP_ALT);
1410    
1411     DPRINTF(("Recursion didn't match\n"));
1412     md->recursive = new_recursive.prevrec;
1413     if (new_recursive.offset_save != stacksave)
1414     (pcre_free)(new_recursive.offset_save);
1415 ph10 510 MRRETURN(MATCH_NOMATCH);
1416 nigel 77 }
1417     /* Control never reaches here */
1418    
1419     /* "Once" brackets are like assertion brackets except that after a match,
1420     the point in the subject string is not moved back. Thus there can never be
1421     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1422     Check the alternative branches in turn - the matching won't pass the KET
1423     for this kind of subpattern. If any one branch matches, we carry on as at
1424 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1425     the start-of-match value in case it was changed by \K. */
1426 nigel 77
1427     case OP_ONCE:
1428 nigel 91 prev = ecode;
1429     saved_eptr = eptr;
1430    
1431     do
1432 nigel 77 {
1433 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1434 ph10 511 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1435 ph10 500 {
1436     mstart = md->start_match_ptr;
1437     break;
1438 ph10 501 }
1439 ph10 550 if (rrc != MATCH_NOMATCH &&
1440     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1441     RRETURN(rrc);
1442 nigel 91 ecode += GET(ecode,1);
1443     }
1444     while (*ecode == OP_ALT);
1445 nigel 77
1446 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1447 nigel 77
1448 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1449 nigel 77
1450 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1451     mark, since extracts may have been taken. */
1452 nigel 77
1453 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1454 nigel 77
1455 nigel 91 offset_top = md->end_offset_top;
1456     eptr = md->end_match_ptr;
1457 nigel 77
1458 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1459     happens for a repeating ket if no characters were matched in the group.
1460     This is the forcible breaking of infinite loops as implemented in Perl
1461     5.005. If there is an options reset, it will get obeyed in the normal
1462     course of events. */
1463 nigel 77
1464 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1465     {
1466     ecode += 1+LINK_SIZE;
1467     break;
1468     }
1469 nigel 77
1470 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1471     preceding bracket, in the appropriate order. The second "call" of match()
1472     uses tail recursion, to avoid using another stack frame. We need to reset
1473     any options that changed within the bracket before re-running it, so
1474     check the next opcode. */
1475 nigel 77
1476 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1477     {
1478     ims = (ims & ~PCRE_IMS) | ecode[4];
1479     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1480     }
1481 nigel 77
1482 nigel 91 if (*ecode == OP_KETRMIN)
1483     {
1484 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1485 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1486     ecode = prev;
1487 ph10 197 flags = 0;
1488 nigel 91 goto TAIL_RECURSE;
1489 nigel 77 }
1490 nigel 91 else /* OP_KETRMAX */
1491     {
1492 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1493 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1494     ecode += 1 + LINK_SIZE;
1495 ph10 197 flags = 0;
1496 nigel 91 goto TAIL_RECURSE;
1497     }
1498     /* Control never gets here */
1499 nigel 77
1500     /* An alternation is the end of a branch; scan along to find the end of the
1501     bracketed group and go to there. */
1502    
1503     case OP_ALT:
1504     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1505     break;
1506    
1507 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1508     indicating that it may occur zero times. It may repeat infinitely, or not
1509     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1510     with fixed upper repeat limits are compiled as a number of copies, with the
1511     optional ones preceded by BRAZERO or BRAMINZERO. */
1512 nigel 77
1513     case OP_BRAZERO:
1514     {
1515     next = ecode+1;
1516 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1517 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1518     do next += GET(next,1); while (*next == OP_ALT);
1519 nigel 93 ecode = next + 1 + LINK_SIZE;
1520 nigel 77 }
1521     break;
1522    
1523     case OP_BRAMINZERO:
1524     {
1525     next = ecode+1;
1526 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1527 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1528 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1529     ecode++;
1530     }
1531     break;
1532    
1533 ph10 335 case OP_SKIPZERO:
1534     {
1535     next = ecode+1;
1536     do next += GET(next,1); while (*next == OP_ALT);
1537     ecode = next + 1 + LINK_SIZE;
1538     }
1539     break;
1540    
1541 nigel 93 /* End of a group, repeated or non-repeating. */
1542 nigel 77
1543     case OP_KET:
1544     case OP_KETRMIN:
1545     case OP_KETRMAX:
1546 nigel 91 prev = ecode - GET(ecode, 1);
1547 nigel 77
1548 nigel 93 /* If this was a group that remembered the subject start, in order to break
1549     infinite repeats of empty string matches, retrieve the subject start from
1550     the chain. Otherwise, set it NULL. */
1551 nigel 77
1552 nigel 93 if (*prev >= OP_SBRA)
1553     {
1554     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1555     eptrb = eptrb->epb_prev; /* Backup to previous group */
1556     }
1557     else saved_eptr = NULL;
1558 nigel 77
1559 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1560     matching and return MATCH_MATCH, but record the current high water mark for
1561     use by positive assertions. We also need to record the match start in case
1562     it was changed by \K. */
1563 nigel 93
1564 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1565     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1566     *prev == OP_ONCE)
1567     {
1568     md->end_match_ptr = eptr; /* For ONCE */
1569     md->end_offset_top = offset_top;
1570 ph10 500 md->start_match_ptr = mstart;
1571 ph10 510 MRRETURN(MATCH_MATCH);
1572 nigel 91 }
1573 nigel 77
1574 nigel 93 /* For capturing groups we have to check the group number back at the start
1575     and if necessary complete handling an extraction by setting the offsets and
1576     bumping the high water mark. Note that whole-pattern recursion is coded as
1577     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1578     when the OP_END is reached. Other recursion is handled here. */
1579 nigel 77
1580 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1581 nigel 91 {
1582 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1583 nigel 91 offset = number << 1;
1584 ph10 461
1585 ph10 475 #ifdef PCRE_DEBUG
1586 nigel 91 printf("end bracket %d", number);
1587     printf("\n");
1588 nigel 77 #endif
1589    
1590 nigel 93 md->capture_last = number;
1591     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1592 nigel 91 {
1593 nigel 93 md->offset_vector[offset] =
1594     md->offset_vector[md->offset_end - number];
1595 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1596 nigel 93 if (offset_top <= offset) offset_top = offset + 2;
1597     }
1598 nigel 77
1599 nigel 93 /* Handle a recursively called group. Restore the offsets
1600     appropriately and continue from after the call. */
1601 nigel 77
1602 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1603     {
1604     recursion_info *rec = md->recursive;
1605     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1606     md->recursive = rec->prevrec;
1607     memcpy(md->offset_vector, rec->offset_save,
1608     rec->saved_max * sizeof(int));
1609 ph10 461 offset_top = rec->save_offset_top;
1610 nigel 93 ecode = rec->after_call;
1611     ims = original_ims;
1612     break;
1613 nigel 77 }
1614 nigel 91 }
1615 nigel 77
1616 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1617     flags, in case they got changed during the group. */
1618 nigel 77
1619 nigel 91 ims = original_ims;
1620     DPRINTF(("ims reset to %02lx\n", ims));
1621 nigel 77
1622 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1623     happens for a repeating ket if no characters were matched in the group.
1624     This is the forcible breaking of infinite loops as implemented in Perl
1625     5.005. If there is an options reset, it will get obeyed in the normal
1626     course of events. */
1627 nigel 77
1628 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1629     {
1630     ecode += 1 + LINK_SIZE;
1631     break;
1632     }
1633 nigel 77
1634 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1635     preceding bracket, in the appropriate order. In the second case, we can use
1636 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1637     unlimited repeat of a group that can match an empty string. */
1638 nigel 77
1639 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1640    
1641 nigel 91 if (*ecode == OP_KETRMIN)
1642     {
1643 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1644 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1645 ph10 197 if (flags != 0) /* Could match an empty string */
1646     {
1647     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1648     RRETURN(rrc);
1649     }
1650 nigel 91 ecode = prev;
1651     goto TAIL_RECURSE;
1652 nigel 77 }
1653 nigel 91 else /* OP_KETRMAX */
1654     {
1655 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1656 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1657     ecode += 1 + LINK_SIZE;
1658 ph10 197 flags = 0;
1659 nigel 91 goto TAIL_RECURSE;
1660     }
1661     /* Control never gets here */
1662 nigel 77
1663     /* Start of subject unless notbol, or after internal newline if multiline */
1664    
1665     case OP_CIRC:
1666 ph10 510 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1667 nigel 77 if ((ims & PCRE_MULTILINE) != 0)
1668     {
1669 nigel 91 if (eptr != md->start_subject &&
1670 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1671 ph10 510 MRRETURN(MATCH_NOMATCH);
1672 nigel 77 ecode++;
1673     break;
1674     }
1675     /* ... else fall through */
1676    
1677     /* Start of subject assertion */
1678    
1679     case OP_SOD:
1680 ph10 510 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1681 nigel 77 ecode++;
1682     break;
1683    
1684     /* Start of match assertion */
1685    
1686     case OP_SOM:
1687 ph10 510 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1688 nigel 77 ecode++;
1689     break;
1690 ph10 172
1691 ph10 168 /* Reset the start of match point */
1692 ph10 172
1693 ph10 168 case OP_SET_SOM:
1694     mstart = eptr;
1695 ph10 172 ecode++;
1696     break;
1697 nigel 77
1698     /* Assert before internal newline if multiline, or before a terminating
1699     newline unless endonly is set, else end of subject unless noteol is set. */
1700    
1701     case OP_DOLL:
1702     if ((ims & PCRE_MULTILINE) != 0)
1703     {
1704     if (eptr < md->end_subject)
1705 ph10 510 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1706 nigel 77 else
1707 ph10 510 { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1708 nigel 77 ecode++;
1709     break;
1710     }
1711     else
1712     {
1713 ph10 510 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1714 nigel 77 if (!md->endonly)
1715     {
1716 nigel 91 if (eptr != md->end_subject &&
1717 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1718 ph10 510 MRRETURN(MATCH_NOMATCH);
1719 nigel 77 ecode++;
1720     break;
1721     }
1722     }
1723 nigel 91 /* ... else fall through for endonly */
1724 nigel 77
1725     /* End of subject assertion (\z) */
1726    
1727     case OP_EOD:
1728 ph10 510 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1729 nigel 77 ecode++;
1730     break;
1731    
1732     /* End of subject or ending \n assertion (\Z) */
1733    
1734     case OP_EODN:
1735 nigel 91 if (eptr != md->end_subject &&
1736 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1737 ph10 510 MRRETURN(MATCH_NOMATCH);
1738 nigel 77 ecode++;
1739     break;
1740    
1741     /* Word boundary assertions */
1742    
1743     case OP_NOT_WORD_BOUNDARY:
1744     case OP_WORD_BOUNDARY:
1745     {
1746    
1747     /* Find out if the previous and current characters are "word" characters.
1748     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1749 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1750 ph10 435 partial matching. */
1751 nigel 77
1752     #ifdef SUPPORT_UTF8
1753     if (utf8)
1754     {
1755 ph10 518 /* Get status of previous character */
1756 ph10 527
1757 nigel 77 if (eptr == md->start_subject) prev_is_word = FALSE; else
1758     {
1759 ph10 409 USPTR lastptr = eptr - 1;
1760 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1761 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1762 nigel 77 GETCHAR(c, lastptr);
1763 ph10 527 #ifdef SUPPORT_UCP
1764 ph10 518 if (md->use_ucp)
1765     {
1766     if (c == '_') prev_is_word = TRUE; else
1767 ph10 527 {
1768 ph10 518 int cat = UCD_CATEGORY(c);
1769     prev_is_word = (cat == ucp_L || cat == ucp_N);
1770 ph10 527 }
1771     }
1772     else
1773     #endif
1774 nigel 77 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1775     }
1776 ph10 527
1777 ph10 518 /* Get status of next character */
1778 ph10 527
1779 ph10 443 if (eptr >= md->end_subject)
1780 nigel 77 {
1781 ph10 443 SCHECK_PARTIAL();
1782     cur_is_word = FALSE;
1783 ph10 428 }
1784     else
1785     {
1786 nigel 77 GETCHAR(c, eptr);
1787 ph10 527 #ifdef SUPPORT_UCP
1788 ph10 518 if (md->use_ucp)
1789     {
1790     if (c == '_') cur_is_word = TRUE; else
1791 ph10 527 {
1792 ph10 518 int cat = UCD_CATEGORY(c);
1793     cur_is_word = (cat == ucp_L || cat == ucp_N);
1794 ph10 527 }
1795     }
1796     else
1797     #endif
1798 nigel 77 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1799     }
1800     }
1801     else
1802     #endif
1803    
1804 ph10 527 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1805 ph10 518 consistency with the behaviour of \w we do use it in this case. */
1806 nigel 77
1807     {
1808 ph10 518 /* Get status of previous character */
1809 ph10 527
1810 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1811     {
1812 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1813 ph10 527 #ifdef SUPPORT_UCP
1814 ph10 518 if (md->use_ucp)
1815     {
1816 ph10 527 c = eptr[-1];
1817 ph10 518 if (c == '_') prev_is_word = TRUE; else
1818 ph10 527 {
1819 ph10 518 int cat = UCD_CATEGORY(c);
1820     prev_is_word = (cat == ucp_L || cat == ucp_N);
1821 ph10 527 }
1822     }
1823     else
1824     #endif
1825 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1826     }
1827 ph10 527
1828 ph10 518 /* Get status of next character */
1829 ph10 527
1830 ph10 443 if (eptr >= md->end_subject)
1831 ph10 428 {
1832 ph10 443 SCHECK_PARTIAL();
1833     cur_is_word = FALSE;
1834 ph10 428 }
1835 ph10 527 else
1836     #ifdef SUPPORT_UCP
1837 ph10 518 if (md->use_ucp)
1838     {
1839 ph10 527 c = *eptr;
1840 ph10 518 if (c == '_') cur_is_word = TRUE; else
1841 ph10 527 {
1842 ph10 518 int cat = UCD_CATEGORY(c);
1843     cur_is_word = (cat == ucp_L || cat == ucp_N);
1844 ph10 527 }
1845     }
1846     else
1847     #endif
1848 ph10 518 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1849 nigel 77 }
1850    
1851     /* Now see if the situation is what we want */
1852    
1853     if ((*ecode++ == OP_WORD_BOUNDARY)?
1854     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1855 ph10 510 MRRETURN(MATCH_NOMATCH);
1856 nigel 77 }
1857     break;
1858    
1859     /* Match a single character type; inline for speed */
1860    
1861     case OP_ANY:
1862 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1863 ph10 345 /* Fall through */
1864    
1865 ph10 341 case OP_ALLANY:
1866 ph10 443 if (eptr++ >= md->end_subject)
1867 ph10 428 {
1868 ph10 443 SCHECK_PARTIAL();
1869 ph10 510 MRRETURN(MATCH_NOMATCH);
1870 ph10 443 }
1871 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1872 nigel 77 ecode++;
1873     break;
1874    
1875     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1876     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1877    
1878     case OP_ANYBYTE:
1879 ph10 443 if (eptr++ >= md->end_subject)
1880 ph10 428 {
1881 ph10 443 SCHECK_PARTIAL();
1882 ph10 510 MRRETURN(MATCH_NOMATCH);
1883 ph10 443 }
1884 nigel 77 ecode++;
1885     break;
1886    
1887     case OP_NOT_DIGIT:
1888 ph10 443 if (eptr >= md->end_subject)
1889 ph10 428 {
1890 ph10 443 SCHECK_PARTIAL();
1891 ph10 510 MRRETURN(MATCH_NOMATCH);
1892 ph10 443 }
1893 nigel 77 GETCHARINCTEST(c, eptr);
1894     if (
1895     #ifdef SUPPORT_UTF8
1896     c < 256 &&
1897     #endif
1898     (md->ctypes[c] & ctype_digit) != 0
1899     )
1900 ph10 510 MRRETURN(MATCH_NOMATCH);
1901 nigel 77 ecode++;
1902     break;
1903    
1904     case OP_DIGIT:
1905 ph10 443 if (eptr >= md->end_subject)
1906 ph10 428 {
1907 ph10 443 SCHECK_PARTIAL();
1908 ph10 510 MRRETURN(MATCH_NOMATCH);
1909 ph10 443 }
1910 nigel 77 GETCHARINCTEST(c, eptr);
1911     if (
1912     #ifdef SUPPORT_UTF8
1913     c >= 256 ||
1914     #endif
1915     (md->ctypes[c] & ctype_digit) == 0
1916     )
1917 ph10 510 MRRETURN(MATCH_NOMATCH);
1918 nigel 77 ecode++;
1919     break;
1920    
1921     case OP_NOT_WHITESPACE:
1922 ph10 443 if (eptr >= md->end_subject)
1923 ph10 428 {
1924 ph10 443 SCHECK_PARTIAL();
1925 ph10 510 MRRETURN(MATCH_NOMATCH);
1926 ph10 443 }
1927 nigel 77 GETCHARINCTEST(c, eptr);
1928     if (
1929     #ifdef SUPPORT_UTF8
1930     c < 256 &&
1931     #endif
1932     (md->ctypes[c] & ctype_space) != 0
1933     )
1934 ph10 510 MRRETURN(MATCH_NOMATCH);
1935 nigel 77 ecode++;
1936     break;
1937    
1938     case OP_WHITESPACE:
1939 ph10 443 if (eptr >= md->end_subject)
1940 ph10 428 {
1941 ph10 443 SCHECK_PARTIAL();
1942 ph10 510 MRRETURN(MATCH_NOMATCH);
1943 ph10 443 }
1944 nigel 77 GETCHARINCTEST(c, eptr);
1945     if (
1946     #ifdef SUPPORT_UTF8
1947     c >= 256 ||
1948     #endif
1949     (md->ctypes[c] & ctype_space) == 0
1950     )
1951 ph10 510 MRRETURN(MATCH_NOMATCH);
1952 nigel 77 ecode++;
1953     break;
1954    
1955     case OP_NOT_WORDCHAR:
1956 ph10 443 if (eptr >= md->end_subject)
1957 ph10 428 {
1958 ph10 443 SCHECK_PARTIAL();
1959 ph10 510 MRRETURN(MATCH_NOMATCH);
1960 ph10 443 }
1961 nigel 77 GETCHARINCTEST(c, eptr);
1962     if (
1963     #ifdef SUPPORT_UTF8
1964     c < 256 &&
1965     #endif
1966     (md->ctypes[c] & ctype_word) != 0
1967     )
1968 ph10 510 MRRETURN(MATCH_NOMATCH);
1969 nigel 77 ecode++;
1970     break;
1971    
1972     case OP_WORDCHAR:
1973 ph10 443 if (eptr >= md->end_subject)
1974 ph10 428 {
1975 ph10 443 SCHECK_PARTIAL();
1976 ph10 510 MRRETURN(MATCH_NOMATCH);
1977 ph10 443 }
1978 nigel 77 GETCHARINCTEST(c, eptr);
1979     if (
1980     #ifdef SUPPORT_UTF8
1981     c >= 256 ||
1982     #endif
1983     (md->ctypes[c] & ctype_word) == 0
1984     )
1985 ph10 510 MRRETURN(MATCH_NOMATCH);
1986 nigel 77 ecode++;
1987     break;
1988    
1989 nigel 93 case OP_ANYNL:
1990 ph10 443 if (eptr >= md->end_subject)
1991 ph10 428 {
1992 ph10 443 SCHECK_PARTIAL();
1993 ph10 510 MRRETURN(MATCH_NOMATCH);
1994 ph10 443 }
1995 nigel 93 GETCHARINCTEST(c, eptr);
1996     switch(c)
1997     {
1998 ph10 510 default: MRRETURN(MATCH_NOMATCH);
1999 nigel 93 case 0x000d:
2000     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2001     break;
2002 ph10 231
2003 nigel 93 case 0x000a:
2004 ph10 231 break;
2005    
2006 nigel 93 case 0x000b:
2007     case 0x000c:
2008     case 0x0085:
2009     case 0x2028:
2010     case 0x2029:
2011 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2012 nigel 93 break;
2013     }
2014     ecode++;
2015     break;
2016    
2017 ph10 178 case OP_NOT_HSPACE:
2018 ph10 443 if (eptr >= md->end_subject)
2019 ph10 428 {
2020 ph10 443 SCHECK_PARTIAL();
2021 ph10 510 MRRETURN(MATCH_NOMATCH);
2022 ph10 443 }
2023 ph10 178 GETCHARINCTEST(c, eptr);
2024     switch(c)
2025     {
2026     default: break;
2027     case 0x09: /* HT */
2028     case 0x20: /* SPACE */
2029     case 0xa0: /* NBSP */
2030     case 0x1680: /* OGHAM SPACE MARK */
2031     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2032     case 0x2000: /* EN QUAD */
2033     case 0x2001: /* EM QUAD */
2034     case 0x2002: /* EN SPACE */
2035     case 0x2003: /* EM SPACE */
2036     case 0x2004: /* THREE-PER-EM SPACE */
2037     case 0x2005: /* FOUR-PER-EM SPACE */
2038     case 0x2006: /* SIX-PER-EM SPACE */
2039     case 0x2007: /* FIGURE SPACE */
2040     case 0x2008: /* PUNCTUATION SPACE */
2041     case 0x2009: /* THIN SPACE */
2042     case 0x200A: /* HAIR SPACE */
2043     case 0x202f: /* NARROW NO-BREAK SPACE */
2044     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2045     case 0x3000: /* IDEOGRAPHIC SPACE */
2046 ph10 510 MRRETURN(MATCH_NOMATCH);
2047 ph10 178 }
2048     ecode++;
2049     break;
2050    
2051     case OP_HSPACE:
2052 ph10 443 if (eptr >= md->end_subject)
2053 ph10 428 {
2054 ph10 443 SCHECK_PARTIAL();
2055 ph10 510 MRRETURN(MATCH_NOMATCH);
2056 ph10 443 }
2057 ph10 178 GETCHARINCTEST(c, eptr);
2058     switch(c)
2059     {
2060 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2061 ph10 178 case 0x09: /* HT */
2062     case 0x20: /* SPACE */
2063     case 0xa0: /* NBSP */
2064     case 0x1680: /* OGHAM SPACE MARK */
2065     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2066     case 0x2000: /* EN QUAD */
2067     case 0x2001: /* EM QUAD */
2068     case 0x2002: /* EN SPACE */
2069     case 0x2003: /* EM SPACE */
2070     case 0x2004: /* THREE-PER-EM SPACE */
2071     case 0x2005: /* FOUR-PER-EM SPACE */
2072     case 0x2006: /* SIX-PER-EM SPACE */
2073     case 0x2007: /* FIGURE SPACE */
2074     case 0x2008: /* PUNCTUATION SPACE */
2075     case 0x2009: /* THIN SPACE */
2076     case 0x200A: /* HAIR SPACE */
2077     case 0x202f: /* NARROW NO-BREAK SPACE */
2078     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2079     case 0x3000: /* IDEOGRAPHIC SPACE */
2080     break;
2081     }
2082     ecode++;
2083     break;
2084    
2085     case OP_NOT_VSPACE:
2086 ph10 443 if (eptr >= md->end_subject)
2087 ph10 428 {
2088 ph10 443 SCHECK_PARTIAL();
2089 ph10 510 MRRETURN(MATCH_NOMATCH);
2090 ph10 443 }
2091 ph10 178 GETCHARINCTEST(c, eptr);
2092     switch(c)
2093     {
2094     default: break;
2095     case 0x0a: /* LF */
2096     case 0x0b: /* VT */
2097     case 0x0c: /* FF */
2098     case 0x0d: /* CR */
2099     case 0x85: /* NEL */
2100     case 0x2028: /* LINE SEPARATOR */
2101     case 0x2029: /* PARAGRAPH SEPARATOR */
2102 ph10 510 MRRETURN(MATCH_NOMATCH);
2103 ph10 178 }
2104     ecode++;
2105     break;
2106    
2107     case OP_VSPACE:
2108 ph10 443 if (eptr >= md->end_subject)
2109 ph10 428 {
2110 ph10 443 SCHECK_PARTIAL();
2111 ph10 510 MRRETURN(MATCH_NOMATCH);
2112 ph10 443 }
2113 ph10 178 GETCHARINCTEST(c, eptr);
2114     switch(c)
2115     {
2116 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2117 ph10 178 case 0x0a: /* LF */
2118     case 0x0b: /* VT */
2119     case 0x0c: /* FF */
2120     case 0x0d: /* CR */
2121     case 0x85: /* NEL */
2122     case 0x2028: /* LINE SEPARATOR */
2123     case 0x2029: /* PARAGRAPH SEPARATOR */
2124     break;
2125     }
2126     ecode++;
2127     break;
2128    
2129 nigel 77 #ifdef SUPPORT_UCP
2130     /* Check the next character by Unicode property. We will get here only
2131     if the support is in the binary; otherwise a compile-time error occurs. */
2132    
2133     case OP_PROP:
2134     case OP_NOTPROP:
2135 ph10 443 if (eptr >= md->end_subject)
2136 ph10 428 {
2137 ph10 443 SCHECK_PARTIAL();
2138 ph10 510 MRRETURN(MATCH_NOMATCH);
2139 ph10 443 }
2140 nigel 77 GETCHARINCTEST(c, eptr);
2141     {
2142 ph10 384 const ucd_record *prop = GET_UCD(c);
2143 nigel 77
2144 nigel 87 switch(ecode[1])
2145     {
2146     case PT_ANY:
2147 ph10 510 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2148 nigel 87 break;
2149 nigel 77
2150 nigel 87 case PT_LAMP:
2151 ph10 349 if ((prop->chartype == ucp_Lu ||
2152     prop->chartype == ucp_Ll ||
2153     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2154 ph10 510 MRRETURN(MATCH_NOMATCH);
2155 ph10 517 break;
2156 nigel 87
2157     case PT_GC:
2158 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2159 ph10 510 MRRETURN(MATCH_NOMATCH);
2160 nigel 87 break;
2161    
2162     case PT_PC:
2163 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2164 ph10 510 MRRETURN(MATCH_NOMATCH);
2165 nigel 87 break;
2166    
2167     case PT_SC:
2168 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2169 ph10 510 MRRETURN(MATCH_NOMATCH);
2170 nigel 87 break;
2171 ph10 527
2172 ph10 517 /* These are specials */
2173 ph10 527
2174 ph10 517 case PT_ALNUM:
2175     if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2176     _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2177     MRRETURN(MATCH_NOMATCH);
2178 ph10 527 break;
2179    
2180 ph10 517 case PT_SPACE: /* Perl space */
2181     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2182     c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2183     == (op == OP_NOTPROP))
2184     MRRETURN(MATCH_NOMATCH);
2185 ph10 527 break;
2186    
2187 ph10 517 case PT_PXSPACE: /* POSIX space */
2188     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2189 ph10 527 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2190 ph10 517 c == CHAR_FF || c == CHAR_CR)
2191     == (op == OP_NOTPROP))
2192     MRRETURN(MATCH_NOMATCH);
2193 ph10 527 break;
2194 nigel 87
2195 ph10 527 case PT_WORD:
2196 ph10 517 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2197 ph10 527 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2198 ph10 517 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2199     MRRETURN(MATCH_NOMATCH);
2200 ph10 527 break;
2201    
2202 ph10 517 /* This should never occur */
2203    
2204 nigel 87 default:
2205     RRETURN(PCRE_ERROR_INTERNAL);
2206 nigel 77 }
2207 nigel 87
2208     ecode += 3;
2209 nigel 77 }
2210     break;
2211    
2212     /* Match an extended Unicode sequence. We will get here only if the support
2213     is in the binary; otherwise a compile-time error occurs. */
2214    
2215     case OP_EXTUNI:
2216 ph10 443 if (eptr >= md->end_subject)
2217 ph10 428 {
2218 ph10 443 SCHECK_PARTIAL();
2219 ph10 510 MRRETURN(MATCH_NOMATCH);
2220 ph10 443 }
2221 nigel 77 GETCHARINCTEST(c, eptr);
2222     {
2223 ph10 349 int category = UCD_CATEGORY(c);
2224 ph10 510 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2225 nigel 77 while (eptr < md->end_subject)
2226     {
2227     int len = 1;
2228     if (!utf8) c = *eptr; else
2229     {
2230     GETCHARLEN(c, eptr, len);
2231     }
2232 ph10 349 category = UCD_CATEGORY(c);
2233 nigel 77 if (category != ucp_M) break;
2234     eptr += len;
2235     }
2236     }
2237     ecode++;
2238     break;
2239     #endif
2240    
2241    
2242     /* Match a back reference, possibly repeatedly. Look past the end of the
2243     item to see if there is repeat information following. The code is similar
2244     to that for character classes, but repeated for efficiency. Then obey
2245     similar code to character type repeats - written out again for speed.
2246     However, if the referenced string is the empty string, always treat
2247     it as matched, any number of times (otherwise there could be infinite
2248     loops). */
2249    
2250     case OP_REF:
2251     {
2252     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2253 ph10 345 ecode += 3;
2254    
2255 ph10 336 /* If the reference is unset, there are two possibilities:
2256 ph10 345
2257 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
2258     than the amount of subject left; this ensures that every attempt at a
2259     match fails. We can't just fail here, because of the possibility of
2260     quantifiers with zero minima.
2261 ph10 345
2262     (b) If the JavaScript compatibility flag is set, set the length to zero
2263     so that the back reference matches an empty string.
2264    
2265     Otherwise, set the length to the length of what was matched by the
2266 ph10 336 referenced subpattern. */
2267 ph10 345
2268 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
2269 ph10 530 length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2270 ph10 336 else
2271     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2272 nigel 77
2273     /* Set up for repetition, or handle the non-repeated case */
2274    
2275     switch (*ecode)
2276     {
2277     case OP_CRSTAR:
2278     case OP_CRMINSTAR:
2279     case OP_CRPLUS:
2280     case OP_CRMINPLUS:
2281     case OP_CRQUERY:
2282     case OP_CRMINQUERY:
2283     c = *ecode++ - OP_CRSTAR;
2284     minimize = (c & 1) != 0;
2285     min = rep_min[c]; /* Pick up values from tables; */
2286     max = rep_max[c]; /* zero for max => infinity */
2287     if (max == 0) max = INT_MAX;
2288     break;
2289    
2290     case OP_CRRANGE:
2291     case OP_CRMINRANGE:
2292     minimize = (*ecode == OP_CRMINRANGE);
2293     min = GET2(ecode, 1);
2294     max = GET2(ecode, 3);
2295     if (max == 0) max = INT_MAX;
2296     ecode += 5;
2297     break;
2298    
2299     default: /* No repeat follows */
2300 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
2301 ph10 428 {
2302 ph10 443 CHECK_PARTIAL();
2303 ph10 510 MRRETURN(MATCH_NOMATCH);
2304 ph10 443 }
2305 nigel 77 eptr += length;
2306     continue; /* With the main loop */
2307     }
2308    
2309     /* If the length of the reference is zero, just continue with the
2310     main loop. */
2311 ph10 443
2312 nigel 77 if (length == 0) continue;
2313    
2314     /* First, ensure the minimum number of matches are present. We get back
2315     the length of the reference string explicitly rather than passing the
2316     address of eptr, so that eptr can be a register variable. */
2317    
2318     for (i = 1; i <= min; i++)
2319     {
2320 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
2321 ph10 426 {
2322 ph10 427 CHECK_PARTIAL();
2323 ph10 510 MRRETURN(MATCH_NOMATCH);
2324 ph10 427 }
2325 nigel 77 eptr += length;
2326     }
2327    
2328     /* If min = max, continue at the same level without recursion.
2329     They are not both allowed to be zero. */
2330    
2331     if (min == max) continue;
2332    
2333     /* If minimizing, keep trying and advancing the pointer */
2334    
2335     if (minimize)
2336     {
2337     for (fi = min;; fi++)
2338     {
2339 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2340 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2341 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2342 ph10 428 if (!match_ref(offset, eptr, length, md, ims))
2343 ph10 426 {
2344 ph10 427 CHECK_PARTIAL();
2345 ph10 510 MRRETURN(MATCH_NOMATCH);
2346 ph10 427 }
2347 nigel 77 eptr += length;
2348     }
2349     /* Control never gets here */
2350     }
2351    
2352     /* If maximizing, find the longest string and work backwards */
2353    
2354     else
2355     {
2356     pp = eptr;
2357     for (i = min; i < max; i++)
2358     {
2359 ph10 463 if (!match_ref(offset, eptr, length, md, ims))
2360 ph10 462 {
2361 ph10 463 CHECK_PARTIAL();
2362 ph10 462 break;
2363 ph10 463 }
2364 nigel 77 eptr += length;
2365     }
2366     while (eptr >= pp)
2367     {
2368 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2369 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2370     eptr -= length;
2371     }
2372 ph10 510 MRRETURN(MATCH_NOMATCH);
2373 nigel 77 }
2374     }
2375     /* Control never gets here */
2376    
2377     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2378     used when all the characters in the class have values in the range 0-255,
2379     and either the matching is caseful, or the characters are in the range
2380     0-127 when UTF-8 processing is enabled. The only difference between
2381     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2382     encountered.
2383    
2384     First, look past the end of the item to see if there is repeat information
2385     following. Then obey similar code to character type repeats - written out
2386     again for speed. */
2387    
2388     case OP_NCLASS:
2389     case OP_CLASS:
2390     {
2391     data = ecode + 1; /* Save for matching */
2392     ecode += 33; /* Advance past the item */
2393    
2394     switch (*ecode)
2395     {
2396     case OP_CRSTAR:
2397     case OP_CRMINSTAR:
2398     case OP_CRPLUS:
2399     case OP_CRMINPLUS:
2400     case OP_CRQUERY:
2401     case OP_CRMINQUERY:
2402     c = *ecode++ - OP_CRSTAR;
2403     minimize = (c & 1) != 0;
2404     min = rep_min[c]; /* Pick up values from tables; */
2405     max = rep_max[c]; /* zero for max => infinity */
2406     if (max == 0) max = INT_MAX;
2407     break;
2408    
2409     case OP_CRRANGE:
2410     case OP_CRMINRANGE:
2411     minimize = (*ecode == OP_CRMINRANGE);
2412     min = GET2(ecode, 1);
2413     max = GET2(ecode, 3);
2414     if (max == 0) max = INT_MAX;
2415     ecode += 5;
2416     break;
2417    
2418     default: /* No repeat follows */
2419     min = max = 1;
2420     break;
2421     }
2422    
2423     /* First, ensure the minimum number of matches are present. */
2424    
2425     #ifdef SUPPORT_UTF8
2426     /* UTF-8 mode */
2427     if (utf8)
2428     {
2429     for (i = 1; i <= min; i++)
2430     {
2431 ph10 427 if (eptr >= md->end_subject)
2432 ph10 426 {
2433 ph10 428 SCHECK_PARTIAL();
2434 ph10 510 MRRETURN(MATCH_NOMATCH);
2435 ph10 427 }
2436 nigel 77 GETCHARINC(c, eptr);
2437     if (c > 255)
2438     {
2439 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2440 nigel 77 }
2441     else
2442     {
2443 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2444 nigel 77 }
2445     }
2446     }
2447     else
2448     #endif
2449     /* Not UTF-8 mode */
2450     {
2451     for (i = 1; i <= min; i++)
2452     {
2453 ph10 427 if (eptr >= md->end_subject)
2454 ph10 426 {
2455 ph10 428 SCHECK_PARTIAL();
2456 ph10 510 MRRETURN(MATCH_NOMATCH);
2457 ph10 427 }
2458 nigel 77 c = *eptr++;
2459 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2460 nigel 77 }
2461     }
2462    
2463     /* If max == min we can continue with the main loop without the
2464     need to recurse. */
2465    
2466     if (min == max) continue;
2467    
2468     /* If minimizing, keep testing the rest of the expression and advancing
2469     the pointer while it matches the class. */
2470    
2471     if (minimize)
2472     {
2473     #ifdef SUPPORT_UTF8
2474     /* UTF-8 mode */
2475     if (utf8)
2476     {
2477     for (fi = min;; fi++)
2478     {
2479 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2480 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2481 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2482 ph10 427 if (eptr >= md->end_subject)
2483 ph10 426 {
2484 ph10 427 SCHECK_PARTIAL();
2485 ph10 510 MRRETURN(MATCH_NOMATCH);
2486 ph10 427 }
2487 nigel 77 GETCHARINC(c, eptr);
2488     if (c > 255)
2489     {
2490 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2491 nigel 77 }
2492     else
2493     {
2494 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2495 nigel 77 }
2496     }
2497     }
2498     else
2499     #endif
2500     /* Not UTF-8 mode */
2501     {
2502     for (fi = min;; fi++)
2503     {
2504 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2505 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2506 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2507 ph10 427 if (eptr >= md->end_subject)
2508 ph10 426 {
2509 ph10 427 SCHECK_PARTIAL();
2510 ph10 510 MRRETURN(MATCH_NOMATCH);
2511 ph10 427 }
2512 nigel 77 c = *eptr++;
2513 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2514 nigel 77 }
2515     }
2516     /* Control never gets here */
2517     }
2518    
2519     /* If maximizing, find the longest possible run, then work backwards. */
2520    
2521     else
2522     {
2523     pp = eptr;
2524    
2525     #ifdef SUPPORT_UTF8
2526     /* UTF-8 mode */
2527     if (utf8)
2528     {
2529     for (i = min; i < max; i++)
2530     {
2531     int len = 1;
2532 ph10 463 if (eptr >= md->end_subject)
2533 ph10 462 {
2534 ph10 463 SCHECK_PARTIAL();
2535 ph10 462 break;
2536 ph10 463 }
2537 nigel 77 GETCHARLEN(c, eptr, len);
2538     if (c > 255)
2539     {
2540     if (op == OP_CLASS) break;
2541     }
2542     else
2543     {
2544     if ((data[c/8] & (1 << (c&7))) == 0) break;
2545     }
2546     eptr += len;
2547     }
2548     for (;;)
2549     {
2550 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2551 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2552     if (eptr-- == pp) break; /* Stop if tried at original pos */
2553     BACKCHAR(eptr);
2554     }
2555     }
2556     else
2557     #endif
2558     /* Not UTF-8 mode */
2559     {
2560     for (i = min; i < max; i++)
2561     {
2562 ph10 463 if (eptr >= md->end_subject)
2563 ph10 462 {
2564 ph10 463 SCHECK_PARTIAL();
2565 ph10 462 break;
2566 ph10 463 }
2567 nigel 77 c = *eptr;
2568     if ((data[c/8] & (1 << (c&7))) == 0) break;
2569     eptr++;
2570     }
2571     while (eptr >= pp)
2572     {
2573 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2574 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2575 nigel 77 eptr--;
2576     }
2577     }
2578    
2579 ph10 510 MRRETURN(MATCH_NOMATCH);
2580 nigel 77 }
2581     }
2582     /* Control never gets here */
2583    
2584    
2585     /* Match an extended character class. This opcode is encountered only
2586 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2587     mode, because Unicode properties are supported in non-UTF-8 mode. */
2588 nigel 77
2589     #ifdef SUPPORT_UTF8
2590     case OP_XCLASS:
2591     {
2592     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2593     ecode += GET(ecode, 1); /* Advance past the item */
2594    
2595     switch (*ecode)
2596     {
2597     case OP_CRSTAR:
2598     case OP_CRMINSTAR:
2599     case OP_CRPLUS:
2600     case OP_CRMINPLUS:
2601     case OP_CRQUERY:
2602     case OP_CRMINQUERY:
2603     c = *ecode++ - OP_CRSTAR;
2604     minimize = (c & 1) != 0;
2605     min = rep_min[c]; /* Pick up values from tables; */
2606     max = rep_max[c]; /* zero for max => infinity */
2607     if (max == 0) max = INT_MAX;
2608     break;
2609    
2610     case OP_CRRANGE:
2611     case OP_CRMINRANGE:
2612     minimize = (*ecode == OP_CRMINRANGE);
2613     min = GET2(ecode, 1);
2614     max = GET2(ecode, 3);
2615     if (max == 0) max = INT_MAX;
2616     ecode += 5;
2617     break;
2618    
2619     default: /* No repeat follows */
2620     min = max = 1;
2621     break;
2622     }
2623    
2624     /* First, ensure the minimum number of matches are present. */
2625    
2626     for (i = 1; i <= min; i++)
2627     {
2628 ph10 427 if (eptr >= md->end_subject)
2629 ph10 426 {
2630     SCHECK_PARTIAL();
2631 ph10 510 MRRETURN(MATCH_NOMATCH);
2632 ph10 427 }
2633 ph10 384 GETCHARINCTEST(c, eptr);
2634 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2635 nigel 77 }
2636    
2637     /* If max == min we can continue with the main loop without the
2638     need to recurse. */
2639    
2640     if (min == max) continue;
2641    
2642     /* If minimizing, keep testing the rest of the expression and advancing
2643     the pointer while it matches the class. */
2644    
2645     if (minimize)
2646     {
2647     for (fi = min;; fi++)
2648     {
2649 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2650 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2651 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2652 ph10 427 if (eptr >= md->end_subject)
2653 ph10 426 {
2654 ph10 427 SCHECK_PARTIAL();
2655 ph10 510 MRRETURN(MATCH_NOMATCH);
2656 ph10 427 }
2657 ph10 384 GETCHARINCTEST(c, eptr);
2658 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2659 nigel 77 }
2660     /* Control never gets here */
2661     }
2662    
2663     /* If maximizing, find the longest possible run, then work backwards. */
2664    
2665     else
2666     {
2667     pp = eptr;
2668     for (i = min; i < max; i++)
2669     {
2670     int len = 1;
2671 ph10 463 if (eptr >= md->end_subject)
2672 ph10 462 {
2673 ph10 463 SCHECK_PARTIAL();
2674 ph10 462 break;
2675 ph10 463 }
2676 ph10 384 GETCHARLENTEST(c, eptr, len);
2677 nigel 77 if (!_pcre_xclass(c, data)) break;
2678     eptr += len;
2679     }
2680     for(;;)
2681     {
2682 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2683 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2684     if (eptr-- == pp) break; /* Stop if tried at original pos */
2685 ph10 214 if (utf8) BACKCHAR(eptr);
2686 nigel 77 }
2687 ph10 510 MRRETURN(MATCH_NOMATCH);
2688 nigel 77 }
2689    
2690     /* Control never gets here */
2691     }
2692     #endif /* End of XCLASS */
2693    
2694     /* Match a single character, casefully */
2695    
2696     case OP_CHAR:
2697     #ifdef SUPPORT_UTF8
2698     if (utf8)
2699     {
2700     length = 1;
2701     ecode++;
2702     GETCHARLEN(fc, ecode, length);
2703 ph10 443 if (length > md->end_subject - eptr)
2704 ph10 428 {
2705     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2706 ph10 510 MRRETURN(MATCH_NOMATCH);
2707 ph10 443 }
2708 ph10 510 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2709 nigel 77 }
2710     else
2711     #endif
2712    
2713     /* Non-UTF-8 mode */
2714     {
2715 ph10 443 if (md->end_subject - eptr < 1)
2716 ph10 428 {
2717     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2718 ph10 510 MRRETURN(MATCH_NOMATCH);
2719 ph10 443 }
2720 ph10 510 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2721 nigel 77 ecode += 2;
2722     }
2723     break;
2724    
2725     /* Match a single character, caselessly */
2726    
2727     case OP_CHARNC:
2728     #ifdef SUPPORT_UTF8
2729     if (utf8)
2730     {
2731     length = 1;
2732     ecode++;
2733     GETCHARLEN(fc, ecode, length);
2734    
2735 ph10 443 if (length > md->end_subject - eptr)
2736 ph10 428 {
2737     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2738 ph10 510 MRRETURN(MATCH_NOMATCH);
2739 ph10 443 }
2740 nigel 77
2741     /* If the pattern character's value is < 128, we have only one byte, and
2742     can use the fast lookup table. */
2743    
2744     if (fc < 128)
2745     {
2746 ph10 510 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2747 nigel 77 }
2748    
2749     /* Otherwise we must pick up the subject character */
2750    
2751     else
2752     {
2753 nigel 93 unsigned int dc;
2754 nigel 77 GETCHARINC(dc, eptr);
2755     ecode += length;
2756    
2757     /* If we have Unicode property support, we can use it to test the other
2758 nigel 87 case of the character, if there is one. */
2759 nigel 77
2760     if (fc != dc)
2761     {
2762     #ifdef SUPPORT_UCP
2763 ph10 349 if (dc != UCD_OTHERCASE(fc))
2764 nigel 77 #endif
2765 ph10 510 MRRETURN(MATCH_NOMATCH);
2766 nigel 77 }
2767     }
2768     }
2769     else
2770     #endif /* SUPPORT_UTF8 */
2771    
2772     /* Non-UTF-8 mode */
2773     {
2774 ph10 443 if (md->end_subject - eptr < 1)
2775 ph10 428 {
2776 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2777 ph10 510 MRRETURN(MATCH_NOMATCH);
2778 ph10 443 }
2779 ph10 510 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2780 nigel 77 ecode += 2;
2781     }
2782     break;
2783    
2784 nigel 93 /* Match a single character repeatedly. */
2785 nigel 77
2786     case OP_EXACT:
2787     min = max = GET2(ecode, 1);
2788     ecode += 3;
2789     goto REPEATCHAR;
2790    
2791 nigel 93 case OP_POSUPTO:
2792     possessive = TRUE;
2793     /* Fall through */
2794    
2795 nigel 77 case OP_UPTO:
2796     case OP_MINUPTO:
2797     min = 0;
2798     max = GET2(ecode, 1);
2799     minimize = *ecode == OP_MINUPTO;
2800     ecode += 3;
2801     goto REPEATCHAR;
2802    
2803 nigel 93 case OP_POSSTAR:
2804     possessive = TRUE;
2805     min = 0;
2806     max = INT_MAX;
2807     ecode++;
2808     goto REPEATCHAR;
2809    
2810     case OP_POSPLUS:
2811     possessive = TRUE;
2812     min = 1;
2813     max = INT_MAX;
2814     ecode++;
2815     goto REPEATCHAR;
2816    
2817     case OP_POSQUERY:
2818     possessive = TRUE;
2819     min = 0;
2820     max = 1;
2821     ecode++;
2822     goto REPEATCHAR;
2823    
2824 nigel 77 case OP_STAR:
2825     case OP_MINSTAR:
2826     case OP_PLUS:
2827     case OP_MINPLUS:
2828     case OP_QUERY:
2829     case OP_MINQUERY:
2830     c = *ecode++ - OP_STAR;
2831     minimize = (c & 1) != 0;
2832 ph10 443
2833 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2834     max = rep_max[c]; /* zero for max => infinity */
2835     if (max == 0) max = INT_MAX;
2836    
2837 ph10 426 /* Common code for all repeated single-character matches. */
2838 nigel 77
2839     REPEATCHAR:
2840     #ifdef SUPPORT_UTF8
2841     if (utf8)
2842     {
2843     length = 1;
2844     charptr = ecode;
2845     GETCHARLEN(fc, ecode, length);
2846     ecode += length;
2847    
2848     /* Handle multibyte character matching specially here. There is
2849     support for caseless matching if UCP support is present. */
2850    
2851     if (length > 1)
2852     {
2853     #ifdef SUPPORT_UCP
2854 nigel 93 unsigned int othercase;
2855 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2856 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2857 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2858 ph10 115 else oclength = 0;
2859 nigel 77 #endif /* SUPPORT_UCP */
2860    
2861     for (i = 1; i <= min; i++)
2862     {
2863 ph10 426 if (eptr <= md->end_subject - length &&
2864     memcmp(eptr, charptr, length) == 0) eptr += length;
2865 ph10 123 #ifdef SUPPORT_UCP
2866 ph10 426 else if (oclength > 0 &&
2867     eptr <= md->end_subject - oclength &&
2868     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2869     #endif /* SUPPORT_UCP */
2870 nigel 77 else
2871     {
2872 ph10 426 CHECK_PARTIAL();
2873 ph10 510 MRRETURN(MATCH_NOMATCH);
2874 nigel 77 }
2875     }
2876    
2877     if (min == max) continue;
2878    
2879     if (minimize)
2880     {
2881     for (fi = min;; fi++)
2882     {
2883 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2884 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2885 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2886 ph10 426 if (eptr <= md->end_subject - length &&
2887     memcmp(eptr, charptr, length) == 0) eptr += length;
2888 ph10 123 #ifdef SUPPORT_UCP
2889 ph10 426 else if (oclength > 0 &&
2890     eptr <= md->end_subject - oclength &&
2891     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2892     #endif /* SUPPORT_UCP */
2893 nigel 77 else
2894     {
2895 ph10 426 CHECK_PARTIAL();
2896 ph10 510 MRRETURN(MATCH_NOMATCH);
2897 nigel 77 }
2898     }
2899     /* Control never gets here */
2900     }
2901 nigel 93
2902     else /* Maximize */
2903 nigel 77 {
2904     pp = eptr;
2905     for (i = min; i < max; i++)
2906     {
2907 ph10 426 if (eptr <= md->end_subject - length &&
2908     memcmp(eptr, charptr, length) == 0) eptr += length;
2909 ph10 123 #ifdef SUPPORT_UCP
2910 ph10 426 else if (oclength > 0 &&
2911     eptr <= md->end_subject - oclength &&
2912     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2913     #endif /* SUPPORT_UCP */
2914 ph10 463 else
2915 ph10 462 {
2916 ph10 463 CHECK_PARTIAL();
2917 ph10 462 break;
2918 ph10 463 }
2919 nigel 77 }
2920 nigel 93
2921     if (possessive) continue;
2922 ph10 427
2923 ph10 120 for(;;)
2924 ph10 426 {
2925     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2926     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2927 ph10 510 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2928 ph10 115 #ifdef SUPPORT_UCP
2929 ph10 426 eptr--;
2930     BACKCHAR(eptr);
2931 ph10 123 #else /* without SUPPORT_UCP */
2932 ph10 426 eptr -= length;
2933 ph10 123 #endif /* SUPPORT_UCP */
2934 ph10 426 }
2935 nigel 77 }
2936     /* Control never gets here */
2937     }
2938    
2939     /* If the length of a UTF-8 character is 1, we fall through here, and
2940     obey the code as for non-UTF-8 characters below, though in this case the
2941     value of fc will always be < 128. */
2942     }
2943     else
2944     #endif /* SUPPORT_UTF8 */
2945    
2946     /* When not in UTF-8 mode, load a single-byte character. */
2947    
2948 ph10 426 fc = *ecode++;
2949 ph10 443
2950 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2951     may not be in UTF-8 mode. The code is duplicated for the caseless and
2952     caseful cases, for speed, since matching characters is likely to be quite
2953     common. First, ensure the minimum number of matches are present. If min =
2954     max, continue at the same level without recursing. Otherwise, if
2955     minimizing, keep trying the rest of the expression and advancing one
2956     matching character if failing, up to the maximum. Alternatively, if
2957     maximizing, find the maximum number of characters and work backwards. */
2958    
2959     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2960     max, eptr));
2961    
2962     if ((ims & PCRE_CASELESS) != 0)
2963     {
2964     fc = md->lcc[fc];
2965     for (i = 1; i <= min; i++)
2966 ph10 426 {
2967     if (eptr >= md->end_subject)
2968     {
2969     SCHECK_PARTIAL();
2970 ph10 510 MRRETURN(MATCH_NOMATCH);
2971 ph10 426 }
2972 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2973 ph10 426 }
2974 nigel 77 if (min == max) continue;
2975     if (minimize)
2976     {
2977     for (fi = min;; fi++)
2978     {
2979 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2980 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2981 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2982 ph10 426 if (eptr >= md->end_subject)
2983     {
2984 ph10 427 SCHECK_PARTIAL();
2985 ph10 510 MRRETURN(MATCH_NOMATCH);
2986 ph10 426 }
2987 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2988 nigel 77 }
2989     /* Control never gets here */
2990     }
2991 nigel 93 else /* Maximize */
2992 nigel 77 {
2993     pp = eptr;
2994     for (i = min; i < max; i++)
2995     {
2996 ph10 463 if (eptr >= md->end_subject)
2997 ph10 462 {
2998     SCHECK_PARTIAL();
2999     break;
3000 ph10 463 }
3001 ph10 462 if (fc != md->lcc[*eptr]) break;
3002 nigel 77 eptr++;
3003     }
3004 ph10 427
3005 nigel 93 if (possessive) continue;
3006 ph10 427
3007 nigel 77 while (eptr >= pp)
3008     {
3009 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3010 nigel 77 eptr--;
3011     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3012     }
3013 ph10 510 MRRETURN(MATCH_NOMATCH);
3014 nigel 77 }
3015     /* Control never gets here */
3016     }
3017    
3018     /* Caseful comparisons (includes all multi-byte characters) */
3019    
3020     else
3021     {
3022 ph10 427 for (i = 1; i <= min; i++)
3023 ph10 426 {
3024     if (eptr >= md->end_subject)
3025     {
3026     SCHECK_PARTIAL();
3027 ph10 510 MRRETURN(MATCH_NOMATCH);
3028 ph10 426 }
3029 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3030 ph10 427 }
3031 ph10 443
3032 nigel 77 if (min == max) continue;
3033 ph10 443
3034 nigel 77 if (minimize)
3035     {
3036     for (fi = min;; fi++)
3037     {
3038 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3039 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3040 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3041 ph10 426 if (eptr >= md->end_subject)
3042 ph10 427 {
3043 ph10 426 SCHECK_PARTIAL();
3044 ph10 510 MRRETURN(MATCH_NOMATCH);
3045 ph10 427 }
3046 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3047 nigel 77 }
3048     /* Control never gets here */
3049     }
3050 nigel 93 else /* Maximize */
3051 nigel 77 {
3052     pp = eptr;
3053     for (i = min; i < max; i++)
3054     {
3055 ph10 463 if (eptr >= md->end_subject)
3056 ph10 462 {
3057 ph10 463 SCHECK_PARTIAL();
3058 ph10 462 break;
3059 ph10 463 }
3060 ph10 462 if (fc != *eptr) break;
3061 nigel 77 eptr++;
3062     }
3063 nigel 93 if (possessive) continue;
3064 ph10 443
3065 nigel 77 while (eptr >= pp)
3066     {
3067 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3068 nigel 77 eptr--;
3069     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3070     }
3071 ph10 510 MRRETURN(MATCH_NOMATCH);
3072 nigel 77 }
3073     }
3074     /* Control never gets here */
3075    
3076     /* Match a negated single one-byte character. The character we are
3077     checking can be multibyte. */
3078    
3079     case OP_NOT:
3080 ph10 443 if (eptr >= md->end_subject)
3081 ph10 428 {
3082 ph10 443 SCHECK_PARTIAL();
3083 ph10 510 MRRETURN(MATCH_NOMATCH);
3084 ph10 443 }
3085 nigel 77 ecode++;
3086     GETCHARINCTEST(c, eptr);
3087     if ((ims & PCRE_CASELESS) != 0)
3088     {
3089     #ifdef SUPPORT_UTF8
3090     if (c < 256)
3091     #endif
3092     c = md->lcc[c];
3093 ph10 510 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3094 nigel 77 }
3095     else
3096     {
3097 ph10 510 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3098 nigel 77 }
3099     break;
3100    
3101     /* Match a negated single one-byte character repeatedly. This is almost a
3102     repeat of the code for a repeated single character, but I haven't found a
3103     nice way of commoning these up that doesn't require a test of the
3104     positive/negative option for each character match. Maybe that wouldn't add
3105     very much to the time taken, but character matching *is* what this is all
3106     about... */
3107    
3108     case OP_NOTEXACT:
3109     min = max = GET2(ecode, 1);
3110     ecode += 3;
3111     goto REPEATNOTCHAR;
3112    
3113     case OP_NOTUPTO:
3114     case OP_NOTMINUPTO:
3115     min = 0;
3116     max = GET2(ecode, 1);
3117     minimize = *ecode == OP_NOTMINUPTO;
3118     ecode += 3;
3119     goto REPEATNOTCHAR;
3120    
3121 nigel 93 case OP_NOTPOSSTAR:
3122     possessive = TRUE;
3123     min = 0;
3124     max = INT_MAX;
3125     ecode++;
3126     goto REPEATNOTCHAR;
3127    
3128     case OP_NOTPOSPLUS:
3129     possessive = TRUE;
3130     min = 1;
3131     max = INT_MAX;
3132     ecode++;
3133     goto REPEATNOTCHAR;
3134    
3135     case OP_NOTPOSQUERY:
3136     possessive = TRUE;
3137     min = 0;
3138     max = 1;
3139     ecode++;
3140     goto REPEATNOTCHAR;
3141    
3142     case OP_NOTPOSUPTO:
3143     possessive = TRUE;
3144     min = 0;
3145     max = GET2(ecode, 1);
3146     ecode += 3;
3147     goto REPEATNOTCHAR;
3148    
3149 nigel 77 case OP_NOTSTAR:
3150     case OP_NOTMINSTAR:
3151     case OP_NOTPLUS:
3152     case OP_NOTMINPLUS:
3153     case OP_NOTQUERY:
3154     case OP_NOTMINQUERY:
3155     c = *ecode++ - OP_NOTSTAR;
3156     minimize = (c & 1) != 0;
3157     min = rep_min[c]; /* Pick up values from tables; */
3158     max = rep_max[c]; /* zero for max => infinity */
3159     if (max == 0) max = INT_MAX;
3160    
3161 ph10 426 /* Common code for all repeated single-byte matches. */
3162 nigel 77
3163     REPEATNOTCHAR:
3164     fc = *ecode++;
3165    
3166     /* The code is duplicated for the caseless and caseful cases, for speed,
3167     since matching characters is likely to be quite common. First, ensure the
3168     minimum number of matches are present. If min = max, continue at the same
3169     level without recursing. Otherwise, if minimizing, keep trying the rest of
3170     the expression and advancing one matching character if failing, up to the
3171     maximum. Alternatively, if maximizing, find the maximum number of
3172     characters and work backwards. */
3173    
3174     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3175     max, eptr));
3176    
3177     if ((ims & PCRE_CASELESS) != 0)
3178     {
3179     fc = md->lcc[fc];
3180    
3181     #ifdef SUPPORT_UTF8
3182     /* UTF-8 mode */
3183     if (utf8)
3184     {
3185 nigel 93 register unsigned int d;
3186 nigel 77 for (i = 1; i <= min; i++)
3187     {
3188 ph10 426 if (eptr >= md->end_subject)
3189     {
3190     SCHECK_PARTIAL();
3191 ph10 510 MRRETURN(MATCH_NOMATCH);
3192 ph10 427 }
3193 nigel 77 GETCHARINC(d, eptr);
3194     if (d < 256) d = md->lcc[d];
3195 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3196 nigel 77 }
3197     }
3198     else
3199     #endif
3200    
3201     /* Not UTF-8 mode */
3202     {
3203     for (i = 1; i <= min; i++)
3204 ph10 426 {
3205     if (eptr >= md->end_subject)
3206     {
3207     SCHECK_PARTIAL();
3208 ph10 510 MRRETURN(MATCH_NOMATCH);
3209 ph10 427 }
3210 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3211 ph10 427 }
3212 nigel 77 }
3213    
3214     if (min == max) continue;
3215    
3216     if (minimize)
3217     {
3218     #ifdef SUPPORT_UTF8
3219     /* UTF-8 mode */
3220     if (utf8)
3221     {
3222 nigel 93 register unsigned int d;
3223 nigel 77 for (fi = min;; fi++)
3224     {
3225 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3226 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3227 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3228 ph10 427 if (eptr >= md->end_subject)
3229 ph10 426 {
3230 ph10 427 SCHECK_PARTIAL();
3231 ph10 510 MRRETURN(MATCH_NOMATCH);
3232 ph10 427 }
3233 nigel 77 GETCHARINC(d, eptr);
3234     if (d < 256) d = md->lcc[d];
3235 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3236 nigel 77 }
3237     }
3238     else
3239     #endif
3240     /* Not UTF-8 mode */
3241     {
3242     for (fi = min;; fi++)
3243     {
3244 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3245 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3246 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3247 ph10 426 if (eptr >= md->end_subject)
3248     {
3249     SCHECK_PARTIAL();
3250 ph10 510 MRRETURN(MATCH_NOMATCH);
3251 ph10 426 }
3252 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3253 nigel 77 }
3254     }
3255     /* Control never gets here */
3256     }
3257    
3258     /* Maximize case */
3259    
3260     else
3261     {
3262     pp = eptr;
3263    
3264     #ifdef SUPPORT_UTF8
3265     /* UTF-8 mode */
3266     if (utf8)
3267     {
3268 nigel 93 register unsigned int d;
3269 nigel 77 for (i = min; i < max; i++)
3270     {
3271     int len = 1;
3272 ph10 463 if (eptr >= md->end_subject)
3273 ph10 462 {
3274 ph10 463 SCHECK_PARTIAL();
3275 ph10 462 break;
3276 ph10 463 }
3277 nigel 77 GETCHARLEN(d, eptr, len);
3278     if (d < 256) d = md->lcc[d];
3279     if (fc == d) break;
3280     eptr += len;
3281     }
3282 nigel 93 if (possessive) continue;
3283     for(;;)
3284 nigel 77 {
3285 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3286 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3287     if (eptr-- == pp) break; /* Stop if tried at original pos */
3288     BACKCHAR(eptr);
3289     }
3290     }
3291     else
3292     #endif
3293     /* Not UTF-8 mode */
3294     {
3295     for (i = min; i < max; i++)
3296     {
3297 ph10 463 if (eptr >= md->end_subject)
3298 ph10 462 {
3299     SCHECK_PARTIAL();
3300     break;
3301 ph10 463 }
3302 ph10 462 if (fc == md->lcc[*eptr]) break;
3303 nigel 77 eptr++;
3304     }
3305 nigel 93 if (possessive) continue;
3306 nigel 77 while (eptr >= pp)
3307     {
3308 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3309 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3310     eptr--;
3311     }
3312     }
3313    
3314 ph10 510 MRRETURN(MATCH_NOMATCH);
3315 nigel 77 }
3316     /* Control never gets here */
3317     }
3318    
3319     /* Caseful comparisons */
3320    
3321     else
3322     {
3323     #ifdef SUPPORT_UTF8
3324     /* UTF-8 mode */
3325     if (utf8)
3326     {
3327 nigel 93 register unsigned int d;
3328 nigel 77 for (i = 1; i <= min; i++)
3329     {
3330 ph10 426 if (eptr >= md->end_subject)
3331     {
3332     SCHECK_PARTIAL();
3333 ph10 510 MRRETURN(MATCH_NOMATCH);
3334 ph10 427 }
3335 nigel 77 GETCHARINC(d, eptr);
3336 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3337 nigel 77 }
3338     }
3339     else
3340     #endif
3341     /* Not UTF-8 mode */
3342     {
3343     for (i = 1; i <= min; i++)
3344 ph10 426 {
3345     if (eptr >= md->end_subject)
3346     {
3347     SCHECK_PARTIAL();
3348 ph10 510 MRRETURN(MATCH_NOMATCH);
3349 ph10 427 }
3350 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3351 ph10 427 }
3352 nigel 77 }
3353    
3354     if (min == max) continue;
3355    
3356     if (minimize)
3357     {
3358     #ifdef SUPPORT_UTF8
3359     /* UTF-8 mode */
3360     if (utf8)
3361     {
3362 nigel 93 register unsigned int d;
3363 nigel 77 for (fi = min;; fi++)
3364     {
3365 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3366 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3367 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3368 ph10 427 if (eptr >= md->end_subject)
3369 ph10 426 {
3370 ph10 427 SCHECK_PARTIAL();
3371 ph10 510 MRRETURN(MATCH_NOMATCH);
3372 ph10 427 }
3373 nigel 77 GETCHARINC(d, eptr);
3374 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3375 nigel 77 }
3376     }
3377     else
3378     #endif
3379     /* Not UTF-8 mode */
3380     {
3381     for (fi = min;; fi++)
3382     {
3383 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3384 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3385 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3386 ph10 426 if (eptr >= md->end_subject)
3387     {
3388     SCHECK_PARTIAL();
3389 ph10 510 MRRETURN(MATCH_NOMATCH);
3390 ph10 427 }
3391 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3392 nigel 77 }
3393     }
3394     /* Control never gets here */
3395     }
3396    
3397     /* Maximize case */
3398    
3399     else
3400     {
3401     pp = eptr;
3402    
3403     #ifdef SUPPORT_UTF8
3404     /* UTF-8 mode */
3405     if (utf8)
3406     {
3407 nigel 93 register unsigned int d;
3408 nigel 77 for (i = min; i < max; i++)
3409     {
3410     int len = 1;
3411 ph10 463 if (eptr >= md->end_subject)
3412 ph10 462 {
3413 ph10 463 SCHECK_PARTIAL();
3414 ph10 462 break;
3415 ph10 463 }
3416 nigel 77 GETCHARLEN(d, eptr, len);
3417     if (fc == d) break;
3418     eptr += len;
3419     }
3420 nigel 93 if (possessive) continue;
3421 nigel 77 for(;;)
3422     {
3423 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3424 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3425     if (eptr-- == pp) break; /* Stop if tried at original pos */
3426     BACKCHAR(eptr);
3427     }
3428     }
3429     else
3430     #endif
3431     /* Not UTF-8 mode */
3432     {
3433     for (i = min; i < max; i++)
3434     {
3435 ph10 463 if (eptr >= md->end_subject)
3436 ph10 462 {
3437 ph10 463 SCHECK_PARTIAL();
3438 ph10 462 break;
3439 ph10 463 }
3440 ph10 462 if (fc == *eptr) break;
3441 nigel 77 eptr++;
3442     }
3443 nigel 93 if (possessive) continue;
3444 nigel 77 while (eptr >= pp)
3445     {
3446 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3447 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3448     eptr--;
3449     }
3450     }
3451    
3452 ph10 510 MRRETURN(MATCH_NOMATCH);
3453 nigel 77 }
3454     }
3455     /* Control never gets here */
3456    
3457     /* Match a single character type repeatedly; several different opcodes
3458     share code. This is very similar to the code for single characters, but we
3459     repeat it in the interests of efficiency. */
3460    
3461     case OP_TYPEEXACT:
3462     min = max = GET2(ecode, 1);
3463     minimize = TRUE;
3464     ecode += 3;
3465     goto REPEATTYPE;
3466    
3467     case OP_TYPEUPTO:
3468     case OP_TYPEMINUPTO:
3469     min = 0;
3470     max = GET2(ecode, 1);
3471     minimize = *ecode == OP_TYPEMINUPTO;
3472     ecode += 3;
3473     goto REPEATTYPE;
3474    
3475 nigel 93 case OP_TYPEPOSSTAR:
3476     possessive = TRUE;
3477     min = 0;
3478     max = INT_MAX;
3479     ecode++;
3480     goto REPEATTYPE;
3481    
3482     case OP_TYPEPOSPLUS:
3483     possessive = TRUE;
3484     min = 1;
3485     max = INT_MAX;
3486     ecode++;
3487     goto REPEATTYPE;
3488    
3489     case OP_TYPEPOSQUERY:
3490     possessive = TRUE;
3491     min = 0;
3492     max = 1;
3493     ecode++;
3494     goto REPEATTYPE;
3495    
3496     case OP_TYPEPOSUPTO:
3497     possessive = TRUE;
3498     min = 0;
3499     max = GET2(ecode, 1);
3500     ecode += 3;
3501     goto REPEATTYPE;
3502    
3503 nigel 77 case OP_TYPESTAR:
3504     case OP_TYPEMINSTAR:
3505     case OP_TYPEPLUS:
3506     case OP_TYPEMINPLUS:
3507     case OP_TYPEQUERY:
3508     case OP_TYPEMINQUERY:
3509     c = *ecode++ - OP_TYPESTAR;
3510     minimize = (c & 1) != 0;
3511     min = rep_min[c]; /* Pick up values from tables; */
3512     max = rep_max[c]; /* zero for max => infinity */
3513     if (max == 0) max = INT_MAX;
3514    
3515     /* Common code for all repeated single character type matches. Note that
3516     in UTF-8 mode, '.' matches a character of any length, but for the other
3517     character types, the valid characters are all one-byte long. */
3518    
3519     REPEATTYPE:
3520     ctype = *ecode++; /* Code for the character type */
3521    
3522     #ifdef SUPPORT_UCP
3523     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3524     {
3525     prop_fail_result = ctype == OP_NOTPROP;
3526     prop_type = *ecode++;
3527 nigel 87 prop_value = *ecode++;
3528 nigel 77 }
3529     else prop_type = -1;
3530     #endif
3531    
3532     /* First, ensure the minimum number of matches are present. Use inline
3533     code for maximizing the speed, and do the type test once at the start
3534 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3535 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3536     and single-bytes. */
3537    
3538     if (min > 0)
3539     {
3540     #ifdef SUPPORT_UCP
3541 nigel 87 if (prop_type >= 0)
3542 nigel 77 {
3543 nigel 87 switch(prop_type)
3544 nigel 77 {
3545 nigel 87 case PT_ANY:
3546 ph10 510 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3547 nigel 87 for (i = 1; i <= min; i++)
3548     {
3549 ph10 427 if (eptr >= md->end_subject)
3550 ph10 426 {
3551 ph10 427 SCHECK_PARTIAL();
3552 ph10 510 MRRETURN(MATCH_NOMATCH);
3553 ph10 427 }
3554 ph10 184 GETCHARINCTEST(c, eptr);
3555 nigel 87 }
3556     break;
3557    
3558     case PT_LAMP:
3559     for (i = 1; i <= min; i++)
3560     {
3561 ph10 427 if (eptr >= md->end_subject)
3562 ph10 426 {
3563 ph10 427 SCHECK_PARTIAL();
3564 ph10 510 MRRETURN(MATCH_NOMATCH);
3565 ph10 427 }
3566 ph10 184 GETCHARINCTEST(c, eptr);
3567 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3568 nigel 87 if ((prop_chartype == ucp_Lu ||
3569     prop_chartype == ucp_Ll ||
3570     prop_chartype == ucp_Lt) == prop_fail_result)
3571 ph10 510 MRRETURN(MATCH_NOMATCH);
3572 nigel 87 }
3573     break;
3574    
3575     case PT_GC:
3576     for (i = 1; i <= min; i++)
3577     {
3578 ph10 427 if (eptr >= md->end_subject)
3579 ph10 426 {
3580 ph10 427 SCHECK_PARTIAL();
3581 ph10 510 MRRETURN(MATCH_NOMATCH);
3582 ph10 427 }
3583 ph10 184 GETCHARINCTEST(c, eptr);
3584 ph10 349 prop_category = UCD_CATEGORY(c);
3585 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3586 ph10 510 MRRETURN(MATCH_NOMATCH);
3587 nigel 87 }
3588     break;
3589    
3590     case PT_PC:
3591     for (i = 1; i <= min; i++)
3592     {
3593 ph10 427 if (eptr >= md->end_subject)
3594 ph10 426 {
3595 ph10 427 SCHECK_PARTIAL();
3596 ph10 510 MRRETURN(MATCH_NOMATCH);
3597 ph10 427 }
3598 ph10 184 GETCHARINCTEST(c, eptr);
3599 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3600 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3601 ph10 510 MRRETURN(MATCH_NOMATCH);
3602 nigel 87 }
3603     break;
3604    
3605     case PT_SC:
3606     for (i = 1; i <= min; i++)
3607     {
3608 ph10 427 if (eptr >= md->end_subject)
3609 ph10 426 {
3610 ph10 427 SCHECK_PARTIAL();
3611 ph10 510 MRRETURN(MATCH_NOMATCH);
3612 ph10 427 }
3613 ph10 184 GETCHARINCTEST(c, eptr);
3614 ph10 349 prop_script = UCD_SCRIPT(c);
3615 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3616 ph10 510 MRRETURN(MATCH_NOMATCH);
3617 nigel 87 }
3618     break;
3619 ph10 527
3620 ph10 517 case PT_ALNUM:
3621     for (i = 1; i <= min; i++)
3622     {
3623     if (eptr >= md->end_subject)
3624     {
3625     SCHECK_PARTIAL();
3626     MRRETURN(MATCH_NOMATCH);
3627     }
3628     GETCHARINCTEST(c, eptr);
3629 ph10 527 prop_category = UCD_CATEGORY(c);
3630     if ((prop_category == ucp_L || prop_category == ucp_N)
3631 ph10 517 == prop_fail_result)
3632     MRRETURN(MATCH_NOMATCH);
3633     }
3634     break;
3635 ph10 527
3636 ph10 517 case PT_SPACE: /* Perl space */
3637     for (i = 1; i <= min; i++)
3638     {
3639     if (eptr >= md->end_subject)
3640     {
3641     SCHECK_PARTIAL();
3642     MRRETURN(MATCH_NOMATCH);
3643     }
3644     GETCHARINCTEST(c, eptr);
3645 ph10 527 prop_category = UCD_CATEGORY(c);
3646     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3647     c == CHAR_FF || c == CHAR_CR)
3648 ph10 517 == prop_fail_result)
3649     MRRETURN(MATCH_NOMATCH);
3650     }
3651     break;
3652 ph10 527
3653 ph10 517 case PT_PXSPACE: /* POSIX space */
3654     for (i = 1; i <= min; i++)
3655     {
3656     if (eptr >= md->end_subject)
3657     {
3658     SCHECK_PARTIAL();
3659     MRRETURN(MATCH_NOMATCH);
3660     }
3661     GETCHARINCTEST(c, eptr);
3662 ph10 527 prop_category = UCD_CATEGORY(c);
3663     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3664     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3665 ph10 517 == prop_fail_result)
3666     MRRETURN(MATCH_NOMATCH);
3667     }
3668     break;
3669 ph10 527
3670     case PT_WORD:
3671 ph10 517 for (i = 1; i <= min; i++)
3672     {
3673     if (eptr >= md->end_subject)
3674     {
3675     SCHECK_PARTIAL();
3676     MRRETURN(MATCH_NOMATCH);
3677     }
3678     GETCHARINCTEST(c, eptr);
3679 ph10 527 prop_category = UCD_CATEGORY(c);
3680 ph10 517 if ((prop_category == ucp_L || prop_category == ucp_N ||
3681 ph10 527 c == CHAR_UNDERSCORE)
3682 ph10 517 == prop_fail_result)
3683     MRRETURN(MATCH_NOMATCH);
3684     }
3685     break;
3686 ph10 527
3687 ph10 517 /* This should not occur */
3688 nigel 87
3689     default:
3690     RRETURN(PCRE_ERROR_INTERNAL);
3691 nigel 77 }
3692     }
3693    
3694     /* Match extended Unicode sequences. We will get here only if the
3695     support is in the binary; otherwise a compile-time error occurs. */
3696    
3697     else if (ctype == OP_EXTUNI)
3698     {
3699     for (i = 1; i <= min; i++)
3700     {
3701 ph10 427 if (eptr >= md->end_subject)
3702 ph10 426 {
3703 ph10 427 SCHECK_PARTIAL();
3704 ph10 510 MRRETURN(MATCH_NOMATCH);
3705 ph10 427 }
3706 nigel 77 GETCHARINCTEST(c, eptr);
3707 ph10 349 prop_category = UCD_CATEGORY(c);
3708 ph10 510 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3709 nigel 77 while (eptr < md->end_subject)
3710     {
3711     int len = 1;
3712 ph10 426 if (!utf8) c = *eptr;
3713     else { GETCHARLEN(c, eptr, len); }
3714 ph10 349 prop_category = UCD_CATEGORY(c);
3715 nigel 77 if (prop_category != ucp_M) break;
3716     eptr += len;
3717     }
3718     }
3719     }
3720    
3721     else
3722     #endif /* SUPPORT_UCP */
3723    
3724     /* Handle all other cases when the coding is UTF-8 */
3725    
3726     #ifdef SUPPORT_UTF8
3727     if (utf8) switch(ctype)
3728     {
3729     case OP_ANY:
3730     for (i = 1; i <= min; i++)
3731     {
3732 ph10 426 if (eptr >= md->end_subject)
3733     {
3734 ph10 427 SCHECK_PARTIAL();
3735 ph10 510 MRRETURN(MATCH_NOMATCH);
3736 ph10 427 }
3737 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3738 nigel 91 eptr++;
3739 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3740     }
3741     break;
3742    
3743 ph10 341 case OP_ALLANY:
3744     for (i = 1; i <= min; i++)
3745     {
3746 ph10 427 if (eptr >= md->end_subject)
3747 ph10 426 {
3748     SCHECK_PARTIAL();
3749 ph10 510 MRRETURN(MATCH_NOMATCH);
3750 ph10 427 }
3751 ph10 341 eptr++;
3752     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3753     }
3754     break;
3755    
3756 nigel 77 case OP_ANYBYTE:
3757 ph10 510 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3758 nigel 77 eptr += min;
3759     break;
3760    
3761 nigel 93 case OP_ANYNL:
3762     for (i = 1; i <= min; i++)
3763     {
3764 ph10 427 if (eptr >= md->end_subject)
3765 ph10 426 {
3766     SCHECK_PARTIAL();
3767 ph10 510 MRRETURN(MATCH_NOMATCH);
3768 ph10 427 }
3769 nigel 93 GETCHARINC(c, eptr);
3770     switch(c)
3771     {
3772 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3773 nigel 93 case 0x000d:
3774     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3775     break;
3776 ph10 231
3777 nigel 93 case 0x000a:
3778 ph10 231 break;
3779    
3780 nigel 93 case 0x000b:
3781     case 0x000c:
3782     case 0x0085:
3783     case 0x2028:
3784     case 0x2029:
3785 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3786 nigel 93 break;
3787     }
3788     }
3789     break;
3790    
3791 ph10 178 case OP_NOT_HSPACE:
3792     for (i = 1; i <= min; i++)
3793     {
3794 ph10 427 if (eptr >= md->end_subject)
3795 ph10 426 {
3796     SCHECK_PARTIAL();
3797 ph10 510 MRRETURN(MATCH_NOMATCH);
3798 ph10 427 }
3799 ph10 178 GETCHARINC(c, eptr);
3800     switch(c)
3801     {
3802     default: break;
3803     case 0x09: /* HT */
3804     case 0x20: /* SPACE */
3805     case 0xa0: /* NBSP */
3806     case 0x1680: /* OGHAM SPACE MARK */
3807     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3808     case 0x2000: /* EN QUAD */
3809     case 0x2001: /* EM QUAD */
3810     case 0x2002: /* EN SPACE */
3811     case 0x2003: /* EM SPACE */
3812     case 0x2004: /* THREE-PER-EM SPACE */
3813     case 0x2005: /* FOUR-PER-EM SPACE */
3814     case 0x2006: /* SIX-PER-EM SPACE */
3815     case 0x2007: /* FIGURE SPACE */
3816     case 0x2008: /* PUNCTUATION SPACE */
3817     case 0x2009: /* THIN SPACE */
3818     case 0x200A: /* HAIR SPACE */
3819     case 0x202f: /* NARROW NO-BREAK SPACE */
3820     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3821     case 0x3000: /* IDEOGRAPHIC SPACE */
3822 ph10 510 MRRETURN(MATCH_NOMATCH);
3823 ph10 178 }
3824     }
3825     break;
3826 ph10 182
3827 ph10 178 case OP_HSPACE:
3828     for (i = 1; i <= min; i++)
3829     {
3830 ph10 427 if (eptr >= md->end_subject)
3831 ph10 426 {
3832 ph10 427 SCHECK_PARTIAL();
3833 ph10 510 MRRETURN(MATCH_NOMATCH);
3834 ph10 427 }
3835 ph10 178 GETCHARINC(c, eptr);
3836     switch(c)
3837     {
3838 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3839 ph10 178 case 0x09: /* HT */
3840     case 0x20: /* SPACE */
3841     case 0xa0: /* NBSP */
3842     case 0x1680: /* OGHAM SPACE MARK */
3843     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3844     case 0x2000: /* EN QUAD */
3845     case 0x2001: /* EM QUAD */
3846     case 0x2002: /* EN SPACE */
3847     case 0x2003: /* EM SPACE */
3848     case 0x2004: /* THREE-PER-EM SPACE */
3849     case 0x2005: /* FOUR-PER-EM SPACE */
3850     case 0x2006: /* SIX-PER-EM SPACE */
3851     case 0x2007: /* FIGURE SPACE */
3852     case 0x2008: /* PUNCTUATION SPACE */
3853     case 0x2009: /* THIN SPACE */
3854     case 0x200A: /* HAIR SPACE */
3855     case 0x202f: /* NARROW NO-BREAK SPACE */
3856     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3857     case 0x3000: /* IDEOGRAPHIC SPACE */
3858     break;
3859     }
3860     }
3861     break;
3862 ph10 182
3863 ph10 178 case OP_NOT_VSPACE:
3864     for (i = 1; i <= min; i++)
3865     {
3866 ph10 427 if (eptr >= md->end_subject)
3867 ph10 426 {
3868 ph10 427 SCHECK_PARTIAL();
3869 ph10 510 MRRETURN(MATCH_NOMATCH);
3870 ph10 427 }
3871 ph10 178 GETCHARINC(c, eptr);
3872     switch(c)
3873     {
3874     default: break;
3875     case 0x0a: /* LF */
3876     case 0x0b: /* VT */
3877     case 0x0c: /* FF */
3878     case 0x0d: /* CR */
3879     case 0x85: /* NEL */
3880     case 0x2028: /* LINE SEPARATOR */
3881     case 0x2029: /* PARAGRAPH SEPARATOR */
3882 ph10 510 MRRETURN(MATCH_NOMATCH);
3883 ph10 178 }
3884     }
3885     break;
3886 ph10 182
3887 ph10 178 case OP_VSPACE:
3888     for (i = 1; i <= min; i++)
3889     {
3890 ph10 427 if (eptr >= md->end_subject)
3891 ph10 426 {
3892 ph10 427 SCHECK_PARTIAL();
3893 ph10 510 MRRETURN(MATCH_NOMATCH);
3894 ph10 427 }
3895 ph10 178 GETCHARINC(c, eptr);
3896     switch(c)
3897     {
3898 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3899 ph10 178 case 0x0a: /* LF */
3900     case 0x0b: /* VT */
3901     case 0x0c: /* FF */
3902     case 0x0d: /* CR */
3903     case 0x85: /* NEL */
3904     case 0x2028: /* LINE SEPARATOR */
3905     case 0x2029: /* PARAGRAPH SEPARATOR */
3906 ph10 182 break;
3907 ph10 178 }
3908     }
3909     break;
3910    
3911 nigel 77 case OP_NOT_DIGIT:
3912     for (i = 1; i <= min; i++)
3913     {
3914 ph10 427 if (eptr >= md->end_subject)
3915 ph10 426 {
3916 ph10 427 SCHECK_PARTIAL();
3917 ph10 510 MRRETURN(MATCH_NOMATCH);
3918 ph10 427 }
3919 nigel 77 GETCHARINC(c, eptr);
3920     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3921 ph10 510 MRRETURN(MATCH_NOMATCH);
3922 nigel 77 }
3923     break;
3924    
3925     case OP_DIGIT:
3926     for (i = 1; i <= min; i++)
3927     {
3928 ph10 427 if (eptr >= md->end_subject)