/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 427 - (hide annotations) (download)
Fri Aug 28 09:55:54 2009 UTC (3 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 161361 byte(s)
Add new PCRE_PARTIAL_HARD option.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 381 Copyright (c) 1997-2009 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
/* Flag bits for the match() function */

#define match_condassert     0x01  /* Called to check a condition assertion */
#define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */

/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Special internal returns from the match() function. Make them sufficiently
negative to avoid the external error codes. */

#define MATCH_COMMIT       (-999)
#define MATCH_PRUNE        (-998)
#define MATCH_SKIP         (-997)
#define MATCH_THEN         (-996)

/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long. */

#define REC_STACK_SAVE_MAX 30
84    
/* Min and max values for the common repeats; for the maxima, 0 => infinity.
The two tables have the same length and are meant to be indexed in parallel. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
89    
90    
91    
92     #ifdef DEBUG
93     /*************************************************
94     * Debugging function to print chars *
95     *************************************************/
96    
97     /* Print a sequence of chars in printable format, stopping at the end of the
98     subject if requested.
99    
100     Arguments:
101     p points to characters
102     length number to print
103     is_subject TRUE if printing from within md->start_subject
104     md pointer to matching data block, if is_subject is TRUE
105    
106     Returns: nothing
107     */
108    
109     static void
110     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111     {
112 nigel 93 unsigned int c;
113 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114     while (length-- > 0)
115     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
116     }
117     #endif
118    
119    
120    
121     /*************************************************
122     * Match a back-reference *
123     *************************************************/
124    
125     /* If a back reference hasn't been set, the length that is passed is greater
126     than the number of characters left in the string, so the match fails.
127    
128     Arguments:
129     offset index into the offset vector
130     eptr points into the subject
131     length length to be matched
132     md points to match data block
133     ims the ims flags
134    
135     Returns: TRUE if matched
136     */
137    
138     static BOOL
139 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
140 nigel 77 unsigned long int ims)
141     {
142 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
143 nigel 77
144     #ifdef DEBUG
145     if (eptr >= md->end_subject)
146     printf("matching subject <null>");
147     else
148     {
149     printf("matching subject ");
150     pchars(eptr, length, TRUE, md);
151     }
152     printf(" against backref ");
153     pchars(p, length, FALSE, md);
154     printf("\n");
155     #endif
156    
157     /* Always fail if not enough characters left */
158    
159     if (length > md->end_subject - eptr) return FALSE;
160    
161 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162     properly if Unicode properties are supported. Otherwise, we can check only
163     ASCII characters. */
164 nigel 77
165     if ((ims & PCRE_CASELESS) != 0)
166     {
167 ph10 354 #ifdef SUPPORT_UTF8
168     #ifdef SUPPORT_UCP
169     if (md->utf8)
170     {
171 ph10 358 USPTR endptr = eptr + length;
172 ph10 354 while (eptr < endptr)
173     {
174 ph10 358 int c, d;
175 ph10 354 GETCHARINC(c, eptr);
176     GETCHARINC(d, p);
177     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178 ph10 358 }
179     }
180 ph10 354 else
181     #endif
182     #endif
183    
184     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185     is no UCP support. */
186 ph10 358
187 nigel 77 while (length-- > 0)
188 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189 nigel 77 }
190 ph10 358
191 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
192     are in UTF-8 mode. */
193 ph10 358
194 nigel 77 else
195     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
197     return TRUE;
198     }
199    
200    
201    
202     /***************************************************************************
203     ****************************************************************************
204     RECURSION IN THE match() FUNCTION
205    
206 nigel 87 The match() function is highly recursive, though not every recursive call
207     increases the recursive depth. Nevertheless, some regular expressions can cause
208     it to recurse to a great depth. I was writing for Unix, so I just let it call
209     itself recursively. This uses the stack for saving everything that has to be
210     saved for a recursive call. On Unix, the stack can be large, and this works
211     fine.
212 nigel 77
213 nigel 87 It turns out that on some non-Unix-like systems there are problems with
214     programs that use a lot of stack. (This despite the fact that every last chip
215     has oodles of memory these days, and techniques for extending the stack have
216     been known for decades.) So....
217 nigel 77
218     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219     calls by keeping local variables that need to be preserved in blocks of memory
220 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
221 nigel 77 achieve this so that the actual code doesn't look very different to what it
222     always used to.
223 ph10 164
224 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
225 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
226     Switzer, the use of longjmp() has been abolished, at the cost of having to
227     provide a unique number for each call to RMATCH. There is no way of generating
228     a sequence of numbers at compile time in C. I have given them names, to make
229     them stand out more clearly.
230    
231     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
234     don't have indeterminate values; this has meant that the frame size can be
235 ph10 164 reduced because the result can be "passed back" by straight setting of the
236     variable instead of being passed in the frame.
237 nigel 77 ****************************************************************************
238     ***************************************************************************/
239    
/* Numbers for RMATCH calls. Each RMATCH call site passes one of these as its
"rw" argument. When this list is changed, the code at HEAP_RETURN below must
be updated in sync. */

enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
       RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
       RM51,  RM52, RM53, RM54 };
249 ph10 164
250 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
251 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
252 ph10 164 actually used in this definition. */
253 nigel 77
254     #ifndef NO_RECURSE
255     #define REGISTER register
256 ph10 164
257 nigel 87 #ifdef DEBUG
/* Debugging forms. RMATCH calls match() one level deeper, stores the result in
rrc, and traces the call and return with the source line number; "rw" is
unused. RRETURN prints the value being returned, then returns it. Both expand
to a braced block. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }
269     #else
/* Production forms: RMATCH is a plain recursive call to match() with the
depth incremented and the result left in rrc ("rw" is unused); RRETURN is a
plain return. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
#define RRETURN(ra) return ra
273 nigel 87 #endif
274    
275 nigel 77 #else
276    
277    
278 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
279     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280     argument of match(), which never changes. */
281 nigel 77
282     #define REGISTER
283    
/* Heap form of RMATCH. A new heapframe is obtained from pcre_stack_malloc,
filled with the "arguments" of the simulated call, and linked to the current
frame; control then jumps to HEAP_RECURSE. The current frame records "rw" in
Xwhere, and the label L_<rw> generated here marks the point where execution
resumes after the simulated return. "rd" is unused.

If the allocation fails, the macro gives up with PCRE_ERROR_NOMEMORY via
RRETURN instead of writing through a NULL pointer. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
303    
/* Heap form of RRETURN. The current frame is unlinked and released with
pcre_stack_free. If a previous frame exists, the result is placed in rrc and
control jumps to HEAP_RETURN; if the released frame was the top-level one
(Xprevframe == NULL), the function really returns. */

#define RRETURN(ra)\
  {\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
316    
317    
318     /* Structure for remembering the local variables in a private frame */
319    
320     typedef struct heapframe {
321     struct heapframe *Xprevframe;
322    
323     /* Function arguments that may change */
324    
325 ph10 409 USPTR Xeptr;
326 nigel 77 const uschar *Xecode;
327 ph10 409 USPTR Xmstart;
328 nigel 77 int Xoffset_top;
329     long int Xims;
330     eptrblock *Xeptrb;
331     int Xflags;
332 nigel 91 unsigned int Xrdepth;
333 nigel 77
334     /* Function local variables */
335    
336 ph10 409 USPTR Xcallpat;
337 ph10 406 #ifdef SUPPORT_UTF8
338 ph10 409 USPTR Xcharptr;
339 ph10 406 #endif
340 ph10 409 USPTR Xdata;
341     USPTR Xnext;
342     USPTR Xpp;
343     USPTR Xprev;
344     USPTR Xsaved_eptr;
345 nigel 77
346     recursion_info Xnew_recursive;
347    
348     BOOL Xcur_is_word;
349     BOOL Xcondition;
350     BOOL Xprev_is_word;
351    
352     unsigned long int Xoriginal_ims;
353    
354     #ifdef SUPPORT_UCP
355     int Xprop_type;
356 nigel 87 int Xprop_value;
357 nigel 77 int Xprop_fail_result;
358     int Xprop_category;
359     int Xprop_chartype;
360 nigel 87 int Xprop_script;
361 ph10 123 int Xoclength;
362     uschar Xocchars[8];
363 nigel 77 #endif
364    
365 ph10 403 int Xcodelink;
366 nigel 77 int Xctype;
367 nigel 93 unsigned int Xfc;
368 nigel 77 int Xfi;
369     int Xlength;
370     int Xmax;
371     int Xmin;
372     int Xnumber;
373     int Xoffset;
374     int Xop;
375     int Xsave_capture_last;
376     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
377     int Xstacksave[REC_STACK_SAVE_MAX];
378    
379     eptrblock Xnewptrb;
380    
381 ph10 164 /* Where to jump back to */
382 nigel 77
383 ph10 164 int Xwhere;
384 ph10 165
385 nigel 77 } heapframe;
386    
387     #endif
388    
389    
390     /***************************************************************************
391     ***************************************************************************/
392    
393    
394    
395     /*************************************************
396     * Match from current position *
397     *************************************************/
398    
399 nigel 93 /* This function is called recursively in many circumstances. Whenever it
400 nigel 77 returns a negative (error) response, the outer incarnation must also return the
401 ph10 426 same response. */
402 nigel 77
403 ph10 426 /* These macros pack up tests that are used for partial matching, and which
404     appear several times in the code. We set the "hit end" flag if the pointer is
405     at the end of the subject and also past the start of the subject (i.e.
406 ph10 427 something has been matched). For hard partial matching, we then return
407     immediately. The second one is used when we already know we are past the end of
408     the subject. */
409 ph10 426
/* CHECK_PARTIAL: when partial matching is enabled and eptr has reached the end
of the subject having moved past mstart, record that the end was hit; if
md->partial is greater than 1, return PCRE_ERROR_PARTIAL at once. */

#define CHECK_PARTIAL()\
  if (md->partial && eptr >= md->end_subject && eptr > mstart)\
    {\
    md->hitend = TRUE;\
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
    }

/* SCHECK_PARTIAL: the same, but without the end-of-subject test, for call
sites that have already established it. */

#define SCHECK_PARTIAL()\
  if (md->partial && eptr > mstart)\
    {\
    md->hitend = TRUE;\
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
    }
424 ph10 426
425 ph10 427
426 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
427     the md structure (e.g. utf8, end_subject) into individual variables to improve
428 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
429     made performance worse.
430    
431     Arguments:
432 nigel 93 eptr pointer to current character in subject
433     ecode pointer to current position in compiled code
434 ph10 168 mstart pointer to the current match start position (can be modified
435 ph10 172 by encountering \K)
436 nigel 77 offset_top current top pointer
437     md pointer to "static" info for the match
438     ims current /i, /m, and /s options
439     eptrb pointer to chain of blocks containing eptr at start of
440     brackets - for testing for empty matches
441     flags can contain
442     match_condassert - this is an assertion condition
443 nigel 93 match_cbegroup - this is the start of an unlimited repeat
444     group that can match an empty string
445 nigel 87 rdepth the recursion depth
446 nigel 77
447     Returns: MATCH_MATCH if matched ) these values are >= 0
448     MATCH_NOMATCH if failed to match )
449     a negative PCRE_ERROR_xxx value if aborted by an error condition
450 nigel 87 (e.g. stopped by repeated call or recursion limit)
451 nigel 77 */
452    
453     static int
454 ph10 409 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
455 nigel 77 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
456 nigel 91 int flags, unsigned int rdepth)
457 nigel 77 {
458     /* These variables do not need to be preserved over recursion in this function,
459 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
460     "register" because they are used a lot in loops. */
461 nigel 77
462 nigel 91 register int rrc; /* Returns from recursive calls */
463     register int i; /* Used for loops not involving calls to RMATCH() */
464 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
465 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
466 nigel 77
467 nigel 93 BOOL minimize, possessive; /* Quantifier options */
468 ph10 403 int condcode;
469 nigel 93
470 nigel 77 /* When recursion is not being used, all "local" variables that have to be
471     preserved over calls to RMATCH() are part of a "frame" which is obtained from
472     heap storage. Set up the top-level frame here; others are obtained from the
473     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
474    
475     #ifdef NO_RECURSE
476     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
477     frame->Xprevframe = NULL; /* Marks the top level */
478    
479     /* Copy in the original argument variables */
480    
481     frame->Xeptr = eptr;
482     frame->Xecode = ecode;
483 ph10 168 frame->Xmstart = mstart;
484 nigel 77 frame->Xoffset_top = offset_top;
485     frame->Xims = ims;
486     frame->Xeptrb = eptrb;
487     frame->Xflags = flags;
488 nigel 87 frame->Xrdepth = rdepth;
489 nigel 77
490     /* This is where control jumps back to to effect "recursion" */
491    
492     HEAP_RECURSE:
493    
494     /* Macros make the argument variables come from the current frame */
495    
496     #define eptr frame->Xeptr
497     #define ecode frame->Xecode
498 ph10 168 #define mstart frame->Xmstart
499 nigel 77 #define offset_top frame->Xoffset_top
500     #define ims frame->Xims
501     #define eptrb frame->Xeptrb
502     #define flags frame->Xflags
503 nigel 87 #define rdepth frame->Xrdepth
504 nigel 77
505     /* Ditto for the local variables */
506    
507     #ifdef SUPPORT_UTF8
508     #define charptr frame->Xcharptr
509     #endif
510     #define callpat frame->Xcallpat
511 ph10 403 #define codelink frame->Xcodelink
512 nigel 77 #define data frame->Xdata
513     #define next frame->Xnext
514     #define pp frame->Xpp
515     #define prev frame->Xprev
516     #define saved_eptr frame->Xsaved_eptr
517    
518     #define new_recursive frame->Xnew_recursive
519    
520     #define cur_is_word frame->Xcur_is_word
521     #define condition frame->Xcondition
522     #define prev_is_word frame->Xprev_is_word
523    
524     #define original_ims frame->Xoriginal_ims
525    
526     #ifdef SUPPORT_UCP
527     #define prop_type frame->Xprop_type
528 nigel 87 #define prop_value frame->Xprop_value
529 nigel 77 #define prop_fail_result frame->Xprop_fail_result
530     #define prop_category frame->Xprop_category
531     #define prop_chartype frame->Xprop_chartype
532 nigel 87 #define prop_script frame->Xprop_script
533 ph10 115 #define oclength frame->Xoclength
534     #define occhars frame->Xocchars
535 nigel 77 #endif
536    
537     #define ctype frame->Xctype
538     #define fc frame->Xfc
539     #define fi frame->Xfi
540     #define length frame->Xlength
541     #define max frame->Xmax
542     #define min frame->Xmin
543     #define number frame->Xnumber
544     #define offset frame->Xoffset
545     #define op frame->Xop
546     #define save_capture_last frame->Xsave_capture_last
547     #define save_offset1 frame->Xsave_offset1
548     #define save_offset2 frame->Xsave_offset2
549     #define save_offset3 frame->Xsave_offset3
550     #define stacksave frame->Xstacksave
551    
552     #define newptrb frame->Xnewptrb
553    
554     /* When recursion is being used, local variables are allocated on the stack and
555     get preserved during recursion in the normal way. In this environment, fi and
556     i, and fc and c, can be the same variables. */
557    
558 nigel 93 #else /* NO_RECURSE not defined */
559 nigel 77 #define fi i
560     #define fc c
561    
562    
563 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
564     const uschar *charptr; /* in small blocks of the code. My normal */
565     #endif /* style of coding would have declared */
566     const uschar *callpat; /* them within each of those blocks. */
567     const uschar *data; /* However, in order to accommodate the */
568     const uschar *next; /* version of this code that uses an */
569     USPTR pp; /* external "stack" implemented on the */
570     const uschar *prev; /* heap, it is easier to declare them all */
571     USPTR saved_eptr; /* here, so the declarations can be cut */
572     /* out in a block. The only declarations */
573     recursion_info new_recursive; /* within blocks below are for variables */
574     /* that do not have to be preserved over */
575     BOOL cur_is_word; /* a recursive call to RMATCH(). */
576     BOOL condition;
577 nigel 77 BOOL prev_is_word;
578    
579     unsigned long int original_ims;
580    
581     #ifdef SUPPORT_UCP
582     int prop_type;
583 nigel 87 int prop_value;
584 nigel 77 int prop_fail_result;
585     int prop_category;
586     int prop_chartype;
587 nigel 87 int prop_script;
588 ph10 115 int oclength;
589     uschar occhars[8];
590 nigel 77 #endif
591    
592 ph10 399 int codelink;
593 nigel 77 int ctype;
594     int length;
595     int max;
596     int min;
597     int number;
598     int offset;
599     int op;
600     int save_capture_last;
601     int save_offset1, save_offset2, save_offset3;
602     int stacksave[REC_STACK_SAVE_MAX];
603    
604     eptrblock newptrb;
605 nigel 93 #endif /* NO_RECURSE */
606 nigel 77
607     /* These statements are here to stop the compiler complaining about uninitialized
608     variables. */
609    
610     #ifdef SUPPORT_UCP
611 nigel 87 prop_value = 0;
612 nigel 77 prop_fail_result = 0;
613     #endif
614    
615 nigel 93
616 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
617     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
618     used. Thanks to Ian Taylor for noticing this possibility and sending the
619     original patch. */
620    
621     TAIL_RECURSE:
622    
623 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
624     are specified by the macro RMATCH and RRETURN is used to return. When
625     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
626     and a "return", respectively (possibly with some debugging if DEBUG is
627     defined). However, RMATCH isn't like a function call because it's quite a
628     complicated macro. It has to be used in one particular way. This shouldn't,
629     however, impact performance when true recursion is being used. */
630 nigel 77
631 ph10 164 #ifdef SUPPORT_UTF8
632     utf8 = md->utf8; /* Local copy of the flag */
633     #else
634     utf8 = FALSE;
635     #endif
636    
637 nigel 87 /* First check that we haven't called match() too many times, or that we
638     haven't exceeded the recursive call limit. */
639    
640 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
641 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
642 nigel 77
643     original_ims = ims; /* Save for resetting on ')' */
644 nigel 91
645 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
646     string, the match_cbegroup flag is set. When this is the case, add the current
647     subject pointer to the chain of such remembered pointers, to be checked when we
648     hit the closing ket, in order to break infinite loops that match no characters.
649 ph10 197 When match() is called in other circumstances, don't add to the chain. The
650     match_cbegroup flag must NOT be used with tail recursion, because the memory
651     block that is used is on the stack, so a new one may be required for each
652     match(). */
653 nigel 77
654 nigel 93 if ((flags & match_cbegroup) != 0)
655 nigel 77 {
656 ph10 197 newptrb.epb_saved_eptr = eptr;
657     newptrb.epb_prev = eptrb;
658     eptrb = &newptrb;
659 nigel 77 }
660    
661 nigel 93 /* Now start processing the opcodes. */
662 nigel 77
663     for (;;)
664     {
665 nigel 93 minimize = possessive = FALSE;
666 nigel 77 op = *ecode;
667 ph10 406
668 nigel 77 /* For partial matching, remember if we ever hit the end of the subject after
669 ph10 426 matching at least one subject character. This code is now wrapped in a macro
670     because it appears several times below. */
671 nigel 77
672 ph10 426 CHECK_PARTIAL();
673 ph10 208
674 nigel 93 switch(op)
675     {
676 ph10 210 case OP_FAIL:
677 ph10 212 RRETURN(MATCH_NOMATCH);
678 ph10 211
679 ph10 210 case OP_PRUNE:
680     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
681     ims, eptrb, flags, RM51);
682     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
683 ph10 212 RRETURN(MATCH_PRUNE);
684 ph10 211
685 ph10 210 case OP_COMMIT:
686     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
687     ims, eptrb, flags, RM52);
688     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
689 ph10 212 RRETURN(MATCH_COMMIT);
690 ph10 211
691 ph10 210 case OP_SKIP:
692     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
693     ims, eptrb, flags, RM53);
694     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
695 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
696 ph10 212 RRETURN(MATCH_SKIP);
697 ph10 211
698 ph10 210 case OP_THEN:
699     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
700 ph10 212 ims, eptrb, flags, RM54);
701 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
702 ph10 212 RRETURN(MATCH_THEN);
703 ph10 211
704 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
705     the current subject position in the working slot at the top of the vector.
706     We mustn't change the current values of the data slot, because they may be
707     set from a previous iteration of this group, and be referred to by a
708     reference inside the group.
709 nigel 77
710 nigel 93 If the bracket fails to match, we need to restore this value and also the
711     values of the final offsets, in case they were set by a previous iteration
712     of the same bracket.
713 nigel 77
714 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
715     a non-capturing bracket. Don't worry about setting the flag for the error
716     case here; that is handled in the code for KET. */
717 nigel 77
718 nigel 93 case OP_CBRA:
719     case OP_SCBRA:
720     number = GET2(ecode, 1+LINK_SIZE);
721 nigel 77 offset = number << 1;
722    
723     #ifdef DEBUG
724 nigel 93 printf("start bracket %d\n", number);
725     printf("subject=");
726 nigel 77 pchars(eptr, 16, TRUE, md);
727     printf("\n");
728     #endif
729    
730     if (offset < md->offset_max)
731     {
732     save_offset1 = md->offset_vector[offset];
733     save_offset2 = md->offset_vector[offset+1];
734     save_offset3 = md->offset_vector[md->offset_end - number];
735     save_capture_last = md->capture_last;
736    
737     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
738     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
739    
740 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
741 nigel 77 do
742     {
743 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
744     ims, eptrb, flags, RM1);
745 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
746 nigel 77 md->capture_last = save_capture_last;
747     ecode += GET(ecode, 1);
748     }
749     while (*ecode == OP_ALT);
750    
751     DPRINTF(("bracket %d failed\n", number));
752    
753     md->offset_vector[offset] = save_offset1;
754     md->offset_vector[offset+1] = save_offset2;
755     md->offset_vector[md->offset_end - number] = save_offset3;
756    
757     RRETURN(MATCH_NOMATCH);
758     }
759    
760 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
761     as a non-capturing bracket. */
762 nigel 77
763 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
764     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
765    
766 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
767 nigel 77
768 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
769     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
770    
771 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
772     final alternative within the brackets, we would return the result of a
773     recursive call to match() whatever happened. We can reduce stack usage by
774 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
775     is set.*/
776 nigel 77
777 nigel 93 case OP_BRA:
778     case OP_SBRA:
779     DPRINTF(("start non-capturing bracket\n"));
780     flags = (op >= OP_SBRA)? match_cbegroup : 0;
781 nigel 91 for (;;)
782 nigel 77 {
783 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
784 nigel 93 {
785 ph10 197 if (flags == 0) /* Not a possibly empty group */
786     {
787     ecode += _pcre_OP_lengths[*ecode];
788     DPRINTF(("bracket 0 tail recursion\n"));
789     goto TAIL_RECURSE;
790     }
791    
792     /* Possibly empty group; can't use tail recursion. */
793    
794     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
795     eptrb, flags, RM48);
796     RRETURN(rrc);
797 nigel 93 }
798 nigel 91
799     /* For non-final alternatives, continue the loop for a NOMATCH result;
800     otherwise return. */
801    
802 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
803     eptrb, flags, RM2);
804 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
805 nigel 77 ecode += GET(ecode, 1);
806     }
807 nigel 91 /* Control never reaches here. */
808 nigel 77
809     /* Conditional group: compilation checked that there are no more than
810     two branches. If the condition is false, skipping the first branch takes us
811     past the end if there is only one branch, but that's OK because that is
812 nigel 91 exactly what going to the ket would do. As there is only one branch to be
813     obeyed, we can use tail recursion to avoid using another stack frame. */
814 nigel 77
815     case OP_COND:
816 nigel 93 case OP_SCOND:
817 ph10 399 codelink= GET(ecode, 1);
818 ph10 406
819 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
820     inserted between OP_COND and an assertion condition. */
821 ph10 392
822 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
823     {
824     if (pcre_callout != NULL)
825     {
826     pcre_callout_block cb;
827     cb.version = 1; /* Version 1 of the callout block */
828     cb.callout_number = ecode[LINK_SIZE+2];
829     cb.offset_vector = md->offset_vector;
830     cb.subject = (PCRE_SPTR)md->start_subject;
831     cb.subject_length = md->end_subject - md->start_subject;
832     cb.start_match = mstart - md->start_subject;
833     cb.current_position = eptr - md->start_subject;
834     cb.pattern_position = GET(ecode, LINK_SIZE + 3);
835     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
836     cb.capture_top = offset_top/2;
837     cb.capture_last = md->capture_last;
838     cb.callout_data = md->callout_data;
839     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
840     if (rrc < 0) RRETURN(rrc);
841     }
842     ecode += _pcre_OP_lengths[OP_CALLOUT];
843     }
844 ph10 392
845 ph10 399 condcode = ecode[LINK_SIZE+1];
846 ph10 406
847 ph10 381 /* Now see what the actual condition is */
848 ph10 392
849 ph10 399 if (condcode == OP_RREF) /* Recursion test */
850 nigel 77 {
851 nigel 93 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
852     condition = md->recursive != NULL &&
853     (offset == RREF_ANY || offset == md->recursive->group_num);
854     ecode += condition? 3 : GET(ecode, 1);
855     }
856    
857 ph10 399 else if (condcode == OP_CREF) /* Group used test */
858 nigel 93 {
859 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
860 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
861     ecode += condition? 3 : GET(ecode, 1);
862 nigel 77 }
863    
864 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
865 nigel 93 {
866     condition = FALSE;
867     ecode += GET(ecode, 1);
868     }
869    
870 nigel 77 /* The condition is an assertion. Call match() to evaluate it - passing
871 nigel 93 match_condassert as the flags argument causes it to stop at the end of an
872     assertion. */
873 nigel 77
874     else
875     {
876 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
877     match_condassert, RM3);
878 nigel 77 if (rrc == MATCH_MATCH)
879     {
880 nigel 93 condition = TRUE;
881     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
882 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
883     }
884 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
885 nigel 77 {
886     RRETURN(rrc); /* Need braces because of following else */
887     }
888 nigel 93 else
889     {
890     condition = FALSE;
891 ph10 399 ecode += codelink;
892 nigel 93 }
893     }
894 nigel 91
895 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
896 ph10 197 we can use tail recursion to avoid using another stack frame, except when
897     match_cbegroup is required for an unlimited repeat of a possibly empty
898     group. If the second alternative doesn't exist, we can just plough on. */
899 nigel 91
900 nigel 93 if (condition || *ecode == OP_ALT)
901     {
902 nigel 91 ecode += 1 + LINK_SIZE;
903 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
904     {
905     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
906     RRETURN(rrc);
907     }
908     else /* Group must match something */
909     {
910     flags = 0;
911     goto TAIL_RECURSE;
912     }
913 nigel 77 }
914 ph10 395 else /* Condition false & no alternative */
915 nigel 93 {
916     ecode += 1 + LINK_SIZE;
917     }
918     break;
919 nigel 77
920    
921 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
922     recursion, we should restore the offsets appropriately and continue from
923     after the call. */
924 nigel 77
925 ph10 210 case OP_ACCEPT:
926 nigel 77 case OP_END:
927     if (md->recursive != NULL && md->recursive->group_num == 0)
928     {
929     recursion_info *rec = md->recursive;
930 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
931 nigel 77 md->recursive = rec->prevrec;
932     memmove(md->offset_vector, rec->offset_save,
933     rec->saved_max * sizeof(int));
934 ph10 168 mstart = rec->save_start;
935 nigel 77 ims = original_ims;
936     ecode = rec->after_call;
937     break;
938     }
939    
940     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
941     string - backtracking will then try other alternatives, if any. */
942    
943 ph10 168 if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
944     md->end_match_ptr = eptr; /* Record where we ended */
945     md->end_offset_top = offset_top; /* and how many extracts were taken */
946 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
947 nigel 77 RRETURN(MATCH_MATCH);
948    
949     /* Change option settings */
950    
951     case OP_OPT:
952     ims = ecode[1];
953     ecode += 2;
954     DPRINTF(("ims set to %02lx\n", ims));
955     break;
956    
957     /* Assertion brackets. Check the alternative branches in turn - the
958     matching won't pass the KET for an assertion. If any one branch matches,
959     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
960     start of each branch to move the current point backwards, so the code at
961     this level is identical to the lookahead case. */
962    
963     case OP_ASSERT:
964     case OP_ASSERTBACK:
965     do
966     {
967 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
968     RM4);
969 nigel 77 if (rrc == MATCH_MATCH) break;
970 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
971 nigel 77 ecode += GET(ecode, 1);
972     }
973     while (*ecode == OP_ALT);
974     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
975    
976     /* If checking an assertion for a condition, return MATCH_MATCH. */
977    
978     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
979    
980     /* Continue from after the assertion, updating the offsets high water
981     mark, since extracts may have been taken during the assertion. */
982    
983     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
984     ecode += 1 + LINK_SIZE;
985     offset_top = md->end_offset_top;
986     continue;
987    
988     /* Negative assertion: all branches must fail to match */
989    
990     case OP_ASSERT_NOT:
991     case OP_ASSERTBACK_NOT:
992     do
993     {
994 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
995     RM5);
996 nigel 77 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
997 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
998 nigel 77 ecode += GET(ecode,1);
999     }
1000     while (*ecode == OP_ALT);
1001    
1002     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1003    
1004     ecode += 1 + LINK_SIZE;
1005     continue;
1006    
1007     /* Move the subject pointer back. This occurs only at the start of
1008     each branch of a lookbehind assertion. If we are too close to the start to
1009     move back, this match function fails. When working with UTF-8 we move
1010     back a number of characters, not bytes. */
1011    
1012     case OP_REVERSE:
1013     #ifdef SUPPORT_UTF8
1014     if (utf8)
1015     {
1016 nigel 93 i = GET(ecode, 1);
1017     while (i-- > 0)
1018 nigel 77 {
1019     eptr--;
1020     if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1021 ph10 207 BACKCHAR(eptr);
1022 nigel 77 }
1023     }
1024     else
1025     #endif
1026    
1027     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1028    
1029     {
1030 nigel 93 eptr -= GET(ecode, 1);
1031 nigel 77 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1032     }
1033    
1034     /* Skip to next op code */
1035    
1036     ecode += 1 + LINK_SIZE;
1037     break;
1038    
1039     /* The callout item calls an external function, if one is provided, passing
1040     details of the match so far. This is mainly for debugging, though the
1041     function is able to force a failure. */
1042    
1043     case OP_CALLOUT:
1044     if (pcre_callout != NULL)
1045     {
1046     pcre_callout_block cb;
1047     cb.version = 1; /* Version 1 of the callout block */
1048     cb.callout_number = ecode[1];
1049     cb.offset_vector = md->offset_vector;
1050 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1051 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
1052 ph10 168 cb.start_match = mstart - md->start_subject;
1053 nigel 77 cb.current_position = eptr - md->start_subject;
1054     cb.pattern_position = GET(ecode, 2);
1055     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1056     cb.capture_top = offset_top/2;
1057     cb.capture_last = md->capture_last;
1058     cb.callout_data = md->callout_data;
1059     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1060     if (rrc < 0) RRETURN(rrc);
1061     }
1062     ecode += 2 + 2*LINK_SIZE;
1063     break;
1064    
1065     /* Recursion either matches the current regex, or some subexpression. The
1066     offset data is the offset to the starting bracket from the start of the
1067     whole pattern. (This is so that it works from duplicated subpatterns.)
1068    
1069     If there are any capturing brackets started but not finished, we have to
1070     save their starting points and reinstate them after the recursion. However,
1071     we don't know how many such there are (offset_top records the completed
1072     total) so we just have to save all the potential data. There may be up to
1073     65535 such values, which is too large to put on the stack, but using malloc
1074     for small numbers seems expensive. As a compromise, the stack is used when
1075     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1076     is used. If the malloc fails, the saved data cannot be kept, so this
1077     call of match() gives up and returns PCRE_ERROR_NOMEMORY instead of
1078     attempting the recursion.
1079    
1080     There are also other values that have to be saved. We use a chained
1081     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1082     for the original version of this logic. */
1083    
1084     case OP_RECURSE:
1085     {
1086     callpat = md->start_code + GET(ecode, 1);
1087 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1088     GET2(callpat, 1 + LINK_SIZE);
1089 nigel 77
1090     /* Add to "recursing stack" */
1091    
1092     new_recursive.prevrec = md->recursive;
1093     md->recursive = &new_recursive;
1094    
1095     /* Find where to continue from afterwards */
1096    
1097     ecode += 1 + LINK_SIZE;
1098     new_recursive.after_call = ecode;
1099    
1100     /* Now save the offset data. */
1101    
1102     new_recursive.saved_max = md->offset_end;
1103     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1104     new_recursive.offset_save = stacksave;
1105     else
1106     {
1107     new_recursive.offset_save =
1108     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1109     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1110     }
1111    
1112     memcpy(new_recursive.offset_save, md->offset_vector,
1113     new_recursive.saved_max * sizeof(int));
1114 ph10 168 new_recursive.save_start = mstart;
1115     mstart = eptr;
1116 nigel 77
1117     /* OK, now we can do the recursion. For each top-level alternative we
1118     restore the offset and recursion data. */
1119    
1120     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1121 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1122 nigel 77 do
1123     {
1124 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1125     md, ims, eptrb, flags, RM6);
1126 nigel 77 if (rrc == MATCH_MATCH)
1127     {
1128 nigel 87 DPRINTF(("Recursion matched\n"));
1129 nigel 77 md->recursive = new_recursive.prevrec;
1130     if (new_recursive.offset_save != stacksave)
1131     (pcre_free)(new_recursive.offset_save);
1132     RRETURN(MATCH_MATCH);
1133     }
1134 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1135 nigel 87 {
1136     DPRINTF(("Recursion gave error %d\n", rrc));
1137 ph10 400 if (new_recursive.offset_save != stacksave)
1138     (pcre_free)(new_recursive.offset_save);
1139 nigel 87 RRETURN(rrc);
1140     }
1141 nigel 77
1142     md->recursive = &new_recursive;
1143     memcpy(md->offset_vector, new_recursive.offset_save,
1144     new_recursive.saved_max * sizeof(int));
1145     callpat += GET(callpat, 1);
1146     }
1147     while (*callpat == OP_ALT);
1148    
1149     DPRINTF(("Recursion didn't match\n"));
1150     md->recursive = new_recursive.prevrec;
1151     if (new_recursive.offset_save != stacksave)
1152     (pcre_free)(new_recursive.offset_save);
1153     RRETURN(MATCH_NOMATCH);
1154     }
1155     /* Control never reaches here */
1156    
1157     /* "Once" brackets are like assertion brackets except that after a match,
1158     the point in the subject string is not moved back. Thus there can never be
1159     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1160     Check the alternative branches in turn - the matching won't pass the KET
1161     for this kind of subpattern. If any one branch matches, we carry on as at
1162     the end of a normal bracket, leaving the subject pointer. */
1163    
1164     case OP_ONCE:
1165 nigel 91 prev = ecode;
1166     saved_eptr = eptr;
1167    
1168     do
1169 nigel 77 {
1170 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1171 nigel 91 if (rrc == MATCH_MATCH) break;
1172 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1173 nigel 91 ecode += GET(ecode,1);
1174     }
1175     while (*ecode == OP_ALT);
1176 nigel 77
1177 nigel 91 /* If hit the end of the group (which could be repeated), fail */
1178 nigel 77
1179 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1180 nigel 77
1181 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1182     mark, since extracts may have been taken. */
1183 nigel 77
1184 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1185 nigel 77
1186 nigel 91 offset_top = md->end_offset_top;
1187     eptr = md->end_match_ptr;
1188 nigel 77
1189 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1190     happens for a repeating ket if no characters were matched in the group.
1191     This is the forcible breaking of infinite loops as implemented in Perl
1192     5.005. If there is an options reset, it will get obeyed in the normal
1193     course of events. */
1194 nigel 77
1195 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1196     {
1197     ecode += 1+LINK_SIZE;
1198     break;
1199     }
1200 nigel 77
1201 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1202     preceding bracket, in the appropriate order. The second "call" of match()
1203     uses tail recursion, to avoid using another stack frame. We need to reset
1204     any options that changed within the bracket before re-running it, so
1205     check the next opcode. */
1206 nigel 77
1207 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1208     {
1209     ims = (ims & ~PCRE_IMS) | ecode[4];
1210     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1211     }
1212 nigel 77
1213 nigel 91 if (*ecode == OP_KETRMIN)
1214     {
1215 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1216 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1217     ecode = prev;
1218 ph10 197 flags = 0;
1219 nigel 91 goto TAIL_RECURSE;
1220 nigel 77 }
1221 nigel 91 else /* OP_KETRMAX */
1222     {
1223 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1224 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1225     ecode += 1 + LINK_SIZE;
1226 ph10 197 flags = 0;
1227 nigel 91 goto TAIL_RECURSE;
1228     }
1229     /* Control never gets here */
1230 nigel 77
1231     /* An alternation is the end of a branch; scan along to find the end of the
1232     bracketed group and go to there. */
1233    
1234     case OP_ALT:
1235     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1236     break;
1237    
1238 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1239     indicating that it may occur zero times. It may repeat infinitely, or not
1240     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1241     with fixed upper repeat limits are compiled as a number of copies, with the
1242     optional ones preceded by BRAZERO or BRAMINZERO. */
1243 nigel 77
1244     case OP_BRAZERO:
1245     {
1246     next = ecode+1;
1247 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1248 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1249     do next += GET(next,1); while (*next == OP_ALT);
1250 nigel 93 ecode = next + 1 + LINK_SIZE;
1251 nigel 77 }
1252     break;
1253    
1254     case OP_BRAMINZERO:
1255     {
1256     next = ecode+1;
1257 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1258 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1259 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1260     ecode++;
1261     }
1262     break;
1263    
1264 ph10 335 case OP_SKIPZERO:
1265     {
1266     next = ecode+1;
1267     do next += GET(next,1); while (*next == OP_ALT);
1268     ecode = next + 1 + LINK_SIZE;
1269     }
1270     break;
1271    
1272 nigel 93 /* End of a group, repeated or non-repeating. */
1273 nigel 77
1274     case OP_KET:
1275     case OP_KETRMIN:
1276     case OP_KETRMAX:
1277 nigel 91 prev = ecode - GET(ecode, 1);
1278 nigel 77
1279 nigel 93 /* If this was a group that remembered the subject start, in order to break
1280     infinite repeats of empty string matches, retrieve the subject start from
1281     the chain. Otherwise, set it NULL. */
1282 nigel 77
1283 nigel 93 if (*prev >= OP_SBRA)
1284     {
1285     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1286     eptrb = eptrb->epb_prev; /* Backup to previous group */
1287     }
1288     else saved_eptr = NULL;
1289 nigel 77
1290 nigel 93 /* If we are at the end of an assertion group, stop matching and return
1291     MATCH_MATCH, but record the current high water mark for use by positive
1292     assertions. Do this also for the "once" (atomic) groups. */
1293    
1294 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1295     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1296     *prev == OP_ONCE)
1297     {
1298     md->end_match_ptr = eptr; /* For ONCE */
1299     md->end_offset_top = offset_top;
1300     RRETURN(MATCH_MATCH);
1301     }
1302 nigel 77
1303 nigel 93 /* For capturing groups we have to check the group number back at the start
1304     and if necessary complete handling an extraction by setting the offsets and
1305     bumping the high water mark. Note that whole-pattern recursion is coded as
1306     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1307     when the OP_END is reached. Other recursion is handled here. */
1308 nigel 77
1309 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1310 nigel 91 {
1311 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1312 nigel 91 offset = number << 1;
1313 nigel 77
1314     #ifdef DEBUG
1315 nigel 91 printf("end bracket %d", number);
1316     printf("\n");
1317 nigel 77 #endif
1318    
1319 nigel 93 md->capture_last = number;
1320     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1321 nigel 91 {
1322 nigel 93 md->offset_vector[offset] =
1323     md->offset_vector[md->offset_end - number];
1324     md->offset_vector[offset+1] = eptr - md->start_subject;
1325     if (offset_top <= offset) offset_top = offset + 2;
1326     }
1327 nigel 77
1328 nigel 93 /* Handle a recursively called group. Restore the offsets
1329     appropriately and continue from after the call. */
1330 nigel 77
1331 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1332     {
1333     recursion_info *rec = md->recursive;
1334     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1335     md->recursive = rec->prevrec;
1336 ph10 168 mstart = rec->save_start;
1337 nigel 93 memcpy(md->offset_vector, rec->offset_save,
1338     rec->saved_max * sizeof(int));
1339     ecode = rec->after_call;
1340     ims = original_ims;
1341     break;
1342 nigel 77 }
1343 nigel 91 }
1344 nigel 77
1345 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1346     flags, in case they got changed during the group. */
1347 nigel 77
1348 nigel 91 ims = original_ims;
1349     DPRINTF(("ims reset to %02lx\n", ims));
1350 nigel 77
1351 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1352     happens for a repeating ket if no characters were matched in the group.
1353     This is the forcible breaking of infinite loops as implemented in Perl
1354     5.005. If there is an options reset, it will get obeyed in the normal
1355     course of events. */
1356 nigel 77
1357 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1358     {
1359     ecode += 1 + LINK_SIZE;
1360     break;
1361     }
1362 nigel 77
1363 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1364     preceding bracket, in the appropriate order. In the second case, we can use
1365 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1366     unlimited repeat of a group that can match an empty string. */
1367 nigel 77
1368 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1369    
1370 nigel 91 if (*ecode == OP_KETRMIN)
1371     {
1372 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1373 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1374 ph10 197 if (flags != 0) /* Could match an empty string */
1375     {
1376     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1377     RRETURN(rrc);
1378     }
1379 nigel 91 ecode = prev;
1380     goto TAIL_RECURSE;
1381 nigel 77 }
1382 nigel 91 else /* OP_KETRMAX */
1383     {
1384 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1385 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1386     ecode += 1 + LINK_SIZE;
1387 ph10 197 flags = 0;
1388 nigel 91 goto TAIL_RECURSE;
1389     }
1390     /* Control never gets here */
1391 nigel 77
1392     /* Start of subject unless notbol, or after internal newline if multiline */
1393    
1394     case OP_CIRC:
1395     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1396     if ((ims & PCRE_MULTILINE) != 0)
1397     {
1398 nigel 91 if (eptr != md->start_subject &&
1399 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1400 nigel 77 RRETURN(MATCH_NOMATCH);
1401     ecode++;
1402     break;
1403     }
1404     /* ... else fall through */
1405    
1406     /* Start of subject assertion */
1407    
1408     case OP_SOD:
1409     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1410     ecode++;
1411     break;
1412    
1413     /* Start of match assertion */
1414    
1415     case OP_SOM:
1416     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1417     ecode++;
1418     break;
1419 ph10 172
1420 ph10 168 /* Reset the start of match point */
1421 ph10 172
1422 ph10 168 case OP_SET_SOM:
1423     mstart = eptr;
1424 ph10 172 ecode++;
1425     break;
1426 nigel 77
1427     /* Assert before internal newline if multiline, or before a terminating
1428     newline unless endonly is set, else end of subject unless noteol is set. */
1429    
1430     case OP_DOLL:
1431     if ((ims & PCRE_MULTILINE) != 0)
1432     {
1433     if (eptr < md->end_subject)
1434 nigel 91 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1435 nigel 77 else
1436     { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1437     ecode++;
1438     break;
1439     }
1440     else
1441     {
1442     if (md->noteol) RRETURN(MATCH_NOMATCH);
1443     if (!md->endonly)
1444     {
1445 nigel 91 if (eptr != md->end_subject &&
1446 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1447 nigel 77 RRETURN(MATCH_NOMATCH);
1448     ecode++;
1449     break;
1450     }
1451     }
1452 nigel 91 /* ... else fall through for endonly */
1453 nigel 77
1454     /* End of subject assertion (\z) */
1455    
1456     case OP_EOD:
1457     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1458     ecode++;
1459     break;
1460    
1461     /* End of subject or ending \n assertion (\Z) */
1462    
1463     case OP_EODN:
1464 nigel 91 if (eptr != md->end_subject &&
1465 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1466 nigel 91 RRETURN(MATCH_NOMATCH);
1467 nigel 77 ecode++;
1468     break;
1469    
1470     /* Word boundary assertions */
1471    
1472     case OP_NOT_WORD_BOUNDARY:
1473     case OP_WORD_BOUNDARY:
1474     {
1475    
1476     /* Find out if the previous and current characters are "word" characters.
1477     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1478     be "non-word" characters. */
1479    
1480     #ifdef SUPPORT_UTF8
1481     if (utf8)
1482     {
1483     if (eptr == md->start_subject) prev_is_word = FALSE; else
1484     {
1485 ph10 409 USPTR lastptr = eptr - 1;
1486 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1487     GETCHAR(c, lastptr);
1488     prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1489     }
1490     if (eptr >= md->end_subject) cur_is_word = FALSE; else
1491     {
1492     GETCHAR(c, eptr);
1493     cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1494     }
1495     }
1496     else
1497     #endif
1498    
1499     /* More streamlined when not in UTF-8 mode */
1500    
1501     {
1502     prev_is_word = (eptr != md->start_subject) &&
1503     ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1504     cur_is_word = (eptr < md->end_subject) &&
1505     ((md->ctypes[*eptr] & ctype_word) != 0);
1506     }
1507    
1508     /* Now see if the situation is what we want */
1509    
1510     if ((*ecode++ == OP_WORD_BOUNDARY)?
1511     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1512     RRETURN(MATCH_NOMATCH);
1513     }
1514     break;
1515    
1516     /* Match a single character type; inline for speed */
1517    
1518     case OP_ANY:
1519 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1520 ph10 345 /* Fall through */
1521    
1522 ph10 341 case OP_ALLANY:
1523 nigel 77 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1524 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1525 nigel 77 ecode++;
1526     break;
1527    
1528     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1529     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1530    
1531     case OP_ANYBYTE:
1532     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1533     ecode++;
1534     break;
1535    
1536     case OP_NOT_DIGIT:
1537     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1538     GETCHARINCTEST(c, eptr);
1539     if (
1540     #ifdef SUPPORT_UTF8
1541     c < 256 &&
1542     #endif
1543     (md->ctypes[c] & ctype_digit) != 0
1544     )
1545     RRETURN(MATCH_NOMATCH);
1546     ecode++;
1547     break;
1548    
1549     case OP_DIGIT:
1550     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1551     GETCHARINCTEST(c, eptr);
1552     if (
1553     #ifdef SUPPORT_UTF8
1554     c >= 256 ||
1555     #endif
1556     (md->ctypes[c] & ctype_digit) == 0
1557     )
1558     RRETURN(MATCH_NOMATCH);
1559     ecode++;
1560     break;
1561    
1562     case OP_NOT_WHITESPACE:
1563     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1564     GETCHARINCTEST(c, eptr);
1565     if (
1566     #ifdef SUPPORT_UTF8
1567     c < 256 &&
1568     #endif
1569     (md->ctypes[c] & ctype_space) != 0
1570     )
1571     RRETURN(MATCH_NOMATCH);
1572     ecode++;
1573     break;
1574    
1575     case OP_WHITESPACE:
1576     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1577     GETCHARINCTEST(c, eptr);
1578     if (
1579     #ifdef SUPPORT_UTF8
1580     c >= 256 ||
1581     #endif
1582     (md->ctypes[c] & ctype_space) == 0
1583     )
1584     RRETURN(MATCH_NOMATCH);
1585     ecode++;
1586     break;
1587    
1588     case OP_NOT_WORDCHAR:
1589     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1590     GETCHARINCTEST(c, eptr);
1591     if (
1592     #ifdef SUPPORT_UTF8
1593     c < 256 &&
1594     #endif
1595     (md->ctypes[c] & ctype_word) != 0
1596     )
1597     RRETURN(MATCH_NOMATCH);
1598     ecode++;
1599     break;
1600    
1601     case OP_WORDCHAR:
1602     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1603     GETCHARINCTEST(c, eptr);
1604     if (
1605     #ifdef SUPPORT_UTF8
1606     c >= 256 ||
1607     #endif
1608     (md->ctypes[c] & ctype_word) == 0
1609     )
1610     RRETURN(MATCH_NOMATCH);
1611     ecode++;
1612     break;
1613    
1614 nigel 93 case OP_ANYNL:
1615     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1616     GETCHARINCTEST(c, eptr);
1617     switch(c)
1618     {
1619     default: RRETURN(MATCH_NOMATCH);
1620     case 0x000d:
1621     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1622     break;
1623 ph10 231
1624 nigel 93 case 0x000a:
1625 ph10 231 break;
1626    
1627 nigel 93 case 0x000b:
1628     case 0x000c:
1629     case 0x0085:
1630     case 0x2028:
1631     case 0x2029:
1632 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1633 nigel 93 break;
1634     }
1635     ecode++;
1636     break;
1637    
1638 ph10 178 case OP_NOT_HSPACE:
1639     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1640     GETCHARINCTEST(c, eptr);
1641     switch(c)
1642     {
1643     default: break;
1644     case 0x09: /* HT */
1645     case 0x20: /* SPACE */
1646     case 0xa0: /* NBSP */
1647     case 0x1680: /* OGHAM SPACE MARK */
1648     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1649     case 0x2000: /* EN QUAD */
1650     case 0x2001: /* EM QUAD */
1651     case 0x2002: /* EN SPACE */
1652     case 0x2003: /* EM SPACE */
1653     case 0x2004: /* THREE-PER-EM SPACE */
1654     case 0x2005: /* FOUR-PER-EM SPACE */
1655     case 0x2006: /* SIX-PER-EM SPACE */
1656     case 0x2007: /* FIGURE SPACE */
1657     case 0x2008: /* PUNCTUATION SPACE */
1658     case 0x2009: /* THIN SPACE */
1659     case 0x200A: /* HAIR SPACE */
1660     case 0x202f: /* NARROW NO-BREAK SPACE */
1661     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1662     case 0x3000: /* IDEOGRAPHIC SPACE */
1663     RRETURN(MATCH_NOMATCH);
1664     }
1665     ecode++;
1666     break;
1667    
1668     case OP_HSPACE:
1669     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1670     GETCHARINCTEST(c, eptr);
1671     switch(c)
1672     {
1673     default: RRETURN(MATCH_NOMATCH);
1674     case 0x09: /* HT */
1675     case 0x20: /* SPACE */
1676     case 0xa0: /* NBSP */
1677     case 0x1680: /* OGHAM SPACE MARK */
1678     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1679     case 0x2000: /* EN QUAD */
1680     case 0x2001: /* EM QUAD */
1681     case 0x2002: /* EN SPACE */
1682     case 0x2003: /* EM SPACE */
1683     case 0x2004: /* THREE-PER-EM SPACE */
1684     case 0x2005: /* FOUR-PER-EM SPACE */
1685     case 0x2006: /* SIX-PER-EM SPACE */
1686     case 0x2007: /* FIGURE SPACE */
1687     case 0x2008: /* PUNCTUATION SPACE */
1688     case 0x2009: /* THIN SPACE */
1689     case 0x200A: /* HAIR SPACE */
1690     case 0x202f: /* NARROW NO-BREAK SPACE */
1691     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1692     case 0x3000: /* IDEOGRAPHIC SPACE */
1693     break;
1694     }
1695     ecode++;
1696     break;
1697    
1698     case OP_NOT_VSPACE:
1699     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1700     GETCHARINCTEST(c, eptr);
1701     switch(c)
1702     {
1703     default: break;
1704     case 0x0a: /* LF */
1705     case 0x0b: /* VT */
1706     case 0x0c: /* FF */
1707     case 0x0d: /* CR */
1708     case 0x85: /* NEL */
1709     case 0x2028: /* LINE SEPARATOR */
1710     case 0x2029: /* PARAGRAPH SEPARATOR */
1711     RRETURN(MATCH_NOMATCH);
1712     }
1713     ecode++;
1714     break;
1715    
1716     case OP_VSPACE:
1717     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1718     GETCHARINCTEST(c, eptr);
1719     switch(c)
1720     {
1721     default: RRETURN(MATCH_NOMATCH);
1722     case 0x0a: /* LF */
1723     case 0x0b: /* VT */
1724     case 0x0c: /* FF */
1725     case 0x0d: /* CR */
1726     case 0x85: /* NEL */
1727     case 0x2028: /* LINE SEPARATOR */
1728     case 0x2029: /* PARAGRAPH SEPARATOR */
1729     break;
1730     }
1731     ecode++;
1732     break;
1733    
1734 nigel 77 #ifdef SUPPORT_UCP
1735     /* Check the next character by Unicode property. We will get here only
1736     if the support is in the binary; otherwise a compile-time error occurs. */
1737    
1738     case OP_PROP:
1739     case OP_NOTPROP:
1740     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1741     GETCHARINCTEST(c, eptr);
1742     {
1743 ph10 384 const ucd_record *prop = GET_UCD(c);
1744 nigel 77
1745 nigel 87 switch(ecode[1])
1746     {
1747     case PT_ANY:
1748     if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1749     break;
1750 nigel 77
1751 nigel 87 case PT_LAMP:
1752 ph10 349 if ((prop->chartype == ucp_Lu ||
1753     prop->chartype == ucp_Ll ||
1754     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1755 nigel 77 RRETURN(MATCH_NOMATCH);
1756 nigel 87 break;
1757    
1758     case PT_GC:
1759 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1760 nigel 77 RRETURN(MATCH_NOMATCH);
1761 nigel 87 break;
1762    
1763     case PT_PC:
1764 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1765 nigel 87 RRETURN(MATCH_NOMATCH);
1766     break;
1767    
1768     case PT_SC:
1769 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
1770 nigel 87 RRETURN(MATCH_NOMATCH);
1771     break;
1772    
1773     default:
1774     RRETURN(PCRE_ERROR_INTERNAL);
1775 nigel 77 }
1776 nigel 87
1777     ecode += 3;
1778 nigel 77 }
1779     break;
1780    
1781     /* Match an extended Unicode sequence. We will get here only if the support
1782     is in the binary; otherwise a compile-time error occurs. */
1783    
1784     case OP_EXTUNI:
1785     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1786     GETCHARINCTEST(c, eptr);
1787     {
1788 ph10 349 int category = UCD_CATEGORY(c);
1789 nigel 77 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1790     while (eptr < md->end_subject)
1791     {
1792     int len = 1;
1793     if (!utf8) c = *eptr; else
1794     {
1795     GETCHARLEN(c, eptr, len);
1796     }
1797 ph10 349 category = UCD_CATEGORY(c);
1798 nigel 77 if (category != ucp_M) break;
1799     eptr += len;
1800     }
1801     }
1802     ecode++;
1803     break;
1804     #endif
1805    
1806    
1807     /* Match a back reference, possibly repeatedly. Look past the end of the
1808     item to see if there is repeat information following. The code is similar
1809     to that for character classes, but repeated for efficiency. Then obey
1810     similar code to character type repeats - written out again for speed.
1811     However, if the referenced string is the empty string, always treat
1812     it as matched, any number of times (otherwise there could be infinite
1813     loops). */
1814    
1815     case OP_REF:
1816     {
1817     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1818 ph10 345 ecode += 3;
1819    
1820 ph10 336 /* If the reference is unset, there are two possibilities:
1821 ph10 345
1822 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
1823     than the amount of subject left; this ensures that every attempt at a
1824     match fails. We can't just fail here, because of the possibility of
1825     quantifiers with zero minima.
1826 ph10 345
1827     (b) If the JavaScript compatibility flag is set, set the length to zero
1828     so that the back reference matches an empty string.
1829    
1830     Otherwise, set the length to the length of what was matched by the
1831 ph10 336 referenced subpattern. */
1832 ph10 345
1833 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
1834 ph10 345 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1835 ph10 336 else
1836     length = md->offset_vector[offset+1] - md->offset_vector[offset];
1837 nigel 77
1838     /* Set up for repetition, or handle the non-repeated case */
1839    
1840     switch (*ecode)
1841     {
1842     case OP_CRSTAR:
1843     case OP_CRMINSTAR:
1844     case OP_CRPLUS:
1845     case OP_CRMINPLUS:
1846     case OP_CRQUERY:
1847     case OP_CRMINQUERY:
1848     c = *ecode++ - OP_CRSTAR;
1849     minimize = (c & 1) != 0;
1850     min = rep_min[c]; /* Pick up values from tables; */
1851     max = rep_max[c]; /* zero for max => infinity */
1852     if (max == 0) max = INT_MAX;
1853     break;
1854    
1855     case OP_CRRANGE:
1856     case OP_CRMINRANGE:
1857     minimize = (*ecode == OP_CRMINRANGE);
1858     min = GET2(ecode, 1);
1859     max = GET2(ecode, 3);
1860     if (max == 0) max = INT_MAX;
1861     ecode += 5;
1862     break;
1863    
1864     default: /* No repeat follows */
1865     if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1866     eptr += length;
1867     continue; /* With the main loop */
1868     }
1869    
1870     /* If the length of the reference is zero, just continue with the
1871     main loop. */
1872    
1873     if (length == 0) continue;
1874    
1875     /* First, ensure the minimum number of matches are present. We get back
1876     the length of the reference string explicitly rather than passing the
1877     address of eptr, so that eptr can be a register variable. */
1878    
1879     for (i = 1; i <= min; i++)
1880     {
1881 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
1882 ph10 426 {
1883 ph10 427 CHECK_PARTIAL();
1884 ph10 426 RRETURN(MATCH_NOMATCH);
1885 ph10 427 }
1886 nigel 77 eptr += length;
1887     }
1888    
1889     /* If min = max, continue at the same level without recursion.
1890     They are not both allowed to be zero. */
1891    
1892     if (min == max) continue;
1893    
1894     /* If minimizing, keep trying and advancing the pointer */
1895    
1896     if (minimize)
1897     {
1898     for (fi = min;; fi++)
1899     {
1900 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1901 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1902     if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1903 ph10 426 {
1904 ph10 427 CHECK_PARTIAL();
1905 nigel 77 RRETURN(MATCH_NOMATCH);
1906 ph10 427 }
1907 nigel 77 eptr += length;
1908     }
1909     /* Control never gets here */
1910     }
1911    
1912     /* If maximizing, find the longest string and work backwards */
1913    
1914     else
1915     {
1916     pp = eptr;
1917     for (i = min; i < max; i++)
1918     {
1919     if (!match_ref(offset, eptr, length, md, ims)) break;
1920     eptr += length;
1921     }
1922 ph10 427 CHECK_PARTIAL();
1923 nigel 77 while (eptr >= pp)
1924     {
1925 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1926 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1927     eptr -= length;
1928     }
1929     RRETURN(MATCH_NOMATCH);
1930     }
1931     }
1932     /* Control never gets here */
1933    
1934    
1935    
1936     /* Match a bit-mapped character class, possibly repeatedly. This op code is
1937     used when all the characters in the class have values in the range 0-255,
1938     and either the matching is caseful, or the characters are in the range
1939     0-127 when UTF-8 processing is enabled. The only difference between
1940     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1941     encountered.
1942    
1943     First, look past the end of the item to see if there is repeat information
1944     following. Then obey similar code to character type repeats - written out
1945     again for speed. */
1946    
1947     case OP_NCLASS:
1948     case OP_CLASS:
1949     {
1950     data = ecode + 1; /* Save for matching */
1951     ecode += 33; /* Advance past the item */
1952    
1953     switch (*ecode)
1954     {
1955     case OP_CRSTAR:
1956     case OP_CRMINSTAR:
1957     case OP_CRPLUS:
1958     case OP_CRMINPLUS:
1959     case OP_CRQUERY:
1960     case OP_CRMINQUERY:
1961     c = *ecode++ - OP_CRSTAR;
1962     minimize = (c & 1) != 0;
1963     min = rep_min[c]; /* Pick up values from tables; */
1964     max = rep_max[c]; /* zero for max => infinity */
1965     if (max == 0) max = INT_MAX;
1966     break;
1967    
1968     case OP_CRRANGE:
1969     case OP_CRMINRANGE:
1970     minimize = (*ecode == OP_CRMINRANGE);
1971     min = GET2(ecode, 1);
1972     max = GET2(ecode, 3);
1973     if (max == 0) max = INT_MAX;
1974     ecode += 5;
1975     break;
1976    
1977     default: /* No repeat follows */
1978     min = max = 1;
1979     break;
1980     }
1981    
1982     /* First, ensure the minimum number of matches are present. */
1983    
1984     #ifdef SUPPORT_UTF8
1985     /* UTF-8 mode */
1986     if (utf8)
1987     {
1988     for (i = 1; i <= min; i++)
1989     {
1990 ph10 427 if (eptr >= md->end_subject)
1991 ph10 426 {
1992     CHECK_PARTIAL();
1993     RRETURN(MATCH_NOMATCH);
1994 ph10 427 }
1995 nigel 77 GETCHARINC(c, eptr);
1996     if (c > 255)
1997     {
1998     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1999     }
2000     else
2001     {
2002     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2003     }
2004     }
2005     }
2006     else
2007     #endif
2008     /* Not UTF-8 mode */
2009     {
2010     for (i = 1; i <= min; i++)
2011     {
2012 ph10 427 if (eptr >= md->end_subject)
2013 ph10 426 {
2014     CHECK_PARTIAL();
2015     RRETURN(MATCH_NOMATCH);
2016 ph10 427 }
2017 nigel 77 c = *eptr++;
2018     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2019     }
2020     }
2021    
2022     /* If max == min we can continue with the main loop without the
2023     need to recurse. */
2024    
2025     if (min == max) continue;
2026    
2027     /* If minimizing, keep testing the rest of the expression and advancing
2028     the pointer while it matches the class. */
2029    
2030     if (minimize)
2031     {
2032     #ifdef SUPPORT_UTF8
2033     /* UTF-8 mode */
2034     if (utf8)
2035     {
2036     for (fi = min;; fi++)
2037     {
2038 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2039 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2040 ph10 427 if (fi >= max)
2041 ph10 426 {
2042 ph10 427 CHECK_PARTIAL();
2043 ph10 426 RRETURN(MATCH_NOMATCH);
2044 ph10 427 }
2045     if (eptr >= md->end_subject)
2046 ph10 426 {
2047 ph10 427 SCHECK_PARTIAL();
2048 ph10 426 RRETURN(MATCH_NOMATCH);
2049 ph10 427 }
2050 nigel 77 GETCHARINC(c, eptr);
2051     if (c > 255)
2052     {
2053     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2054     }
2055     else
2056     {
2057     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2058     }
2059     }
2060     }
2061     else
2062     #endif
2063     /* Not UTF-8 mode */
2064     {
2065     for (fi = min;; fi++)
2066     {
2067 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2068 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2069 ph10 427 if (fi >= max)
2070 ph10 426 {
2071 ph10 427 CHECK_PARTIAL();
2072 ph10 426 RRETURN(MATCH_NOMATCH);
2073 ph10 427 }
2074     if (eptr >= md->end_subject)
2075 ph10 426 {
2076 ph10 427 SCHECK_PARTIAL();
2077 ph10 426 RRETURN(MATCH_NOMATCH);
2078 ph10 427 }
2079 nigel 77 c = *eptr++;
2080     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2081     }
2082     }
2083     /* Control never gets here */
2084     }
2085    
2086     /* If maximizing, find the longest possible run, then work backwards. */
2087    
2088     else
2089     {
2090     pp = eptr;
2091    
2092     #ifdef SUPPORT_UTF8
2093     /* UTF-8 mode */
2094     if (utf8)
2095     {
2096     for (i = min; i < max; i++)
2097     {
2098     int len = 1;
2099     if (eptr >= md->end_subject) break;
2100     GETCHARLEN(c, eptr, len);
2101     if (c > 255)
2102     {
2103     if (op == OP_CLASS) break;
2104     }
2105     else
2106     {
2107     if ((data[c/8] & (1 << (c&7))) == 0) break;
2108     }
2109     eptr += len;
2110     }
2111 ph10 427 CHECK_PARTIAL();
2112 nigel 77 for (;;)
2113     {
2114 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2115 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2116     if (eptr-- == pp) break; /* Stop if tried at original pos */
2117     BACKCHAR(eptr);
2118     }
2119     }
2120     else
2121     #endif
2122     /* Not UTF-8 mode */
2123     {
2124     for (i = min; i < max; i++)
2125     {
2126     if (eptr >= md->end_subject) break;
2127     c = *eptr;
2128     if ((data[c/8] & (1 << (c&7))) == 0) break;
2129     eptr++;
2130     }
2131 ph10 427 CHECK_PARTIAL();
2132 nigel 77 while (eptr >= pp)
2133     {
2134 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2135 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2136 nigel 77 eptr--;
2137     }
2138     }
2139    
2140     RRETURN(MATCH_NOMATCH);
2141     }
2142     }
2143     /* Control never gets here */
2144    
2145    
2146     /* Match an extended character class. This opcode is encountered only
2147 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2148     mode, because Unicode properties are supported in non-UTF-8 mode. */
2149 nigel 77
2150     #ifdef SUPPORT_UTF8
2151     case OP_XCLASS:
2152     {
2153     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2154     ecode += GET(ecode, 1); /* Advance past the item */
2155    
2156     switch (*ecode)
2157     {
2158     case OP_CRSTAR:
2159     case OP_CRMINSTAR:
2160     case OP_CRPLUS:
2161     case OP_CRMINPLUS:
2162     case OP_CRQUERY:
2163     case OP_CRMINQUERY:
2164     c = *ecode++ - OP_CRSTAR;
2165     minimize = (c & 1) != 0;
2166     min = rep_min[c]; /* Pick up values from tables; */
2167     max = rep_max[c]; /* zero for max => infinity */
2168     if (max == 0) max = INT_MAX;
2169     break;
2170    
2171     case OP_CRRANGE:
2172     case OP_CRMINRANGE:
2173     minimize = (*ecode == OP_CRMINRANGE);
2174     min = GET2(ecode, 1);
2175     max = GET2(ecode, 3);
2176     if (max == 0) max = INT_MAX;
2177     ecode += 5;
2178     break;
2179    
2180     default: /* No repeat follows */
2181     min = max = 1;
2182     break;
2183     }
2184    
2185     /* First, ensure the minimum number of matches are present. */
2186    
2187     for (i = 1; i <= min; i++)
2188     {
2189 ph10 427 if (eptr >= md->end_subject)
2190 ph10 426 {
2191     SCHECK_PARTIAL();
2192     RRETURN(MATCH_NOMATCH);
2193 ph10 427 }
2194 ph10 384 GETCHARINCTEST(c, eptr);
2195 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2196     }
2197    
2198     /* If max == min we can continue with the main loop without the
2199     need to recurse. */
2200    
2201     if (min == max) continue;
2202    
2203     /* If minimizing, keep testing the rest of the expression and advancing
2204     the pointer while it matches the class. */
2205    
2206     if (minimize)
2207     {
2208     for (fi = min;; fi++)
2209     {
2210 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2211 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2212 ph10 427 if (fi >= max)
2213 ph10 426 {
2214 ph10 427 CHECK_PARTIAL();
2215 ph10 426 RRETURN(MATCH_NOMATCH);
2216 ph10 427 }
2217     if (eptr >= md->end_subject)
2218 ph10 426 {
2219 ph10 427 SCHECK_PARTIAL();
2220 ph10 426 RRETURN(MATCH_NOMATCH);
2221 ph10 427 }
2222 ph10 384 GETCHARINCTEST(c, eptr);
2223 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2224     }
2225     /* Control never gets here */
2226     }
2227    
2228     /* If maximizing, find the longest possible run, then work backwards. */
2229    
2230     else
2231     {
2232     pp = eptr;
2233     for (i = min; i < max; i++)
2234     {
2235     int len = 1;
2236     if (eptr >= md->end_subject) break;
2237 ph10 384 GETCHARLENTEST(c, eptr, len);
2238 nigel 77 if (!_pcre_xclass(c, data)) break;
2239     eptr += len;
2240     }
2241 ph10 427 CHECK_PARTIAL();
2242 nigel 77 for(;;)
2243     {
2244 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2245 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2246     if (eptr-- == pp) break; /* Stop if tried at original pos */
2247 ph10 214 if (utf8) BACKCHAR(eptr);
2248 nigel 77 }
2249     RRETURN(MATCH_NOMATCH);
2250     }
2251    
2252     /* Control never gets here */
2253     }
2254     #endif /* End of XCLASS */
2255    
2256     /* Match a single character, casefully */
2257    
2258     case OP_CHAR:
2259     #ifdef SUPPORT_UTF8
2260     if (utf8)
2261     {
2262     length = 1;
2263     ecode++;
2264     GETCHARLEN(fc, ecode, length);
2265     if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2266     while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2267     }
2268     else
2269     #endif
2270    
2271     /* Non-UTF-8 mode */
2272     {
2273     if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2274     if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2275     ecode += 2;
2276     }
2277     break;
2278    
2279     /* Match a single character, caselessly */
2280    
2281     case OP_CHARNC:
2282     #ifdef SUPPORT_UTF8
2283     if (utf8)
2284     {
2285     length = 1;
2286     ecode++;
2287     GETCHARLEN(fc, ecode, length);
2288    
2289     if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2290    
2291     /* If the pattern character's value is < 128, we have only one byte, and
2292     can use the fast lookup table. */
2293    
2294     if (fc < 128)
2295     {
2296     if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2297     }
2298    
2299     /* Otherwise we must pick up the subject character */
2300    
2301     else
2302     {
2303 nigel 93 unsigned int dc;
2304 nigel 77 GETCHARINC(dc, eptr);
2305     ecode += length;
2306    
2307     /* If we have Unicode property support, we can use it to test the other
2308 nigel 87 case of the character, if there is one. */
2309 nigel 77
2310     if (fc != dc)
2311     {
2312     #ifdef SUPPORT_UCP
2313 ph10 349 if (dc != UCD_OTHERCASE(fc))
2314 nigel 77 #endif
2315     RRETURN(MATCH_NOMATCH);
2316     }
2317     }
2318     }
2319     else
2320     #endif /* SUPPORT_UTF8 */
2321    
2322     /* Non-UTF-8 mode */
2323     {
2324     if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2325     if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2326     ecode += 2;
2327     }
2328     break;
2329    
2330 nigel 93 /* Match a single character repeatedly. */
2331 nigel 77
2332     case OP_EXACT:
2333     min = max = GET2(ecode, 1);
2334     ecode += 3;
2335     goto REPEATCHAR;
2336    
2337 nigel 93 case OP_POSUPTO:
2338     possessive = TRUE;
2339     /* Fall through */
2340    
2341 nigel 77 case OP_UPTO:
2342     case OP_MINUPTO:
2343     min = 0;
2344     max = GET2(ecode, 1);
2345     minimize = *ecode == OP_MINUPTO;
2346     ecode += 3;
2347     goto REPEATCHAR;
2348    
2349 nigel 93 case OP_POSSTAR:
2350     possessive = TRUE;
2351     min = 0;
2352     max = INT_MAX;
2353     ecode++;
2354     goto REPEATCHAR;
2355    
2356     case OP_POSPLUS:
2357     possessive = TRUE;
2358     min = 1;
2359     max = INT_MAX;
2360     ecode++;
2361     goto REPEATCHAR;
2362    
2363     case OP_POSQUERY:
2364     possessive = TRUE;
2365     min = 0;
2366     max = 1;
2367     ecode++;
2368     goto REPEATCHAR;
2369    
2370 nigel 77 case OP_STAR:
2371     case OP_MINSTAR:
2372     case OP_PLUS:
2373     case OP_MINPLUS:
2374     case OP_QUERY:
2375     case OP_MINQUERY:
2376     c = *ecode++ - OP_STAR;
2377     minimize = (c & 1) != 0;
2378     min = rep_min[c]; /* Pick up values from tables; */
2379     max = rep_max[c]; /* zero for max => infinity */
2380     if (max == 0) max = INT_MAX;
2381    
2382 ph10 426 /* Common code for all repeated single-character matches. */
2383 nigel 77
2384     REPEATCHAR:
2385     #ifdef SUPPORT_UTF8
2386     if (utf8)
2387     {
2388     length = 1;
2389     charptr = ecode;
2390     GETCHARLEN(fc, ecode, length);
2391     ecode += length;
2392    
2393     /* Handle multibyte character matching specially here. There is
2394     support for caseless matching if UCP support is present. */
2395    
2396     if (length > 1)
2397     {
2398     #ifdef SUPPORT_UCP
2399 nigel 93 unsigned int othercase;
2400 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2401 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2402 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2403 ph10 115 else oclength = 0;
2404 nigel 77 #endif /* SUPPORT_UCP */
2405    
2406     for (i = 1; i <= min; i++)
2407     {
2408 ph10 426 if (eptr <= md->end_subject - length &&
2409     memcmp(eptr, charptr, length) == 0) eptr += length;
2410 ph10 123 #ifdef SUPPORT_UCP
2411 ph10 426 else if (oclength > 0 &&
2412     eptr <= md->end_subject - oclength &&
2413     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2414     #endif /* SUPPORT_UCP */
2415 nigel 77 else
2416     {
2417 ph10 426 CHECK_PARTIAL();
2418     RRETURN(MATCH_NOMATCH);
2419 nigel 77 }
2420     }
2421    
2422     if (min == max) continue;
2423    
2424     if (minimize)
2425     {
2426     for (fi = min;; fi++)
2427     {
2428 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2429 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2430 ph10 426 if (fi >= max)
2431     {
2432     CHECK_PARTIAL();
2433     RRETURN(MATCH_NOMATCH);
2434     }
2435     if (eptr <= md->end_subject - length &&
2436     memcmp(eptr, charptr, length) == 0) eptr += length;
2437 ph10 123 #ifdef SUPPORT_UCP
2438 ph10 426 else if (oclength > 0 &&
2439     eptr <= md->end_subject - oclength &&
2440     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2441     #endif /* SUPPORT_UCP */
2442 nigel 77 else
2443     {
2444 ph10 426 CHECK_PARTIAL();
2445     RRETURN(MATCH_NOMATCH);
2446 nigel 77 }
2447     }
2448     /* Control never gets here */
2449     }
2450 nigel 93
2451     else /* Maximize */
2452 nigel 77 {
2453     pp = eptr;
2454     for (i = min; i < max; i++)
2455     {
2456 ph10 426 if (eptr <= md->end_subject - length &&
2457     memcmp(eptr, charptr, length) == 0) eptr += length;
2458 ph10 123 #ifdef SUPPORT_UCP
2459 ph10 426 else if (oclength > 0 &&
2460     eptr <= md->end_subject - oclength &&
2461     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2462     #endif /* SUPPORT_UCP */
2463 ph10 115 else break;
2464 nigel 77 }
2465 nigel 93
2466 ph10 426 CHECK_PARTIAL();
2467 nigel 93 if (possessive) continue;
2468 ph10 427
2469 ph10 120 for(;;)
2470 ph10 426 {
2471     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2472     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2473     if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2474 ph10 115 #ifdef SUPPORT_UCP
2475 ph10 426 eptr--;
2476     BACKCHAR(eptr);
2477 ph10 123 #else /* without SUPPORT_UCP */
2478 ph10 426 eptr -= length;
2479 ph10 123 #endif /* SUPPORT_UCP */
2480 ph10 426 }
2481 nigel 77 }
2482     /* Control never gets here */
2483     }
2484    
2485     /* If the length of a UTF-8 character is 1, we fall through here, and
2486     obey the code as for non-UTF-8 characters below, though in this case the
2487     value of fc will always be < 128. */
2488     }
2489     else
2490     #endif /* SUPPORT_UTF8 */
2491    
2492     /* When not in UTF-8 mode, load a single-byte character. */
2493    
2494 ph10 426 fc = *ecode++;
2495    
2496 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2497     may not be in UTF-8 mode. The code is duplicated for the caseless and
2498     caseful cases, for speed, since matching characters is likely to be quite
2499     common. First, ensure the minimum number of matches are present. If min =
2500     max, continue at the same level without recursing. Otherwise, if
2501     minimizing, keep trying the rest of the expression and advancing one
2502     matching character if failing, up to the maximum. Alternatively, if
2503     maximizing, find the maximum number of characters and work backwards. */
2504    
2505     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2506     max, eptr));
2507    
2508     if ((ims & PCRE_CASELESS) != 0)
2509     {
2510     fc = md->lcc[fc];
2511     for (i = 1; i <= min; i++)
2512 ph10 426 {
2513     if (eptr >= md->end_subject)
2514     {
2515     SCHECK_PARTIAL();
2516     RRETURN(MATCH_NOMATCH);
2517     }
2518 nigel 77 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2519 ph10 426 }
2520 nigel 77 if (min == max) continue;
2521     if (minimize)
2522     {
2523     for (fi = min;; fi++)
2524     {
2525 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2526 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527 ph10 426 if (fi >= max)
2528     {
2529 ph10 427 CHECK_PARTIAL();
2530 nigel 77 RRETURN(MATCH_NOMATCH);
2531 ph10 426 }
2532     if (eptr >= md->end_subject)
2533     {
2534 ph10 427 SCHECK_PARTIAL();
2535 ph10 426 RRETURN(MATCH_NOMATCH);
2536     }
2537     if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2538 nigel 77 }
2539     /* Control never gets here */
2540     }
2541 nigel 93 else /* Maximize */
2542 nigel 77 {
2543     pp = eptr;
2544     for (i = min; i < max; i++)
2545     {
2546     if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2547     eptr++;
2548     }
2549 ph10 427
2550     CHECK_PARTIAL();
2551 nigel 93 if (possessive) continue;
2552 ph10 427
2553 nigel 77 while (eptr >= pp)
2554     {
2555 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2556 nigel 77 eptr--;
2557     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2558     }
2559     RRETURN(MATCH_NOMATCH);
2560     }
2561     /* Control never gets here */
2562     }
2563    
2564     /* Caseful comparisons (includes all multi-byte characters) */
2565    
2566     else
2567     {
2568 ph10 427 for (i = 1; i <= min; i++)
2569 ph10 426 {
2570     if (eptr >= md->end_subject)
2571     {
2572     SCHECK_PARTIAL();
2573     RRETURN(MATCH_NOMATCH);
2574     }
2575     if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2576 ph10 427 }
2577 nigel 77 if (min == max) continue;
2578     if (minimize)
2579     {
2580     for (fi = min;; fi++)
2581     {
2582 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2583 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2584 ph10 426 if (fi >= max)
2585     {
2586     CHECK_PARTIAL();
2587 nigel 77 RRETURN(MATCH_NOMATCH);
2588 ph10 426 }
2589     if (eptr >= md->end_subject)
2590 ph10 427 {
2591 ph10 426 SCHECK_PARTIAL();
2592     RRETURN(MATCH_NOMATCH);
2593 ph10 427 }
2594 ph10 426 if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2595 nigel 77 }
2596     /* Control never gets here */
2597     }
2598 nigel 93 else /* Maximize */
2599 nigel 77 {
2600     pp = eptr;
2601     for (i = min; i < max; i++)
2602     {
2603     if (eptr >= md->end_subject || fc != *eptr) break;
2604     eptr++;
2605     }
2606 ph10 427 CHECK_PARTIAL();
2607 nigel 93 if (possessive) continue;
2608 nigel 77 while (eptr >= pp)
2609     {
2610 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2611 nigel 77 eptr--;
2612     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2613     }
2614     RRETURN(MATCH_NOMATCH);
2615     }
2616     }
2617     /* Control never gets here */
2618    
2619     /* Match a negated single one-byte character. The character we are
2620     checking can be multibyte. */
2621    
2622     case OP_NOT:
2623     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2624     ecode++;
2625     GETCHARINCTEST(c, eptr);
2626     if ((ims & PCRE_CASELESS) != 0)
2627     {
2628     #ifdef SUPPORT_UTF8
2629     if (c < 256)
2630     #endif
2631     c = md->lcc[c];
2632     if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2633     }
2634     else
2635     {
2636     if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2637     }
2638     break;
2639    
2640     /* Match a negated single one-byte character repeatedly. This is almost a
2641     repeat of the code for a repeated single character, but I haven't found a
2642     nice way of commoning these up that doesn't require a test of the
2643     positive/negative option for each character match. Maybe that wouldn't add
2644     very much to the time taken, but character matching *is* what this is all
2645     about... */
2646    
2647     case OP_NOTEXACT:
2648     min = max = GET2(ecode, 1);
2649     ecode += 3;
2650     goto REPEATNOTCHAR;
2651    
2652     case OP_NOTUPTO:
2653     case OP_NOTMINUPTO:
2654     min = 0;
2655     max = GET2(ecode, 1);
2656     minimize = *ecode == OP_NOTMINUPTO;
2657     ecode += 3;
2658     goto REPEATNOTCHAR;
2659    
2660 nigel 93 case OP_NOTPOSSTAR:
2661     possessive = TRUE;
2662     min = 0;
2663     max = INT_MAX;
2664     ecode++;
2665     goto REPEATNOTCHAR;
2666    
2667     case OP_NOTPOSPLUS:
2668     possessive = TRUE;
2669     min = 1;
2670     max = INT_MAX;
2671     ecode++;
2672     goto REPEATNOTCHAR;
2673    
2674     case OP_NOTPOSQUERY:
2675     possessive = TRUE;
2676     min = 0;
2677     max = 1;
2678     ecode++;
2679     goto REPEATNOTCHAR;
2680    
2681     case OP_NOTPOSUPTO:
2682     possessive = TRUE;
2683     min = 0;
2684     max = GET2(ecode, 1);
2685     ecode += 3;
2686     goto REPEATNOTCHAR;
2687    
2688 nigel 77 case OP_NOTSTAR:
2689     case OP_NOTMINSTAR:
2690     case OP_NOTPLUS:
2691     case OP_NOTMINPLUS:
2692     case OP_NOTQUERY:
2693     case OP_NOTMINQUERY:
2694     c = *ecode++ - OP_NOTSTAR;
2695     minimize = (c & 1) != 0;
2696     min = rep_min[c]; /* Pick up values from tables; */
2697     max = rep_max[c]; /* zero for max => infinity */
2698     if (max == 0) max = INT_MAX;
2699    
2700 ph10 426 /* Common code for all repeated single-byte matches. */
2701 nigel 77
2702     REPEATNOTCHAR:
2703     fc = *ecode++;
2704    
2705     /* The code is duplicated for the caseless and caseful cases, for speed,
2706     since matching characters is likely to be quite common. First, ensure the
2707     minimum number of matches are present. If min = max, continue at the same
2708     level without recursing. Otherwise, if minimizing, keep trying the rest of
2709     the expression and advancing one matching character if failing, up to the
2710     maximum. Alternatively, if maximizing, find the maximum number of
2711     characters and work backwards. */
2712    
2713     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2714     max, eptr));
2715    
2716     if ((ims & PCRE_CASELESS) != 0)
2717     {
2718     fc = md->lcc[fc];
2719    
2720     #ifdef SUPPORT_UTF8
2721     /* UTF-8 mode */
2722     if (utf8)
2723     {
2724 nigel 93 register unsigned int d;
2725 nigel 77 for (i = 1; i <= min; i++)
2726     {
2727 ph10 426 if (eptr >= md->end_subject)
2728     {
2729     SCHECK_PARTIAL();
2730 ph10 427 RRETURN(MATCH_NOMATCH);
2731     }
2732 nigel 77 GETCHARINC(d, eptr);
2733     if (d < 256) d = md->lcc[d];
2734     if (fc == d) RRETURN(MATCH_NOMATCH);
2735     }
2736     }
2737     else
2738     #endif
2739    
2740     /* Not UTF-8 mode */
2741     {
2742     for (i = 1; i <= min; i++)
2743 ph10 426 {
2744     if (eptr >= md->end_subject)
2745     {
2746     SCHECK_PARTIAL();
2747 ph10 427 RRETURN(MATCH_NOMATCH);
2748     }
2749 nigel 77 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2750 ph10 427 }
2751 nigel 77 }
2752    
2753     if (min == max) continue;
2754    
2755     if (minimize)
2756     {
2757     #ifdef SUPPORT_UTF8
2758     /* UTF-8 mode */
2759     if (utf8)
2760     {
2761 nigel 93 register unsigned int d;
2762 nigel 77 for (fi = min;; fi++)
2763     {
2764 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2765 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2766 ph10 426 if (fi >= max)
2767     {
2768     CHECK_PARTIAL();
2769     RRETURN(MATCH_NOMATCH);
2770 ph10 427 }
2771     if (eptr >= md->end_subject)
2772 ph10 426 {
2773 ph10 427 SCHECK_PARTIAL();
2774 ph10 426 RRETURN(MATCH_NOMATCH);
2775 ph10 427 }
2776 nigel 77 GETCHARINC(d, eptr);
2777     if (d < 256) d = md->lcc[d];
2778 ph10 366 if (fc == d) RRETURN(MATCH_NOMATCH);
2779 nigel 77 }
2780     }
2781     else
2782     #endif
2783     /* Not UTF-8 mode */
2784     {
2785     for (fi = min;; fi++)
2786     {
2787 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2788 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2789 ph10 426 if (fi >= max)
2790     {
2791     CHECK_PARTIAL();
2792 nigel 77 RRETURN(MATCH_NOMATCH);
2793 ph10 426 }
2794     if (eptr >= md->end_subject)
2795     {
2796     SCHECK_PARTIAL();
2797     RRETURN(MATCH_NOMATCH);
2798     }
2799     if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2800 nigel 77 }
2801     }
2802     /* Control never gets here */
2803     }
2804    
2805     /* Maximize case */
2806    
2807     else
2808     {
2809     pp = eptr;
2810    
2811     #ifdef SUPPORT_UTF8
2812     /* UTF-8 mode */
2813     if (utf8)
2814     {
2815 nigel 93 register unsigned int d;
2816 nigel 77 for (i = min; i < max; i++)
2817     {
2818     int len = 1;
2819     if (eptr >= md->end_subject) break;
2820     GETCHARLEN(d, eptr, len);
2821     if (d < 256) d = md->lcc[d];
2822     if (fc == d) break;
2823     eptr += len;
2824     }
2825 ph10 427 CHECK_PARTIAL();
2826 nigel 93 if (possessive) continue;
2827     for(;;)
2828 nigel 77 {
2829 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2830 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2831     if (eptr-- == pp) break; /* Stop if tried at original pos */
2832     BACKCHAR(eptr);
2833     }
2834     }
2835     else
2836     #endif
2837     /* Not UTF-8 mode */
2838     {
2839     for (i = min; i < max; i++)
2840     {
2841     if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2842     eptr++;
2843     }
2844 ph10 427 CHECK_PARTIAL();
2845 nigel 93 if (possessive) continue;
2846 nigel 77 while (eptr >= pp)
2847     {
2848 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2849 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2850     eptr--;
2851     }
2852     }
2853    
2854     RRETURN(MATCH_NOMATCH);
2855     }
2856     /* Control never gets here */
2857     }
2858    
2859     /* Caseful comparisons */
2860    
2861     else
2862     {
2863     #ifdef SUPPORT_UTF8
2864     /* UTF-8 mode */
2865     if (utf8)
2866     {
2867 nigel 93 register unsigned int d;
2868 nigel 77 for (i = 1; i <= min; i++)
2869     {
2870 ph10 426 if (eptr >= md->end_subject)
2871     {
2872     SCHECK_PARTIAL();
2873 ph10 427 RRETURN(MATCH_NOMATCH);
2874     }
2875 nigel 77 GETCHARINC(d, eptr);
2876     if (fc == d) RRETURN(MATCH_NOMATCH);
2877     }
2878     }
2879     else
2880     #endif
2881     /* Not UTF-8 mode */
2882     {
2883     for (i = 1; i <= min; i++)
2884 ph10 426 {
2885     if (eptr >= md->end_subject)
2886     {
2887     SCHECK_PARTIAL();
2888 ph10 427 RRETURN(MATCH_NOMATCH);
2889     }
2890 nigel 77 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2891 ph10 427 }
2892 nigel 77 }
2893    
2894     if (min == max) continue;
2895    
2896     if (minimize)
2897     {
2898     #ifdef SUPPORT_UTF8
2899     /* UTF-8 mode */
2900     if (utf8)
2901     {
2902 nigel 93 register unsigned int d;
2903 nigel 77 for (fi = min;; fi++)
2904     {
2905 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2906 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2907 ph10 427 if (fi >= max)
2908 ph10 426 {
2909 ph10 427 CHECK_PARTIAL();
2910 ph10 426 RRETURN(MATCH_NOMATCH);
2911 ph10 427 }
2912     if (eptr >= md->end_subject)
2913 ph10 426 {
2914 ph10 427 SCHECK_PARTIAL();
2915 ph10 426 RRETURN(MATCH_NOMATCH);
2916 ph10 427 }
2917 nigel 77 GETCHARINC(d, eptr);
2918 ph10 371 if (fc == d) RRETURN(MATCH_NOMATCH);
2919 nigel 77 }
2920     }
2921     else
2922     #endif
2923     /* Not UTF-8 mode */
2924     {
2925     for (fi = min;; fi++)
2926     {
2927 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2928 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2929 ph10 426 if (fi >= max)
2930     {
2931     CHECK_PARTIAL();
2932 nigel 77 RRETURN(MATCH_NOMATCH);
2933 ph10 426 }
2934     if (eptr >= md->end_subject)
2935     {
2936     SCHECK_PARTIAL();
2937     RRETURN(MATCH_NOMATCH);
2938 ph10 427 }
2939 ph10 426 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2940 nigel 77 }
2941     }
2942     /* Control never gets here */
2943     }
2944    
2945     /* Maximize case */
2946    
2947     else
2948     {
2949     pp = eptr;
2950    
2951     #ifdef SUPPORT_UTF8
2952     /* UTF-8 mode */
2953     if (utf8)
2954     {
2955 nigel 93 register unsigned int d;
2956 nigel 77 for (i = min; i < max; i++)
2957     {
2958     int len = 1;
2959     if (eptr >= md->end_subject) break;
2960     GETCHARLEN(d, eptr, len);
2961     if (fc == d) break;
2962     eptr += len;
2963     }
2964 ph10 427 CHECK_PARTIAL();
2965 nigel 93 if (possessive) continue;
2966 nigel 77 for(;;)
2967     {
2968 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2969 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2970     if (eptr-- == pp) break; /* Stop if tried at original pos */
2971     BACKCHAR(eptr);
2972     }
2973     }
2974     else
2975     #endif
2976     /* Not UTF-8 mode */
2977     {
2978     for (i = min; i < max; i++)
2979     {
2980     if (eptr >= md->end_subject || fc == *eptr) break;
2981     eptr++;
2982     }
2983 ph10 427 CHECK_PARTIAL();
2984 nigel 93 if (possessive) continue;
2985 nigel 77 while (eptr >= pp)
2986     {
2987 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2988 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2989     eptr--;
2990     }
2991     }
2992    
2993     RRETURN(MATCH_NOMATCH);
2994     }
2995     }
2996     /* Control never gets here */
2997    
2998     /* Match a single character type repeatedly; several different opcodes
2999     share code. This is very similar to the code for single characters, but we
3000     repeat it in the interests of efficiency. */
3001    
3002     case OP_TYPEEXACT:
3003     min = max = GET2(ecode, 1);
3004     minimize = TRUE;
3005     ecode += 3;
3006     goto REPEATTYPE;
3007    
3008     case OP_TYPEUPTO:
3009     case OP_TYPEMINUPTO:
3010     min = 0;
3011     max = GET2(ecode, 1);
3012     minimize = *ecode == OP_TYPEMINUPTO;
3013     ecode += 3;
3014     goto REPEATTYPE;
3015    
3016 nigel 93 case OP_TYPEPOSSTAR:
3017     possessive = TRUE;
3018     min = 0;
3019     max = INT_MAX;
3020     ecode++;
3021     goto REPEATTYPE;
3022    
3023     case OP_TYPEPOSPLUS:
3024     possessive = TRUE;
3025     min = 1;
3026     max = INT_MAX;
3027     ecode++;
3028     goto REPEATTYPE;
3029    
3030     case OP_TYPEPOSQUERY:
3031     possessive = TRUE;
3032     min = 0;
3033     max = 1;
3034     ecode++;
3035     goto REPEATTYPE;
3036    
3037     case OP_TYPEPOSUPTO:
3038     possessive = TRUE;
3039     min = 0;
3040     max = GET2(ecode, 1);
3041     ecode += 3;
3042     goto REPEATTYPE;
3043    
3044 nigel 77 case OP_TYPESTAR:
3045     case OP_TYPEMINSTAR:
3046     case OP_TYPEPLUS:
3047     case OP_TYPEMINPLUS:
3048     case OP_TYPEQUERY:
3049     case OP_TYPEMINQUERY:
3050     c = *ecode++ - OP_TYPESTAR;
3051     minimize = (c & 1) != 0;
3052     min = rep_min[c]; /* Pick up values from tables; */
3053     max = rep_max[c]; /* zero for max => infinity */
3054     if (max == 0) max = INT_MAX;
3055    
3056     /* Common code for all repeated single character type matches. Note that
3057     in UTF-8 mode, '.' matches a character of any length, but for the other
3058     character types, the valid characters are all one-byte long. */
3059    
3060     REPEATTYPE:
3061     ctype = *ecode++; /* Code for the character type */
3062    
3063     #ifdef SUPPORT_UCP
3064     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3065     {
3066     prop_fail_result = ctype == OP_NOTPROP;
3067     prop_type = *ecode++;
3068 nigel 87 prop_value = *ecode++;
3069 nigel 77 }
3070     else prop_type = -1;
3071     #endif
3072    
3073     /* First, ensure the minimum number of matches are present. Use inline
3074     code for maximizing the speed, and do the type test once at the start
3075 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3076 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3077     and single-bytes. */
3078    
3079     if (min > 0)
3080     {
3081     #ifdef SUPPORT_UCP
3082 nigel 87 if (prop_type >= 0)
3083 nigel 77 {
3084 nigel 87 switch(prop_type)
3085 nigel 77 {
3086 nigel 87 case PT_ANY:
3087     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3088     for (i = 1; i <= min; i++)
3089     {
3090 ph10 427 if (eptr >= md->end_subject)
3091 ph10 426 {
3092 ph10 427 SCHECK_PARTIAL();
3093 ph10 426 RRETURN(MATCH_NOMATCH);
3094 ph10 427 }
3095 ph10 184 GETCHARINCTEST(c, eptr);
3096 nigel 87 }
3097     break;
3098    
3099     case PT_LAMP:
3100     for (i = 1; i <= min; i++)
3101     {
3102 ph10 427 if (eptr >= md->end_subject)
3103 ph10 426 {
3104 ph10 427 SCHECK_PARTIAL();
3105 ph10 426 RRETURN(MATCH_NOMATCH);
3106 ph10 427 }
3107 ph10 184 GETCHARINCTEST(c, eptr);
3108 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3109 nigel 87 if ((prop_chartype == ucp_Lu ||
3110     prop_chartype == ucp_Ll ||
3111     prop_chartype == ucp_Lt) == prop_fail_result)
3112     RRETURN(MATCH_NOMATCH);
3113     }
3114     break;
3115    
3116     case PT_GC:
3117     for (i = 1; i <= min; i++)
3118     {
3119 ph10 427 if (eptr >= md->end_subject)
3120 ph10 426 {
3121 ph10 427 SCHECK_PARTIAL();
3122 ph10 426 RRETURN(MATCH_NOMATCH);
3123 ph10 427 }
3124 ph10 184 GETCHARINCTEST(c, eptr);
3125 ph10 349 prop_category = UCD_CATEGORY(c);
3126 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3127     RRETURN(MATCH_NOMATCH);
3128     }
3129     break;
3130    
3131     case PT_PC:
3132     for (i = 1; i <= min; i++)
3133     {
3134 ph10 427 if (eptr >= md->end_subject)
3135 ph10 426 {
3136 ph10 427 SCHECK_PARTIAL();
3137 ph10 426 RRETURN(MATCH_NOMATCH);
3138 ph10 427 }
3139 ph10 184 GETCHARINCTEST(c, eptr);
3140 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3141 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3142     RRETURN(MATCH_NOMATCH);
3143     }
3144     break;
3145    
3146     case PT_SC:
3147     for (i = 1; i <= min; i++)
3148     {
3149 ph10 427 if (eptr >= md->end_subject)
3150 ph10 426 {
3151 ph10 427 SCHECK_PARTIAL();
3152 ph10 426 RRETURN(MATCH_NOMATCH);
3153 ph10 427 }
3154 ph10 184 GETCHARINCTEST(c, eptr);
3155 ph10 349 prop_script = UCD_SCRIPT(c);
3156 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3157     RRETURN(MATCH_NOMATCH);
3158     }
3159     break;
3160    
3161     default:
3162     RRETURN(PCRE_ERROR_INTERNAL);
3163 nigel 77 }
3164     }
3165    
3166     /* Match extended Unicode sequences. We will get here only if the
3167     support is in the binary; otherwise a compile-time error occurs. */
3168    
3169     else if (ctype == OP_EXTUNI)
3170     {
3171     for (i = 1; i <= min; i++)
3172     {
3173 ph10 427 if (eptr >= md->end_subject)
3174 ph10 426 {
3175 ph10 427 SCHECK_PARTIAL();
3176 ph10 426 RRETURN(MATCH_NOMATCH);
3177 ph10 427 }
3178 nigel 77 GETCHARINCTEST(c, eptr);
3179 ph10 349 prop_category = UCD_CATEGORY(c);
3180 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3181     while (eptr < md->end_subject)
3182     {
3183     int len = 1;
3184 ph10 426 if (!utf8) c = *eptr;
3185     else { GETCHARLEN(c, eptr, len); }
3186 ph10 349 prop_category = UCD_CATEGORY(c);
3187 nigel 77 if (prop_category != ucp_M) break;
3188     eptr += len;
3189     }
3190     }
3191     }
3192    
3193     else
3194     #endif /* SUPPORT_UCP */
3195    
3196     /* Handle all other cases when the coding is UTF-8 */
3197    
3198     #ifdef SUPPORT_UTF8
3199     if (utf8) switch(ctype)
3200     {
3201     case OP_ANY:
3202     for (i = 1; i <= min; i++)
3203     {
3204 ph10 426 if (eptr >= md->end_subject)
3205     {
3206 ph10 427 SCHECK_PARTIAL();
3207 nigel 77 RRETURN(MATCH_NOMATCH);
3208 ph10 427 }
3209 ph10 426 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3210 nigel 91 eptr++;
3211 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3212     }
3213     break;
3214    
3215 ph10 341 case OP_ALLANY:
3216     for (i = 1; i <= min; i++)
3217     {
3218 ph10 427 if (eptr >= md->end_subject)
3219 ph10 426 {
3220     SCHECK_PARTIAL();
3221     RRETURN(MATCH_NOMATCH);
3222 ph10 427 }
3223 ph10 341 eptr++;
3224     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3225     }
3226     break;
3227    
3228 nigel 77 case OP_ANYBYTE:
3229 ph10 427 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3230 nigel 77 eptr += min;
3231     break;
3232    
3233 nigel 93 case OP_ANYNL:
3234     for (i = 1; i <= min; i++)
3235     {
3236 ph10 427 if (eptr >= md->end_subject)
3237 ph10 426 {
3238     SCHECK_PARTIAL();
3239     RRETURN(MATCH_NOMATCH);
3240 ph10 427 }
3241 nigel 93 GETCHARINC(c, eptr);
3242     switch(c)
3243     {
3244     default: RRETURN(MATCH_NOMATCH);
3245     case 0x000d:
3246     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3247     break;
3248 ph10 231
3249 nigel 93 case 0x000a:
3250 ph10 231 break;
3251    
3252 nigel 93 case 0x000b:
3253     case 0x000c:
3254     case 0x0085:
3255     case 0x2028:
3256     case 0x2029:
3257 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3258 nigel 93 break;
3259     }
3260     }
3261     break;
3262    
3263 ph10 178 case OP_NOT_HSPACE:
3264     for (i = 1; i <= min; i++)
3265     {
3266 ph10 427 if (eptr >= md->end_subject)
3267 ph10 426 {
3268     SCHECK_PARTIAL();
3269     RRETURN(MATCH_NOMATCH);
3270 ph10 427 }
3271 ph10 178 GETCHARINC(c, eptr);
3272     switch(c)
3273     {
3274     default: break;
3275     case 0x09: /* HT */
3276     case 0x20: /* SPACE */
3277     case 0xa0: /* NBSP */
3278     case 0x1680: /* OGHAM SPACE MARK */
3279     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3280     case 0x2000: /* EN QUAD */
3281     case 0x2001: /* EM QUAD */
3282     case 0x2002: /* EN SPACE */
3283     case 0x2003: /* EM SPACE */
3284     case 0x2004: /* THREE-PER-EM SPACE */
3285     case 0x2005: /* FOUR-PER-EM SPACE */
3286     case 0x2006: /* SIX-PER-EM SPACE */
3287     case 0x2007: /* FIGURE SPACE */
3288     case 0x2008: /* PUNCTUATION SPACE */
3289     case 0x2009: /* THIN SPACE */
3290     case 0x200A: /* HAIR SPACE */
3291     case 0x202f: /* NARROW NO-BREAK SPACE */
3292     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3293     case 0x3000: /* IDEOGRAPHIC SPACE */
3294     RRETURN(MATCH_NOMATCH);
3295     }
3296     }
3297     break;
3298 ph10 182
3299 ph10 178 case OP_HSPACE:
3300     for (i = 1; i <= min; i++)
3301     {
3302 ph10 427 if (eptr >= md->end_subject)
3303 ph10 426 {
3304 ph10 427 SCHECK_PARTIAL();
3305 ph10 426 RRETURN(MATCH_NOMATCH);
3306 ph10 427 }
3307 ph10 178 GETCHARINC(c, eptr);
3308     switch(c)
3309     {
3310     default: RRETURN(MATCH_NOMATCH);
3311     case 0x09: /* HT */
3312     case 0x20: /* SPACE */
3313     case 0xa0: /* NBSP */
3314     case 0x1680: /* OGHAM SPACE MARK */
3315     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3316     case 0x2000: /* EN QUAD */
3317     case 0x2001: /* EM QUAD */
3318     case 0x2002: /* EN SPACE */
3319     case 0x2003: /* EM SPACE */
3320     case 0x2004: /* THREE-PER-EM SPACE */
3321     case 0x2005: /* FOUR-PER-EM SPACE */
3322     case 0x2006: /* SIX-PER-EM SPACE */
3323     case 0x2007: /* FIGURE SPACE */
3324     case 0x2008: /* PUNCTUATION SPACE */
3325     case 0x2009: /* THIN SPACE */
3326     case 0x200A: /* HAIR SPACE */
3327     case 0x202f: /* NARROW NO-BREAK SPACE */
3328     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3329     case 0x3000: /* IDEOGRAPHIC SPACE */
3330     break;
3331     }
3332     }
3333     break;
3334 ph10 182
3335 ph10 178 case OP_NOT_VSPACE:
3336     for (i = 1; i <= min; i++)
3337     {
3338 ph10 427 if (eptr >= md->end_subject)
3339 ph10 426 {
3340 ph10 427 SCHECK_PARTIAL();
3341 ph10 426 RRETURN(MATCH_NOMATCH);
3342 ph10 427 }
3343 ph10 178 GETCHARINC(c, eptr);
3344     switch(c)
3345     {
3346     default: break;
3347     case 0x0a: /* LF */
3348     case 0x0b: /* VT */
3349     case 0x0c: /* FF */
3350     case 0x0d: /* CR */
3351     case 0x85: /* NEL */
3352     case 0x2028: /* LINE SEPARATOR */
3353     case 0x2029: /* PARAGRAPH SEPARATOR */
3354     RRETURN(MATCH_NOMATCH);
3355     }
3356     }
3357     break;
3358 ph10 182
3359 ph10 178 case OP_VSPACE:
3360     for (i = 1; i <= min; i++)
3361     {
3362 ph10 427 if (eptr >= md->end_subject)
3363 ph10 426 {
3364 ph10 427 SCHECK_PARTIAL();
3365 ph10 426 RRETURN(MATCH_NOMATCH);
3366 ph10 427 }
3367 ph10 178 GETCHARINC(c, eptr);
3368     switch(c)
3369     {
3370     default: RRETURN(MATCH_NOMATCH);
3371     case 0x0a: /* LF */
3372     case 0x0b: /* VT */
3373     case 0x0c: /* FF */
3374     case 0x0d: /* CR */
3375     case 0x85: /* NEL */
3376     case 0x2028: /* LINE SEPARATOR */
3377     case 0x2029: /* PARAGRAPH SEPARATOR */
3378 ph10 182 break;
3379 ph10 178 }
3380     }
3381     break;
3382    
3383 nigel 77 case OP_NOT_DIGIT:
3384     for (i = 1; i <= min; i++)
3385     {
3386 ph10 427 if (eptr >= md->end_subject)
3387 ph10 426 {
3388 ph10 427 SCHECK_PARTIAL();
3389 ph10 426 RRETURN(MATCH_NOMATCH);
3390 ph10 427 }
3391 nigel 77 GETCHARINC(c, eptr);
3392     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3393     RRETURN(MATCH_NOMATCH);
3394     }
3395     break;
3396    
3397     case OP_DIGIT:
3398     for (i = 1; i <= min; i++)
3399     {
3400 ph10 427 if (eptr >= md->end_subject)
3401 ph10 426 {
3402 ph10 427 SCHECK_PARTIAL();
3403 nigel 77 RRETURN(MATCH_NOMATCH);
3404 ph10 427 }
3405 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3406     RRETURN(MATCH_NOMATCH);
3407 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3408     }
3409     break;
3410    
3411     case OP_NOT_WHITESPACE:
3412     for (i = 1; i <= min; i++)
3413     {
3414 ph10 427 if (eptr >= md->end_subject)
3415 ph10 426 {
3416 ph10 427 SCHECK_PARTIAL();
3417 nigel 77 RRETURN(MATCH_NOMATCH);
3418 ph10 427 }
3419 ph10 426 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3420     RRETURN(MATCH_NOMATCH);
3421 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3422 nigel 77 }
3423     break;
3424    
3425     case OP_WHITESPACE:
3426     for (i = 1; i <= min; i++)
3427     {
3428 ph10 427 if (eptr >= md->end_subject)
3429 ph10 426 {
3430 ph10 427 SCHECK_PARTIAL();
3431 nigel 77 RRETURN(MATCH_NOMATCH);
3432 ph10 427 }
3433 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3434     RRETURN(MATCH_NOMATCH);
3435 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3436     }
3437     break;
3438    
3439     case OP_NOT_WORDCHAR:
3440     for (i = 1; i <= min; i++)
3441     {
3442     if (eptr >= md->end_subject ||
3443 ph10 219 (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3444 nigel 77 RRETURN(MATCH_NOMATCH);
3445 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3446 nigel 77 }
3447     break;
3448    
3449     case OP_WORDCHAR:
3450     for (i = 1; i <= min; i++)
3451     {
3452 ph10 427 if (eptr >= md->end_subject)
3453 ph10 426 {
3454 ph10 427 SCHECK_PARTIAL();
3455 nigel 77 RRETURN(MATCH_NOMATCH);
3456 ph10 427 }
3457 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3458     RRETURN(MATCH_NOMATCH);
3459 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3460     }
3461     break;
3462    
3463     default:
3464     RRETURN(PCRE_ERROR_INTERNAL);
3465     } /* End switch(ctype) */
3466    
3467     else
3468     #endif /* SUPPORT_UTF8 */
3469    
3470     /* Code for the non-UTF-8 case for minimum matching of operators other
3471 ph10 426 than OP_PROP and OP_NOTPROP. */
3472 nigel 77
3473     switch(ctype)
3474     {
3475     case OP_ANY:
3476 ph10 342 for (i = 1; i <= min; i++)
3477 nigel 77 {
3478 ph10 427 if (eptr >= md->end_subject)
3479 ph10 426 {
3480 ph10 427 SCHECK_PARTIAL();
3481 ph10 426 RRETURN(MATCH_NOMATCH);
3482 ph10 427 }
3483 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3484     eptr++;
3485 nigel 77 }
3486     break;
3487    
3488 ph10 341 case OP_ALLANY:
3489 ph10 427 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3490 ph10 341 eptr += min;
3491     break;
3492    
3493 nigel 77 case OP_ANYBYTE:
3494 ph10 427 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3495 nigel 77 eptr += min;
3496     break;
3497    
3498 nigel 93 case OP_ANYNL:
3499     for (i = 1; i <= min; i++)
3500     {
3501 ph10 427 if (eptr >= md->end_subject)
3502 ph10 426 {
3503 ph10 427 SCHECK_PARTIAL();
3504 ph10 426 RRETURN(MATCH_NOMATCH);
3505 ph10 427 }
3506 nigel 93 switch(*eptr++)
3507     {
3508     default: RRETURN(MATCH_NOMATCH);
3509     case 0x000d:
3510     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3511     break;
3512     case 0x000a:
3513 ph10 231 break;
3514    
3515 nigel 93 case 0x000b:
3516     case 0x000c:
3517     case 0x0085:
3518 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3519 nigel 93 break;
3520     }
3521     }
3522     break;
3523    
3524 ph10 178 case OP_NOT_HSPACE:
3525     for (i = 1; i <= min; i++)
3526     {
3527 ph10 427 if (eptr >= md->end_subject)
3528 ph10 426 {
3529 ph10 427 SCHECK_PARTIAL();
3530 ph10 426 RRETURN(MATCH_NOMATCH);
3531 ph10 427 }
3532 ph10 178 switch(*eptr++)
3533     {
3534     default: break;
3535     case 0x09: /* HT */
3536     case 0x20: /* SPACE */
3537     case 0xa0: /* NBSP */
3538     RRETURN(MATCH_NOMATCH);
3539     }
3540     }
3541     break;
3542    
3543     case OP_HSPACE:
3544     for (i = 1; i <= min; i++)
3545     {
3546 ph10 427 if (eptr >= md->end_subject)
3547 ph10 426 {
3548 ph10 427 SCHECK_PARTIAL();
3549 ph10 426 RRETURN(MATCH_NOMATCH);
3550 ph10 427 }
3551 ph10 178 switch(*eptr++)
3552     {
3553     default: RRETURN(MATCH_NOMATCH);
3554     case 0x09: /* HT */
3555     case 0x20: /* SPACE */
3556     case 0xa0: /* NBSP */
3557 ph10 182 break;
3558 ph10 178 }
3559     }
3560     break;
3561    
3562     case OP_NOT_VSPACE:
3563     for (i = 1; i <= min; i++)
3564     {
3565 ph10 427 if (eptr >= md->end_subject)
3566 ph10 426 {
3567 ph10 427 SCHECK_PARTIAL();
3568 ph10 426 RRETURN(MATCH_NOMATCH);
3569 ph10 427 }
3570 ph10 178 switch(*eptr++)
3571     {
3572     default: break;
3573     case 0x0a: /* LF */
3574     case 0x0b: /* VT */
3575     case 0x0c: /* FF */
3576     case 0x0d: /* CR */
3577     case 0x85: /* NEL */
3578     RRETURN(MATCH_NOMATCH);
3579     }
3580     }
3581     break;
3582    
3583     case OP_VSPACE:
3584     for (i = 1; i <= min; i++)
3585     {
3586 ph10 427 if (eptr >= md->end_subject)
3587 ph10 426 {
3588 ph10 427 SCHECK_PARTIAL();
3589 ph10 426 RRETURN(MATCH_NOMATCH);
3590 ph10 427 }
3591 ph10 178 switch(*eptr++)
3592     {
3593     default: RRETURN(MATCH_NOMATCH);
3594     case 0x0a: /* LF */
3595     case 0x0b: /* VT */
3596     case 0x0c: /* FF */
3597     case 0x0d: /* CR */
3598     case 0x85: /* NEL */
3599 ph10 182 break;
3600 ph10 178 }
3601     }
3602     break;
3603    
3604 nigel 77 case OP_NOT_DIGIT:
3605     for (i = 1; i <= min; i++)
3606 ph10 427 {
3607     if (eptr >= md->end_subject)
3608 ph10 426 {
3609 ph10 427 SCHECK_PARTIAL();
3610 ph10 426 RRETURN(MATCH_NOMATCH);
3611 ph10 427 }
3612 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3613 ph10 427 }
3614 nigel 77 break;
3615    
3616     case OP_DIGIT:
3617     for (i = 1; i <= min; i++)
3618 ph10 427 {
3619     if (eptr >= md->end_subject)
3620 ph10 426 {
3621 ph10 427 SCHECK_PARTIAL();
3622 ph10 426 RRETURN(MATCH_NOMATCH);
3623 ph10 427 }
3624 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3625 ph10 427 }
3626 nigel 77 break;
3627    
3628     case OP_NOT_WHITESPACE:
3629     for (i = 1; i <= min; i++)
3630 ph10 427 {
3631     if (eptr >= md->end_subject)
3632 ph10 426 {
3633 ph10 427 SCHECK_PARTIAL();
3634 ph10 426 RRETURN(MATCH_NOMATCH);
3635 ph10 427 }
3636 nigel 77 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3637 ph10 427 }
3638 nigel 77 break;
3639    
3640     case OP_WHITESPACE:
3641     for (i = 1; i <= min; i++)
3642 ph10 427 {
3643     if (eptr >= md->end_subject)
3644 ph10 426 {
3645 ph10 427 SCHECK_PARTIAL();
3646 ph10 426 RRETURN(MATCH_NOMATCH);
3647 ph10 427 }
3648 nigel 77 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3649 ph10 427 }
3650 nigel 77 break;
3651    
3652     case OP_NOT_WORDCHAR:
3653     for (i = 1; i <= min; i++)
3654 ph10 427 {
3655     if (eptr >= md->end_subject)
3656 ph10 426 {
3657 ph10 427 SCHECK_PARTIAL();
3658 ph10 426 RRETURN(MATCH_NOMATCH);
3659 ph10 427 }
3660 nigel 77 if ((md->ctypes[*eptr++] & ctype_word) != 0)
3661     RRETURN(MATCH_NOMATCH);
3662 ph10 427 }
3663 nigel 77 break;
3664    
3665     case OP_WORDCHAR:
3666     for (i = 1; i <= min; i++)
3667 ph10 427 {
3668     if (eptr >= md->end_subject)
3669 ph10 426 {
3670 ph10 427 SCHECK_PARTIAL();
3671 ph10 426 RRETURN(MATCH_NOMATCH);
3672 ph10 427 }
3673 nigel 77 if ((md->ctypes[*eptr++] & ctype_word) == 0)
3674     RRETURN(MATCH_NOMATCH);
3675 ph10 427 }
3676 nigel 77 break;
3677    
3678     default:
3679     RRETURN(PCRE_ERROR_INTERNAL);
3680     }
3681     }
3682    
3683     /* If min = max, continue at the same level without recursing */
3684    
3685     if (min == max) continue;
3686    
3687     /* If minimizing, we have to test the rest of the pattern before each
3688     subsequent match. Again, separate the UTF-8 case for speed, and also
3689     separate the UCP cases. */
3690    
3691     if (minimize)
3692     {
3693     #ifdef SUPPORT_UCP
3694 nigel 87 if (prop_type >= 0)
3695 nigel 77 {
3696 nigel 87 switch(prop_type)
3697 nigel 77 {
3698 nigel 87 case PT_ANY:
3699     for (fi = min;; fi++)
3700     {
3701 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3702 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3703 ph10 427 if (fi >= max)
3704 ph10 426 {
3705 ph10 427 CHECK_PARTIAL();
3706 ph10 426 RRETURN(MATCH_NOMATCH);
3707 ph10 427 }
3708     if (eptr >= md->end_subject)
3709 ph10 426 {
3710 ph10 427 SCHECK_PARTIAL();
3711 ph10 426 RRETURN(MATCH_NOMATCH);
3712 ph10 427 }
3713 nigel 87 GETCHARINC(c, eptr);
3714     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3715     }
3716 nigel 93 /* Control never gets here */
3717 nigel 87
3718     case PT_LAMP:
3719     for (fi = min;; fi++)
3720     {
3721 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3722 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3723 ph10 427 if (fi >= max)
3724 ph10 426 {
3725 ph10 427 CHECK_PARTIAL();
3726 ph10 426 RRETURN(MATCH_NOMATCH);
3727 ph10 427 }
3728     if (eptr >= md->end_subject)
3729 ph10 426 {
3730 ph10 427 SCHECK_PARTIAL();
3731 ph10 426 RRETURN(MATCH_NOMATCH);
3732 ph10 427 }
3733 nigel 87 GETCHARINC(c, eptr);
3734 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3735 nigel 87 if ((prop_chartype == ucp_Lu ||
3736     prop_chartype == ucp_Ll ||
3737     prop_chartype == ucp_Lt) == prop_fail_result)
3738     RRETURN(MATCH_NOMATCH);
3739     }
3740 nigel 93 /* Control never gets here */
3741 nigel 87
3742     case PT_GC:
3743     for (fi = min;; fi++)
3744     {
3745 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3746 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3747 ph10 427 if (fi >= max)
3748 ph10 426 {
3749 ph10 427 CHECK_PARTIAL();
3750 ph10 426 RRETURN(MATCH_NOMATCH);
3751 ph10 427 }
3752     if (eptr >= md->end_subject)
3753 ph10 426 {
3754 ph10 427 SCHECK_PARTIAL();
3755 ph10 426 RRETURN(MATCH_NOMATCH);
3756 ph10 427 }
3757 nigel 87 GETCHARINC(c, eptr);
3758 ph10 349 prop_category = UCD_CATEGORY(c);
3759 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3760     RRETURN(MATCH_NOMATCH);
3761     }
3762 nigel 93 /* Control never gets here */
3763 nigel 87
3764     case PT_PC:
3765     for (fi = min;; fi++)
3766     {
3767 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3768 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3769 ph10 427 if (fi >= max)
3770 ph10 426 {
3771 ph10 427 CHECK_PARTIAL();
3772 ph10 426 RRETURN(MATCH_NOMATCH);
3773 ph10 427 }
3774     if (eptr >= md->end_subject)
3775 ph10 426 {
3776 ph10 427 SCHECK_PARTIAL();
3777 ph10 426 RRETURN(MATCH_NOMATCH);
3778 ph10 427 }
3779 nigel 87 GETCHARINC(c, eptr);
3780 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3781 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3782     RRETURN(MATCH_NOMATCH);
3783     }
3784 nigel 93 /* Control never gets here */
3785 nigel 87
3786     case PT_SC:
3787     for (fi = min;; fi++)
3788     {
3789 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3790 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3791 ph10 427 if (fi >= max)
3792 ph10 426 {
3793 ph10 427 CHECK_PARTIAL();
3794 ph10 426 RRETURN(MATCH_NOMATCH);
3795 ph10 427 }
3796     if (eptr >= md->end_subject)
3797 ph10 426 {
3798 ph10 427 SCHECK_PARTIAL();
3799 ph10 426 RRETURN(MATCH_NOMATCH);
3800 ph10 427 }
3801 nigel 87 GETCHARINC(c, eptr);
3802 ph10 349 prop_script = UCD_SCRIPT(c);
3803 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3804     RRETURN(MATCH_NOMATCH);
3805     }
3806 nigel 93 /* Control never gets here */
3807 nigel 87
3808     default:
3809     RRETURN(PCRE_ERROR_INTERNAL);
3810 nigel 77 }
3811     }
3812    
3813     /* Match extended Unicode sequences. We will get here only if the
3814     support is in the binary; otherwise a compile-time error occurs. */
3815    
3816     else if (ctype == OP_EXTUNI)
3817     {
3818     for (fi = min;; fi++)
3819     {
3820 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3821 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3822 ph10 427 if (fi >= max)
3823 ph10 426 {
3824 ph10 427 CHECK_PARTIAL();
3825 ph10 426 RRETURN(MATCH_NOMATCH);
3826 ph10 427 }
3827     if (eptr >= md->end_subject)
3828 ph10 426 {
3829 ph10 427 SCHECK_PARTIAL();
3830 ph10 426 RRETURN(MATCH_NOMATCH);
3831 ph10 427 }
3832 nigel 77 GETCHARINCTEST(c, eptr);
3833 ph10 349 prop_category = UCD_CATEGORY(c);
3834 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3835     while (eptr < md->end_subject)
3836     {
3837     int len = 1;
3838 ph10 426 if (!utf8) c = *eptr;
3839     else { GETCHARLEN(c, eptr, len); }
3840 ph10 349 prop_category = UCD_CATEGORY(c);
3841 nigel 77 if (prop_category != ucp_M) break;
3842     eptr += len;
3843     }
3844     }
3845     }
3846    
3847     else
3848     #endif /* SUPPORT_UCP */
3849    
3850     #ifdef SUPPORT_UTF8
3851     /* UTF-8 mode */
3852     if (utf8)
3853     {
3854     for (fi = min;; fi++)
3855     {
3856 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3857 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3858 ph10 427 if (fi >= max)
3859 ph10 426 {
3860 ph10 427 CHECK_PARTIAL();
3861 nigel 91 RRETURN(MATCH_NOMATCH);
3862 ph10 427 }
3863     if (eptr >= md->end_subject)
3864 ph10 426 {
3865 ph10 427 SCHECK_PARTIAL();
3866 ph10 426 RRETURN(MATCH_NOMATCH);
3867 ph10 427 }
3868 ph10 426 if (ctype == OP_ANY && IS_NEWLINE(eptr))
3869     RRETURN(MATCH_NOMATCH);
3870 nigel 77 GETCHARINC(c, eptr);
3871     switch(ctype)
3872     {
3873 ph10 342 case OP_ANY: /* This is the non-NL case */
3874 ph10 345 case OP_ALLANY:
3875 nigel 77 case OP_ANYBYTE:
3876     break;
3877    
3878 nigel 93 case OP_ANYNL:
3879     switch(c)
3880     {
3881     default: RRETURN(MATCH_NOMATCH);
3882     case 0x000d:
3883     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3884     break;
3885     case 0x000a:
3886 ph10 231 break;
3887    
3888 nigel 93 case 0x000b:
3889     case 0x000c:
3890     case 0x0085:
3891     case 0x2028:
3892     case 0x2029:
3893 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3894 nigel 93 break;
3895     }
3896     break;
3897    
3898 ph10 178 case OP_NOT_HSPACE:
3899     switch(c)
3900     {
3901     default: break;
3902     case 0x09: /* HT */
3903     case 0x20: /* SPACE */
3904     case 0xa0: /* NBSP */
3905     case 0x1680: /* OGHAM SPACE MARK */
3906     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3907     case 0x2000: /* EN QUAD */
3908     case 0x2001: /* EM QUAD */
3909     case 0x2002: /* EN SPACE */
3910     case 0x2003: /* EM SPACE */
3911     case 0x2004: /* THREE-PER-EM SPACE */
3912     case 0x2005: /* FOUR-PER-EM SPACE */
3913     case 0x2006: /* SIX-PER-EM SPACE */
3914     case 0x2007: /* FIGURE SPACE */
3915     case 0x2008: /* PUNCTUATION SPACE */
3916     case 0x2009: /* THIN SPACE */
3917     case 0x200A: /* HAIR SPACE */
3918     case 0x202f: /* NARROW NO-BREAK SPACE */
3919     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3920     case 0x3000: /* IDEOGRAPHIC SPACE */
3921     RRETURN(MATCH_NOMATCH);
3922     }
3923     break;
3924    
3925     case OP_HSPACE:
3926     switch(c)
3927     {
3928     default: RRETURN(MATCH_NOMATCH);
3929     case 0x09: /* HT */
3930     case 0x20: /* SPACE */
3931     case 0xa0: /* NBSP */
3932     case 0x1680: /* OGHAM SPACE MARK */
3933     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3934     case 0x2000: /* EN QUAD */
3935     case 0x2001: /* EM QUAD */
3936     case 0x2002: /* EN SPACE */
3937     case 0x2003: /* EM SPACE */
3938     case 0x2004: /* THREE-PER-EM SPACE */
3939     case 0x2005: /* FOUR-PER-EM SPACE */
3940     case 0x2006: /* SIX-PER-EM SPACE */
3941     case 0x2007: /* FIGURE SPACE */
3942     case 0x2008: /* PUNCTUATION SPACE */
3943     case 0x2009: /* THIN SPACE */
3944     case 0x200A: /* HAIR SPACE */
3945     case 0x202f: /* NARROW NO-BREAK SPACE */
3946     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3947     case 0x3000: /* IDEOGRAPHIC SPACE */
3948     break;
3949     }
3950     break;
3951    
3952     case OP_NOT_VSPACE:
3953     switch(c)
3954     {
3955     default: break;
3956     case 0x0a: /* LF */
3957     case 0x0b: /* VT */
3958     case 0x0c: /* FF */
3959     case 0x0d: /* CR */
3960     case 0x85: /* NEL */
3961     case 0x2028: /* LINE SEPARATOR */
3962     case 0x2029: /* PARAGRAPH SEPARATOR */
3963     RRETURN(MATCH_NOMATCH);
3964     }
3965     break;
3966    
3967     case OP_VSPACE:
3968     switch(c)
3969     {
3970     default: RRETURN(MATCH_NOMATCH);
3971     case 0x0a: /* LF */
3972     case 0x0b: /* VT */
3973     case 0x0c: /* FF */
3974     case 0x0d: /* CR */
3975     case 0x85: /* NEL */
3976     case 0x2028: /* LINE SEPARATOR */
3977     case 0x2029: /* PARAGRAPH SEPARATOR */
3978     break;
3979     }
3980     break;
3981    
3982 nigel 77 case OP_NOT_DIGIT:
3983     if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3984     RRETURN(MATCH_NOMATCH);
3985     break;
3986    
3987