/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 426 - (hide annotations) (download)
Wed Aug 26 15:38:32 2009 UTC (5 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 161361 byte(s)
Remove restrictions on pcre_exec() partial matching.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 381 Copyright (c) 1997-2009 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74     #define MATCH_COMMIT (-999)
75     #define MATCH_PRUNE (-998)
76     #define MATCH_SKIP (-997)
77     #define MATCH_THEN (-996)
78    
79 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
80     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81     because the offset vector is always a multiple of 3 long. */
82    
83     #define REC_STACK_SAVE_MAX 30
84    
85     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
86    
87     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
88     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
89    
90    
91    
92     #ifdef DEBUG
93     /*************************************************
94     * Debugging function to print chars *
95     *************************************************/
96    
97     /* Print a sequence of chars in printable format, stopping at the end of the
98     subject if requested.
99    
100     Arguments:
101     p points to characters
102     length number to print
103     is_subject TRUE if printing from within md->start_subject
104     md pointer to matching data block, if is_subject is TRUE
105    
106     Returns: nothing
107     */
108    
109     static void
110     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111     {
112 nigel 93 unsigned int c;
113 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114     while (length-- > 0)
115     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
116     }
117     #endif
118    
119    
120    
121     /*************************************************
122     * Match a back-reference *
123     *************************************************/
124    
125     /* If a back reference hasn't been set, the length that is passed is greater
126     than the number of characters left in the string, so the match fails.
127    
128     Arguments:
129     offset index into the offset vector
130     eptr points into the subject
131     length length to be matched
132     md points to match data block
133     ims the ims flags
134    
135     Returns: TRUE if matched
136     */
137    
138     static BOOL
139 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
140 nigel 77 unsigned long int ims)
141     {
142 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
143 nigel 77
144     #ifdef DEBUG
145     if (eptr >= md->end_subject)
146     printf("matching subject <null>");
147     else
148     {
149     printf("matching subject ");
150     pchars(eptr, length, TRUE, md);
151     }
152     printf(" against backref ");
153     pchars(p, length, FALSE, md);
154     printf("\n");
155     #endif
156    
157     /* Always fail if not enough characters left */
158    
159     if (length > md->end_subject - eptr) return FALSE;
160    
161 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162     properly if Unicode properties are supported. Otherwise, we can check only
163     ASCII characters. */
164 nigel 77
165     if ((ims & PCRE_CASELESS) != 0)
166     {
167 ph10 354 #ifdef SUPPORT_UTF8
168     #ifdef SUPPORT_UCP
169     if (md->utf8)
170     {
171 ph10 358 USPTR endptr = eptr + length;
172 ph10 354 while (eptr < endptr)
173     {
174 ph10 358 int c, d;
175 ph10 354 GETCHARINC(c, eptr);
176     GETCHARINC(d, p);
177     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178 ph10 358 }
179     }
180 ph10 354 else
181     #endif
182     #endif
183    
184     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185     is no UCP support. */
186 ph10 358
187 nigel 77 while (length-- > 0)
188 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189 nigel 77 }
190 ph10 358
191 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
192     are in UTF-8 mode. */
193 ph10 358
194 nigel 77 else
195     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
197     return TRUE;
198     }
199    
200    
201    
202     /***************************************************************************
203     ****************************************************************************
204     RECURSION IN THE match() FUNCTION
205    
206 nigel 87 The match() function is highly recursive, though not every recursive call
207     increases the recursive depth. Nevertheless, some regular expressions can cause
208     it to recurse to a great depth. I was writing for Unix, so I just let it call
209     itself recursively. This uses the stack for saving everything that has to be
210     saved for a recursive call. On Unix, the stack can be large, and this works
211     fine.
212 nigel 77
213 nigel 87 It turns out that on some non-Unix-like systems there are problems with
214     programs that use a lot of stack. (This despite the fact that every last chip
215     has oodles of memory these days, and techniques for extending the stack have
216     been known for decades.) So....
217 nigel 77
218     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219     calls by keeping local variables that need to be preserved in blocks of memory
220 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
221 nigel 77 achieve this so that the actual code doesn't look very different to what it
222     always used to.
223 ph10 164
224 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
225 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
226     Switzer, the use of longjmp() has been abolished, at the cost of having to
227     provide a unique number for each call to RMATCH. There is no way of generating
228     a sequence of numbers at compile time in C. I have given them names, to make
229     them stand out more clearly.
230    
231     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
234     don't have indeterminate values; this has meant that the frame size can be
235 ph10 164 reduced because the result can be "passed back" by straight setting of the
236     variable instead of being passed in the frame.
237 nigel 77 ****************************************************************************
238     ***************************************************************************/
239    
240 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241     below must be updated in sync. */
242 nigel 77
243 ph10 164 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
244     RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
245     RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
246     RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
247 ph10 210 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
248 ph10 212 RM51, RM52, RM53, RM54 };
249 ph10 164
250 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
251 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
252 ph10 164 actually used in this definition. */
253 nigel 77
254     #ifndef NO_RECURSE
255     #define REGISTER register
256 ph10 164
257 nigel 87 #ifdef DEBUG
258 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259 nigel 87 { \
260     printf("match() called in line %d\n", __LINE__); \
261 ph10 168 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
262 nigel 87 printf("to line %d\n", __LINE__); \
263     }
264     #define RRETURN(ra) \
265     { \
266     printf("match() returned %d from line %d ", ra, __LINE__); \
267     return ra; \
268     }
269     #else
270 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271 ph10 168 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
272 nigel 77 #define RRETURN(ra) return ra
273 nigel 87 #endif
274    
275 nigel 77 #else
276    
277    
278 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
279     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280     argument of match(), which never changes. */
281 nigel 77
282     #define REGISTER
283    
284 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
285 nigel 77 {\
286     heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
287 ph10 164 frame->Xwhere = rw; \
288     newframe->Xeptr = ra;\
289     newframe->Xecode = rb;\
290 ph10 168 newframe->Xmstart = mstart;\
291 ph10 164 newframe->Xoffset_top = rc;\
292     newframe->Xims = re;\
293     newframe->Xeptrb = rf;\
294     newframe->Xflags = rg;\
295     newframe->Xrdepth = frame->Xrdepth + 1;\
296     newframe->Xprevframe = frame;\
297     frame = newframe;\
298     DPRINTF(("restarting from line %d\n", __LINE__));\
299     goto HEAP_RECURSE;\
300     L_##rw:\
301     DPRINTF(("jumped back to line %d\n", __LINE__));\
302 nigel 77 }
303    
304     #define RRETURN(ra)\
305     {\
306     heapframe *newframe = frame;\
307     frame = newframe->Xprevframe;\
308     (pcre_stack_free)(newframe);\
309     if (frame != NULL)\
310     {\
311 ph10 164 rrc = ra;\
312     goto HEAP_RETURN;\
313 nigel 77 }\
314     return ra;\
315     }
316    
317    
318     /* Structure for remembering the local variables in a private frame */
319    
320     typedef struct heapframe {
321     struct heapframe *Xprevframe;
322    
323     /* Function arguments that may change */
324    
325 ph10 409 USPTR Xeptr;
326 nigel 77 const uschar *Xecode;
327 ph10 409 USPTR Xmstart;
328 nigel 77 int Xoffset_top;
329     long int Xims;
330     eptrblock *Xeptrb;
331     int Xflags;
332 nigel 91 unsigned int Xrdepth;
333 nigel 77
334     /* Function local variables */
335    
336 ph10 409 USPTR Xcallpat;
337 ph10 406 #ifdef SUPPORT_UTF8
338 ph10 409 USPTR Xcharptr;
339 ph10 406 #endif
340 ph10 409 USPTR Xdata;
341     USPTR Xnext;
342     USPTR Xpp;
343     USPTR Xprev;
344     USPTR Xsaved_eptr;
345 nigel 77
346     recursion_info Xnew_recursive;
347    
348     BOOL Xcur_is_word;
349     BOOL Xcondition;
350     BOOL Xprev_is_word;
351    
352     unsigned long int Xoriginal_ims;
353    
354     #ifdef SUPPORT_UCP
355     int Xprop_type;
356 nigel 87 int Xprop_value;
357 nigel 77 int Xprop_fail_result;
358     int Xprop_category;
359     int Xprop_chartype;
360 nigel 87 int Xprop_script;
361 ph10 123 int Xoclength;
362     uschar Xocchars[8];
363 nigel 77 #endif
364    
365 ph10 403 int Xcodelink;
366 nigel 77 int Xctype;
367 nigel 93 unsigned int Xfc;
368 nigel 77 int Xfi;
369     int Xlength;
370     int Xmax;
371     int Xmin;
372     int Xnumber;
373     int Xoffset;
374     int Xop;
375     int Xsave_capture_last;
376     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
377     int Xstacksave[REC_STACK_SAVE_MAX];
378    
379     eptrblock Xnewptrb;
380    
381 ph10 164 /* Where to jump back to */
382 nigel 77
383 ph10 164 int Xwhere;
384 ph10 165
385 nigel 77 } heapframe;
386    
387     #endif
388    
389    
390     /***************************************************************************
391     ***************************************************************************/
392    
393    
394    
395     /*************************************************
396     * Match from current position *
397     *************************************************/
398    
399 nigel 93 /* This function is called recursively in many circumstances. Whenever it
400 nigel 77 returns a negative (error) response, the outer incarnation must also return the
401 ph10 426 same response. */
402 nigel 77
403 ph10 426 /* These macros pack up tests that are used for partial matching, and which
404     appear several times in the code. We set the "hit end" flag if the pointer is
405     at the end of the subject and also past the current match start (i.e.
406     something has been matched). The second one is used when we already know we are
407     past the end of the subject. */
408    
409     #define CHECK_PARTIAL()\
410     if (md->partial && eptr >= md->end_subject && eptr > mstart)\
411     md->hitend = TRUE
412    
413     #define SCHECK_PARTIAL()\
414     if (md->partial && eptr > mstart) md->hitend = TRUE
415    
416     /* Performance note: It might be tempting to extract commonly used fields from
417     the md structure (e.g. utf8, end_subject) into individual variables to improve
418 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
419     made performance worse.
420    
421     Arguments:
422 nigel 93 eptr pointer to current character in subject
423     ecode pointer to current position in compiled code
424 ph10 168 mstart pointer to the current match start position (can be modified
425 ph10 172 by encountering \K)
426 nigel 77 offset_top current top pointer
427     md pointer to "static" info for the match
428     ims current /i, /m, and /s options
429     eptrb pointer to chain of blocks containing eptr at start of
430     brackets - for testing for empty matches
431     flags can contain
432     match_condassert - this is an assertion condition
433 nigel 93 match_cbegroup - this is the start of an unlimited repeat
434     group that can match an empty string
435 nigel 87 rdepth the recursion depth
436 nigel 77
437     Returns: MATCH_MATCH if matched ) these values are >= 0
438     MATCH_NOMATCH if failed to match )
439     a negative PCRE_ERROR_xxx value if aborted by an error condition
440 nigel 87 (e.g. stopped by repeated call or recursion limit)
441 nigel 77 */
442    
443     static int
444 ph10 409 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
445 nigel 77 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
446 nigel 91 int flags, unsigned int rdepth)
447 nigel 77 {
448     /* These variables do not need to be preserved over recursion in this function,
449 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
450     "register" because they are used a lot in loops. */
451 nigel 77
452 nigel 91 register int rrc; /* Returns from recursive calls */
453     register int i; /* Used for loops not involving calls to RMATCH() */
454 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
455 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
456 nigel 77
457 nigel 93 BOOL minimize, possessive; /* Quantifier options */
458 ph10 403 int condcode;
459 nigel 93
460 nigel 77 /* When recursion is not being used, all "local" variables that have to be
461     preserved over calls to RMATCH() are part of a "frame" which is obtained from
462     heap storage. Set up the top-level frame here; others are obtained from the
463     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
464    
465     #ifdef NO_RECURSE
466     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
467     frame->Xprevframe = NULL; /* Marks the top level */
468    
469     /* Copy in the original argument variables */
470    
471     frame->Xeptr = eptr;
472     frame->Xecode = ecode;
473 ph10 168 frame->Xmstart = mstart;
474 nigel 77 frame->Xoffset_top = offset_top;
475     frame->Xims = ims;
476     frame->Xeptrb = eptrb;
477     frame->Xflags = flags;
478 nigel 87 frame->Xrdepth = rdepth;
479 nigel 77
480     /* This is where control jumps back to in order to effect "recursion" */
481    
482     HEAP_RECURSE:
483    
484     /* Macros make the argument variables come from the current frame */
485    
486     #define eptr frame->Xeptr
487     #define ecode frame->Xecode
488 ph10 168 #define mstart frame->Xmstart
489 nigel 77 #define offset_top frame->Xoffset_top
490     #define ims frame->Xims
491     #define eptrb frame->Xeptrb
492     #define flags frame->Xflags
493 nigel 87 #define rdepth frame->Xrdepth
494 nigel 77
495     /* Ditto for the local variables */
496    
497     #ifdef SUPPORT_UTF8
498     #define charptr frame->Xcharptr
499     #endif
500     #define callpat frame->Xcallpat
501 ph10 403 #define codelink frame->Xcodelink
502 nigel 77 #define data frame->Xdata
503     #define next frame->Xnext
504     #define pp frame->Xpp
505     #define prev frame->Xprev
506     #define saved_eptr frame->Xsaved_eptr
507    
508     #define new_recursive frame->Xnew_recursive
509    
510     #define cur_is_word frame->Xcur_is_word
511     #define condition frame->Xcondition
512     #define prev_is_word frame->Xprev_is_word
513    
514     #define original_ims frame->Xoriginal_ims
515    
516     #ifdef SUPPORT_UCP
517     #define prop_type frame->Xprop_type
518 nigel 87 #define prop_value frame->Xprop_value
519 nigel 77 #define prop_fail_result frame->Xprop_fail_result
520     #define prop_category frame->Xprop_category
521     #define prop_chartype frame->Xprop_chartype
522 nigel 87 #define prop_script frame->Xprop_script
523 ph10 115 #define oclength frame->Xoclength
524     #define occhars frame->Xocchars
525 nigel 77 #endif
526    
527     #define ctype frame->Xctype
528     #define fc frame->Xfc
529     #define fi frame->Xfi
530     #define length frame->Xlength
531     #define max frame->Xmax
532     #define min frame->Xmin
533     #define number frame->Xnumber
534     #define offset frame->Xoffset
535     #define op frame->Xop
536     #define save_capture_last frame->Xsave_capture_last
537     #define save_offset1 frame->Xsave_offset1
538     #define save_offset2 frame->Xsave_offset2
539     #define save_offset3 frame->Xsave_offset3
540     #define stacksave frame->Xstacksave
541    
542     #define newptrb frame->Xnewptrb
543    
544     /* When recursion is being used, local variables are allocated on the stack and
545     get preserved during recursion in the normal way. In this environment, fi and
546     i, and fc and c, can be the same variables. */
547    
548 nigel 93 #else /* NO_RECURSE not defined */
549 nigel 77 #define fi i
550     #define fc c
551    
552    
553 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
554     const uschar *charptr; /* in small blocks of the code. My normal */
555     #endif /* style of coding would have declared */
556     const uschar *callpat; /* them within each of those blocks. */
557     const uschar *data; /* However, in order to accommodate the */
558     const uschar *next; /* version of this code that uses an */
559     USPTR pp; /* external "stack" implemented on the */
560     const uschar *prev; /* heap, it is easier to declare them all */
561     USPTR saved_eptr; /* here, so the declarations can be cut */
562     /* out in a block. The only declarations */
563     recursion_info new_recursive; /* within blocks below are for variables */
564     /* that do not have to be preserved over */
565     BOOL cur_is_word; /* a recursive call to RMATCH(). */
566     BOOL condition;
567 nigel 77 BOOL prev_is_word;
568    
569     unsigned long int original_ims;
570    
571     #ifdef SUPPORT_UCP
572     int prop_type;
573 nigel 87 int prop_value;
574 nigel 77 int prop_fail_result;
575     int prop_category;
576     int prop_chartype;
577 nigel 87 int prop_script;
578 ph10 115 int oclength;
579     uschar occhars[8];
580 nigel 77 #endif
581    
582 ph10 399 int codelink;
583 nigel 77 int ctype;
584     int length;
585     int max;
586     int min;
587     int number;
588     int offset;
589     int op;
590     int save_capture_last;
591     int save_offset1, save_offset2, save_offset3;
592     int stacksave[REC_STACK_SAVE_MAX];
593    
594     eptrblock newptrb;
595 nigel 93 #endif /* NO_RECURSE */
596 nigel 77
597     /* These statements are here to stop the compiler complaining about uninitialized
598     variables. */
599    
600     #ifdef SUPPORT_UCP
601 nigel 87 prop_value = 0;
602 nigel 77 prop_fail_result = 0;
603     #endif
604    
605 nigel 93
606 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
607     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
608     used. Thanks to Ian Taylor for noticing this possibility and sending the
609     original patch. */
610    
611     TAIL_RECURSE:
612    
613 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
614     are specified by the macro RMATCH and RRETURN is used to return. When
615     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
616     and a "return", respectively (possibly with some debugging if DEBUG is
617     defined). However, RMATCH isn't like a function call because it's quite a
618     complicated macro. It has to be used in one particular way. This shouldn't,
619     however, impact performance when true recursion is being used. */
620 nigel 77
621 ph10 164 #ifdef SUPPORT_UTF8
622     utf8 = md->utf8; /* Local copy of the flag */
623     #else
624     utf8 = FALSE;
625     #endif
626    
627 nigel 87 /* First check that we haven't called match() too many times, or that we
628     haven't exceeded the recursive call limit. */
629    
630 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
631 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
632 nigel 77
633     original_ims = ims; /* Save for resetting on ')' */
634 nigel 91
635 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
636     string, the match_cbegroup flag is set. When this is the case, add the current
637     subject pointer to the chain of such remembered pointers, to be checked when we
638     hit the closing ket, in order to break infinite loops that match no characters.
639 ph10 197 When match() is called in other circumstances, don't add to the chain. The
640     match_cbegroup flag must NOT be used with tail recursion, because the memory
641     block that is used is on the stack, so a new one may be required for each
642     match(). */
643 nigel 77
644 nigel 93 if ((flags & match_cbegroup) != 0)
645 nigel 77 {
646 ph10 197 newptrb.epb_saved_eptr = eptr;
647     newptrb.epb_prev = eptrb;
648     eptrb = &newptrb;
649 nigel 77 }
650    
651 nigel 93 /* Now start processing the opcodes. */
652 nigel 77
653     for (;;)
654     {
655 nigel 93 minimize = possessive = FALSE;
656 nigel 77 op = *ecode;
657 ph10 406
658 nigel 77 /* For partial matching, remember if we ever hit the end of the subject after
659 ph10 426 matching at least one subject character. This code is now wrapped in a macro
660     because it appears several times below. */
661 nigel 77
662 ph10 426 CHECK_PARTIAL();
663 ph10 208
664 nigel 93 switch(op)
665     {
666 ph10 210 case OP_FAIL:
667 ph10 212 RRETURN(MATCH_NOMATCH);
668 ph10 211
669 ph10 210 case OP_PRUNE:
670     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
671     ims, eptrb, flags, RM51);
672     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
673 ph10 212 RRETURN(MATCH_PRUNE);
674 ph10 211
675 ph10 210 case OP_COMMIT:
676     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
677     ims, eptrb, flags, RM52);
678     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
679 ph10 212 RRETURN(MATCH_COMMIT);
680 ph10 211
681 ph10 210 case OP_SKIP:
682     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
683     ims, eptrb, flags, RM53);
684     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
685 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
686 ph10 212 RRETURN(MATCH_SKIP);
687 ph10 211
688 ph10 210 case OP_THEN:
689     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
690 ph10 212 ims, eptrb, flags, RM54);
691 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
692 ph10 212 RRETURN(MATCH_THEN);
693 ph10 211
694 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
695     the current subject position in the working slot at the top of the vector.
696     We mustn't change the current values of the data slot, because they may be
697     set from a previous iteration of this group, and be referred to by a
698     reference inside the group.
699 nigel 77
700 nigel 93 If the bracket fails to match, we need to restore this value and also the
701     values of the final offsets, in case they were set by a previous iteration
702     of the same bracket.
703 nigel 77
704 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
705     a non-capturing bracket. Don't worry about setting the flag for the error
706     case here; that is handled in the code for KET. */
707 nigel 77
708 nigel 93 case OP_CBRA:
709     case OP_SCBRA:
710     number = GET2(ecode, 1+LINK_SIZE);
711 nigel 77 offset = number << 1;
712    
713     #ifdef DEBUG
714 nigel 93 printf("start bracket %d\n", number);
715     printf("subject=");
716 nigel 77 pchars(eptr, 16, TRUE, md);
717     printf("\n");
718     #endif
719    
720     if (offset < md->offset_max)
721     {
722     save_offset1 = md->offset_vector[offset];
723     save_offset2 = md->offset_vector[offset+1];
724     save_offset3 = md->offset_vector[md->offset_end - number];
725     save_capture_last = md->capture_last;
726    
727     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
728     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
729    
730 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
731 nigel 77 do
732     {
733 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
734     ims, eptrb, flags, RM1);
735 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
736 nigel 77 md->capture_last = save_capture_last;
737     ecode += GET(ecode, 1);
738     }
739     while (*ecode == OP_ALT);
740    
741     DPRINTF(("bracket %d failed\n", number));
742    
743     md->offset_vector[offset] = save_offset1;
744     md->offset_vector[offset+1] = save_offset2;
745     md->offset_vector[md->offset_end - number] = save_offset3;
746    
747     RRETURN(MATCH_NOMATCH);
748     }
749    
750 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
751     as a non-capturing bracket. */
752 nigel 77
753 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
754     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
755    
756 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
757 nigel 77
758 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
759     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
760    
761 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
762     final alternative within the brackets, we would return the result of a
763     recursive call to match() whatever happened. We can reduce stack usage by
764 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
765     is set.*/
766 nigel 77
767 nigel 93 case OP_BRA:
768     case OP_SBRA:
769     DPRINTF(("start non-capturing bracket\n"));
770     flags = (op >= OP_SBRA)? match_cbegroup : 0;
771 nigel 91 for (;;)
772 nigel 77 {
773 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
774 nigel 93 {
775 ph10 197 if (flags == 0) /* Not a possibly empty group */
776     {
777     ecode += _pcre_OP_lengths[*ecode];
778     DPRINTF(("bracket 0 tail recursion\n"));
779     goto TAIL_RECURSE;
780     }
781    
782     /* Possibly empty group; can't use tail recursion. */
783    
784     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
785     eptrb, flags, RM48);
786     RRETURN(rrc);
787 nigel 93 }
788 nigel 91
789     /* For non-final alternatives, continue the loop for a NOMATCH result;
790     otherwise return. */
791    
792 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
793     eptrb, flags, RM2);
794 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
795 nigel 77 ecode += GET(ecode, 1);
796     }
797 nigel 91 /* Control never reaches here. */
798 nigel 77
799     /* Conditional group: compilation checked that there are no more than
800     two branches. If the condition is false, skipping the first branch takes us
801     past the end if there is only one branch, but that's OK because that is
802 nigel 91 exactly what going to the ket would do. As there is only one branch to be
803     obeyed, we can use tail recursion to avoid using another stack frame. */
804 nigel 77
805     case OP_COND:
806 nigel 93 case OP_SCOND:
807 ph10 399 codelink= GET(ecode, 1);
808 ph10 406
809 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
810     inserted between OP_COND and an assertion condition. */
811 ph10 392
812 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
813     {
814     if (pcre_callout != NULL)
815     {
816     pcre_callout_block cb;
817     cb.version = 1; /* Version 1 of the callout block */
818     cb.callout_number = ecode[LINK_SIZE+2];
819     cb.offset_vector = md->offset_vector;
820     cb.subject = (PCRE_SPTR)md->start_subject;
821     cb.subject_length = md->end_subject - md->start_subject;
822     cb.start_match = mstart - md->start_subject;
823     cb.current_position = eptr - md->start_subject;
824     cb.pattern_position = GET(ecode, LINK_SIZE + 3);
825     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
826     cb.capture_top = offset_top/2;
827     cb.capture_last = md->capture_last;
828     cb.callout_data = md->callout_data;
829     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
830     if (rrc < 0) RRETURN(rrc);
831     }
832     ecode += _pcre_OP_lengths[OP_CALLOUT];
833     }
834 ph10 392
835 ph10 399 condcode = ecode[LINK_SIZE+1];
836 ph10 406
837 ph10 381 /* Now see what the actual condition is */
838 ph10 392
839 ph10 399 if (condcode == OP_RREF) /* Recursion test */
840 nigel 77 {
841 nigel 93 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
842     condition = md->recursive != NULL &&
843     (offset == RREF_ANY || offset == md->recursive->group_num);
844     ecode += condition? 3 : GET(ecode, 1);
845     }
846    
847 ph10 399 else if (condcode == OP_CREF) /* Group used test */
848 nigel 93 {
849 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
850 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
851     ecode += condition? 3 : GET(ecode, 1);
852 nigel 77 }
853    
854 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
855 nigel 93 {
856     condition = FALSE;
857     ecode += GET(ecode, 1);
858     }
859    
860 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
861 nigel 93 match_condassert in the flags argument causes it to stop at the end of an
862     assertion. */
863 nigel 77
864     else
865     {
866 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
867     match_condassert, RM3);
868 nigel 77 if (rrc == MATCH_MATCH)
869     {
870 nigel 93 condition = TRUE;
871     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
872 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
873     }
874 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
875 nigel 77 {
876     RRETURN(rrc); /* Need braces because of following else */
877     }
878 nigel 93 else
879     {
880     condition = FALSE;
881 ph10 399 ecode += codelink;
882 nigel 93 }
883     }
884 nigel 91
885 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
886 ph10 197 we can use tail recursion to avoid using another stack frame, except when
887     match_cbegroup is required for an unlimited repeat of a possibly empty
888     group. If the second alternative doesn't exist, we can just plough on. */
889 nigel 91
890 nigel 93 if (condition || *ecode == OP_ALT)
891     {
892 nigel 91 ecode += 1 + LINK_SIZE;
893 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
894     {
895     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
896     RRETURN(rrc);
897     }
898     else /* Group must match something */
899     {
900     flags = 0;
901     goto TAIL_RECURSE;
902     }
903 nigel 77 }
904 ph10 395 else /* Condition false & no alternative */
905 nigel 93 {
906     ecode += 1 + LINK_SIZE;
907     }
908     break;
909 nigel 77
910    
911 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
912     recursion, we should restore the offsets appropriately and continue from
913     after the call. */
914 nigel 77
915 ph10 210 case OP_ACCEPT:
916 nigel 77 case OP_END:
917     if (md->recursive != NULL && md->recursive->group_num == 0)
918     {
919     recursion_info *rec = md->recursive;
920 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
921 nigel 77 md->recursive = rec->prevrec;
922     memmove(md->offset_vector, rec->offset_save,
923     rec->saved_max * sizeof(int));
924 ph10 168 mstart = rec->save_start;
925 nigel 77 ims = original_ims;
926     ecode = rec->after_call;
927     break;
928     }
929    
930     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
931     string - backtracking will then try other alternatives, if any. */
932    
933 ph10 168 if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
934     md->end_match_ptr = eptr; /* Record where we ended */
935     md->end_offset_top = offset_top; /* and how many extracts were taken */
936 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
937 nigel 77 RRETURN(MATCH_MATCH);
938    
939     /* Change option settings */
940    
941     case OP_OPT:
942     ims = ecode[1];
943     ecode += 2;
944     DPRINTF(("ims set to %02lx\n", ims));
945     break;
946    
947     /* Assertion brackets. Check the alternative branches in turn - the
948     matching won't pass the KET for an assertion. If any one branch matches,
949     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
950     start of each branch to move the current point backwards, so the code at
951     this level is identical to the lookahead case. */
952    
953     case OP_ASSERT:
954     case OP_ASSERTBACK:
955     do
956     {
957 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
958     RM4);
959 nigel 77 if (rrc == MATCH_MATCH) break;
960 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
961 nigel 77 ecode += GET(ecode, 1);
962     }
963     while (*ecode == OP_ALT);
964     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
965    
966     /* If checking an assertion for a condition, return MATCH_MATCH. */
967    
968     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
969    
970     /* Continue from after the assertion, updating the offsets high water
971     mark, since extracts may have been taken during the assertion. */
972    
973     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
974     ecode += 1 + LINK_SIZE;
975     offset_top = md->end_offset_top;
976     continue;
977    
978     /* Negative assertion: all branches must fail to match */
979    
980     case OP_ASSERT_NOT:
981     case OP_ASSERTBACK_NOT:
982     do
983     {
984 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
985     RM5);
986 nigel 77 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
987 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
988 nigel 77 ecode += GET(ecode,1);
989     }
990     while (*ecode == OP_ALT);
991    
992     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
993    
994     ecode += 1 + LINK_SIZE;
995     continue;
996    
997     /* Move the subject pointer back. This occurs only at the start of
998     each branch of a lookbehind assertion. If we are too close to the start to
999     move back, this match function fails. When working with UTF-8 we move
1000     back a number of characters, not bytes. */
1001    
1002     case OP_REVERSE:
1003     #ifdef SUPPORT_UTF8
1004     if (utf8)
1005     {
1006 nigel 93 i = GET(ecode, 1);
1007     while (i-- > 0)
1008 nigel 77 {
1009     eptr--;
1010     if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1011 ph10 207 BACKCHAR(eptr);
1012 nigel 77 }
1013     }
1014     else
1015     #endif
1016    
1017     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1018    
1019     {
1020 nigel 93 eptr -= GET(ecode, 1);
1021 nigel 77 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1022     }
1023    
1024     /* Skip to next op code */
1025    
1026     ecode += 1 + LINK_SIZE;
1027     break;
1028    
1029     /* The callout item calls an external function, if one is provided, passing
1030     details of the match so far. This is mainly for debugging, though the
1031     function is able to force a failure. */
1032    
1033     case OP_CALLOUT:
1034     if (pcre_callout != NULL)
1035     {
1036     pcre_callout_block cb;
1037     cb.version = 1; /* Version 1 of the callout block */
1038     cb.callout_number = ecode[1];
1039     cb.offset_vector = md->offset_vector;
1040 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1041 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
1042 ph10 168 cb.start_match = mstart - md->start_subject;
1043 nigel 77 cb.current_position = eptr - md->start_subject;
1044     cb.pattern_position = GET(ecode, 2);
1045     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1046     cb.capture_top = offset_top/2;
1047     cb.capture_last = md->capture_last;
1048     cb.callout_data = md->callout_data;
1049     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1050     if (rrc < 0) RRETURN(rrc);
1051     }
1052     ecode += 2 + 2*LINK_SIZE;
1053     break;
1054    
1055     /* Recursion either matches the current regex, or some subexpression. The
1056     offset data is the offset to the starting bracket from the start of the
1057     whole pattern. (This is so that it works from duplicated subpatterns.)
1058    
1059     If there are any capturing brackets started but not finished, we have to
1060     save their starting points and reinstate them after the recursion. However,
1061     we don't know how many such there are (offset_top records the completed
1062     total) so we just have to save all the potential data. There may be up to
1063     65535 such values, which is too large to put on the stack, but using malloc
1064     for small numbers seems expensive. As a compromise, the stack is used when
1065     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1066     is used. If the malloc fails, nothing can be saved, so this call of match()
1067     gives up at once and returns PCRE_ERROR_NOMEMORY instead of attempting the
1068     recursion.
1069    
1070     There are also other values that have to be saved. We use a chained
1071     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1072     for the original version of this logic. */
1073    
1074     case OP_RECURSE:
1075     {
1076     callpat = md->start_code + GET(ecode, 1);
1077 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1078     GET2(callpat, 1 + LINK_SIZE);
1079 nigel 77
1080     /* Add to "recursing stack" */
1081    
1082     new_recursive.prevrec = md->recursive;
1083     md->recursive = &new_recursive;
1084    
1085     /* Find where to continue from afterwards */
1086    
1087     ecode += 1 + LINK_SIZE;
1088     new_recursive.after_call = ecode;
1089    
1090     /* Now save the offset data. */
1091    
1092     new_recursive.saved_max = md->offset_end;
1093     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1094     new_recursive.offset_save = stacksave;
1095     else
1096     {
1097     new_recursive.offset_save =
1098     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1099     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1100     }
1101    
1102     memcpy(new_recursive.offset_save, md->offset_vector,
1103     new_recursive.saved_max * sizeof(int));
1104 ph10 168 new_recursive.save_start = mstart;
1105     mstart = eptr;
1106 nigel 77
1107     /* OK, now we can do the recursion. For each top-level alternative we
1108     restore the offset and recursion data. */
1109    
1110     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1111 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1112 nigel 77 do
1113     {
1114 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1115     md, ims, eptrb, flags, RM6);
1116 nigel 77 if (rrc == MATCH_MATCH)
1117     {
1118 nigel 87 DPRINTF(("Recursion matched\n"));
1119 nigel 77 md->recursive = new_recursive.prevrec;
1120     if (new_recursive.offset_save != stacksave)
1121     (pcre_free)(new_recursive.offset_save);
1122     RRETURN(MATCH_MATCH);
1123     }
1124 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1125 nigel 87 {
1126     DPRINTF(("Recursion gave error %d\n", rrc));
1127 ph10 400 if (new_recursive.offset_save != stacksave)
1128     (pcre_free)(new_recursive.offset_save);
1129 nigel 87 RRETURN(rrc);
1130     }
1131 nigel 77
1132     md->recursive = &new_recursive;
1133     memcpy(md->offset_vector, new_recursive.offset_save,
1134     new_recursive.saved_max * sizeof(int));
1135     callpat += GET(callpat, 1);
1136     }
1137     while (*callpat == OP_ALT);
1138    
1139     DPRINTF(("Recursion didn't match\n"));
1140     md->recursive = new_recursive.prevrec;
1141     if (new_recursive.offset_save != stacksave)
1142     (pcre_free)(new_recursive.offset_save);
1143     RRETURN(MATCH_NOMATCH);
1144     }
1145     /* Control never reaches here */
1146    
1147     /* "Once" brackets are like assertion brackets except that after a match,
1148     the point in the subject string is not moved back. Thus there can never be
1149     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1150     Check the alternative branches in turn - the matching won't pass the KET
1151     for this kind of subpattern. If any one branch matches, we carry on as at
1152     the end of a normal bracket, leaving the subject pointer where it ended. */
1153    
1154     case OP_ONCE:
1155 nigel 91 prev = ecode;
1156     saved_eptr = eptr;
1157    
1158     do
1159 nigel 77 {
1160 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1161 nigel 91 if (rrc == MATCH_MATCH) break;
1162 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1163 nigel 91 ecode += GET(ecode,1);
1164     }
1165     while (*ecode == OP_ALT);
1166 nigel 77
1167 nigel 91 /* If we hit the end of the group (which could be repeated), fail. */
1168 nigel 77
1169 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1170 nigel 77
1171 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1172     mark, since extracts may have been taken. */
1173 nigel 77
1174 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1175 nigel 77
1176 nigel 91 offset_top = md->end_offset_top;
1177     eptr = md->end_match_ptr;
1178 nigel 77
1179 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1180     happens for a repeating ket if no characters were matched in the group.
1181     This is the forcible breaking of infinite loops as implemented in Perl
1182     5.005. If there is an options reset, it will get obeyed in the normal
1183     course of events. */
1184 nigel 77
1185 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1186     {
1187     ecode += 1+LINK_SIZE;
1188     break;
1189     }
1190 nigel 77
1191 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1192     preceding bracket, in the appropriate order. The second "call" of match()
1193     uses tail recursion, to avoid using another stack frame. We need to reset
1194     any options that changed within the bracket before re-running it, so
1195     check the next opcode. */
1196 nigel 77
1197 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1198     {
1199     ims = (ims & ~PCRE_IMS) | ecode[4];
1200     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1201     }
1202 nigel 77
1203 nigel 91 if (*ecode == OP_KETRMIN)
1204     {
1205 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1206 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1207     ecode = prev;
1208 ph10 197 flags = 0;
1209 nigel 91 goto TAIL_RECURSE;
1210 nigel 77 }
1211 nigel 91 else /* OP_KETRMAX */
1212     {
1213 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1214 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1215     ecode += 1 + LINK_SIZE;
1216 ph10 197 flags = 0;
1217 nigel 91 goto TAIL_RECURSE;
1218     }
1219     /* Control never gets here */
1220 nigel 77
1221     /* An alternation is the end of a branch; scan along to find the end of the
1222     bracketed group and go to there. */
1223    
1224     case OP_ALT:
1225     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1226     break;
1227    
1228 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1229     indicating that it may occur zero times. It may repeat infinitely, or not
1230     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1231     with fixed upper repeat limits are compiled as a number of copies, with the
1232     optional ones preceded by BRAZERO or BRAMINZERO. */
1233 nigel 77
1234     case OP_BRAZERO:
1235     {
1236     next = ecode+1;
1237 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1238 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1239     do next += GET(next,1); while (*next == OP_ALT);
1240 nigel 93 ecode = next + 1 + LINK_SIZE;
1241 nigel 77 }
1242     break;
1243    
1244     case OP_BRAMINZERO:
1245     {
1246     next = ecode+1;
1247 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1248 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1249 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1250     ecode++;
1251     }
1252     break;
1253    
1254 ph10 335 case OP_SKIPZERO:
1255     {
1256     next = ecode+1;
1257     do next += GET(next,1); while (*next == OP_ALT);
1258     ecode = next + 1 + LINK_SIZE;
1259     }
1260     break;
1261    
1262 nigel 93 /* End of a group, repeated or non-repeating. */
1263 nigel 77
1264     case OP_KET:
1265     case OP_KETRMIN:
1266     case OP_KETRMAX:
1267 nigel 91 prev = ecode - GET(ecode, 1);
1268 nigel 77
1269 nigel 93 /* If this was a group that remembered the subject start, in order to break
1270     infinite repeats of empty string matches, retrieve the subject start from
1271     the chain. Otherwise, set it NULL. */
1272 nigel 77
1273 nigel 93 if (*prev >= OP_SBRA)
1274     {
1275     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1276     eptrb = eptrb->epb_prev; /* Backup to previous group */
1277     }
1278     else saved_eptr = NULL;
1279 nigel 77
1280 nigel 93 /* If we are at the end of an assertion group, stop matching and return
1281     MATCH_MATCH, but record the current high water mark for use by positive
1282     assertions. Do this also for the "once" (atomic) groups. */
1283    
1284 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1285     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1286     *prev == OP_ONCE)
1287     {
1288     md->end_match_ptr = eptr; /* For ONCE */
1289     md->end_offset_top = offset_top;
1290     RRETURN(MATCH_MATCH);
1291     }
1292 nigel 77
1293 nigel 93 /* For capturing groups we have to check the group number back at the start
1294     and if necessary complete handling an extraction by setting the offsets and
1295     bumping the high water mark. Note that whole-pattern recursion is coded as
1296     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1297     when the OP_END is reached. Other recursion is handled here. */
1298 nigel 77
1299 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1300 nigel 91 {
1301 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1302 nigel 91 offset = number << 1;
1303 nigel 77
1304     #ifdef DEBUG
1305 nigel 91 printf("end bracket %d", number);
1306     printf("\n");
1307 nigel 77 #endif
1308    
1309 nigel 93 md->capture_last = number;
1310     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1311 nigel 91 {
1312 nigel 93 md->offset_vector[offset] =
1313     md->offset_vector[md->offset_end - number];
1314     md->offset_vector[offset+1] = eptr - md->start_subject;
1315     if (offset_top <= offset) offset_top = offset + 2;
1316     }
1317 nigel 77
1318 nigel 93 /* Handle a recursively called group. Restore the offsets
1319     appropriately and continue from after the call. */
1320 nigel 77
1321 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1322     {
1323     recursion_info *rec = md->recursive;
1324     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1325     md->recursive = rec->prevrec;
1326 ph10 168 mstart = rec->save_start;
1327 nigel 93 memcpy(md->offset_vector, rec->offset_save,
1328     rec->saved_max * sizeof(int));
1329     ecode = rec->after_call;
1330     ims = original_ims;
1331     break;
1332 nigel 77 }
1333 nigel 91 }
1334 nigel 77
1335 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1336     flags, in case they got changed during the group. */
1337 nigel 77
1338 nigel 91 ims = original_ims;
1339     DPRINTF(("ims reset to %02lx\n", ims));
1340 nigel 77
1341 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1342     happens for a repeating ket if no characters were matched in the group.
1343     This is the forcible breaking of infinite loops as implemented in Perl
1344     5.005. If there is an options reset, it will get obeyed in the normal
1345     course of events. */
1346 nigel 77
1347 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1348     {
1349     ecode += 1 + LINK_SIZE;
1350     break;
1351     }
1352 nigel 77
1353 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1354     preceding bracket, in the appropriate order. In the second case, we can use
1355 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1356     unlimited repeat of a group that can match an empty string. */
1357 nigel 77
1358 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1359    
1360 nigel 91 if (*ecode == OP_KETRMIN)
1361     {
1362 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1363 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1364 ph10 197 if (flags != 0) /* Could match an empty string */
1365     {
1366     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1367     RRETURN(rrc);
1368     }
1369 nigel 91 ecode = prev;
1370     goto TAIL_RECURSE;
1371 nigel 77 }
1372 nigel 91 else /* OP_KETRMAX */
1373     {
1374 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1375 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1376     ecode += 1 + LINK_SIZE;
1377 ph10 197 flags = 0;
1378 nigel 91 goto TAIL_RECURSE;
1379     }
1380     /* Control never gets here */
1381 nigel 77
1382     /* Start of subject unless notbol, or after internal newline if multiline */
1383    
1384     case OP_CIRC:
1385     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1386     if ((ims & PCRE_MULTILINE) != 0)
1387     {
1388 nigel 91 if (eptr != md->start_subject &&
1389 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1390 nigel 77 RRETURN(MATCH_NOMATCH);
1391     ecode++;
1392     break;
1393     }
1394     /* ... else fall through */
1395    
1396     /* Start of subject assertion */
1397    
1398     case OP_SOD:
1399     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1400     ecode++;
1401     break;
1402    
1403     /* Start of match assertion */
1404    
1405     case OP_SOM:
1406     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1407     ecode++;
1408     break;
1409 ph10 172
1410 ph10 168 /* Reset the start of match point */
1411 ph10 172
1412 ph10 168 case OP_SET_SOM:
1413     mstart = eptr;
1414 ph10 172 ecode++;
1415     break;
1416 nigel 77
1417     /* Assert before internal newline if multiline, or before a terminating
1418     newline unless endonly is set, else end of subject unless noteol is set. */
1419    
1420     case OP_DOLL:
1421     if ((ims & PCRE_MULTILINE) != 0)
1422     {
1423     if (eptr < md->end_subject)
1424 nigel 91 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1425 nigel 77 else
1426     { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1427     ecode++;
1428     break;
1429     }
1430     else
1431     {
1432     if (md->noteol) RRETURN(MATCH_NOMATCH);
1433     if (!md->endonly)
1434     {
1435 nigel 91 if (eptr != md->end_subject &&
1436 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1437 nigel 77 RRETURN(MATCH_NOMATCH);
1438     ecode++;
1439     break;
1440     }
1441     }
1442 nigel 91 /* ... else fall through for endonly */
1443 nigel 77
1444     /* End of subject assertion (\z) */
1445    
1446     case OP_EOD:
1447     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1448     ecode++;
1449     break;
1450    
1451     /* End of subject or ending \n assertion (\Z) */
1452    
1453     case OP_EODN:
1454 nigel 91 if (eptr != md->end_subject &&
1455 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1456 nigel 91 RRETURN(MATCH_NOMATCH);
1457 nigel 77 ecode++;
1458     break;
1459    
1460     /* Word boundary assertions */
1461    
1462     case OP_NOT_WORD_BOUNDARY:
1463     case OP_WORD_BOUNDARY:
1464     {
1465    
1466     /* Find out if the previous and current characters are "word" characters.
1467     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1468     be "non-word" characters. */
1469    
1470     #ifdef SUPPORT_UTF8
1471     if (utf8)
1472     {
1473     if (eptr == md->start_subject) prev_is_word = FALSE; else
1474     {
1475 ph10 409 USPTR lastptr = eptr - 1;
1476 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1477     GETCHAR(c, lastptr);
1478     prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1479     }
1480     if (eptr >= md->end_subject) cur_is_word = FALSE; else
1481     {
1482     GETCHAR(c, eptr);
1483     cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1484     }
1485     }
1486     else
1487     #endif
1488    
1489     /* More streamlined when not in UTF-8 mode */
1490    
1491     {
1492     prev_is_word = (eptr != md->start_subject) &&
1493     ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1494     cur_is_word = (eptr < md->end_subject) &&
1495     ((md->ctypes[*eptr] & ctype_word) != 0);
1496     }
1497    
1498     /* Now see if the situation is what we want */
1499    
1500     if ((*ecode++ == OP_WORD_BOUNDARY)?
1501     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1502     RRETURN(MATCH_NOMATCH);
1503     }
1504     break;
1505    
1506     /* Match a single character type; inline for speed */
1507    
1508     case OP_ANY:
1509 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1510 ph10 345 /* Fall through */
1511    
1512 ph10 341 case OP_ALLANY:
1513 nigel 77 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1514 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1515 nigel 77 ecode++;
1516     break;
1517    
1518     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1519     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1520    
1521     case OP_ANYBYTE:
1522     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1523     ecode++;
1524     break;
1525    
1526     case OP_NOT_DIGIT:
1527     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1528     GETCHARINCTEST(c, eptr);
1529     if (
1530     #ifdef SUPPORT_UTF8
1531     c < 256 &&
1532     #endif
1533     (md->ctypes[c] & ctype_digit) != 0
1534     )
1535     RRETURN(MATCH_NOMATCH);
1536     ecode++;
1537     break;
1538    
1539     case OP_DIGIT:
1540     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1541     GETCHARINCTEST(c, eptr);
1542     if (
1543     #ifdef SUPPORT_UTF8
1544     c >= 256 ||
1545     #endif
1546     (md->ctypes[c] & ctype_digit) == 0
1547     )
1548     RRETURN(MATCH_NOMATCH);
1549     ecode++;
1550     break;
1551    
1552     case OP_NOT_WHITESPACE:
1553     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1554     GETCHARINCTEST(c, eptr);
1555     if (
1556     #ifdef SUPPORT_UTF8
1557     c < 256 &&
1558     #endif
1559     (md->ctypes[c] & ctype_space) != 0
1560     )
1561     RRETURN(MATCH_NOMATCH);
1562     ecode++;
1563     break;
1564    
1565     case OP_WHITESPACE:
1566     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1567     GETCHARINCTEST(c, eptr);
1568     if (
1569     #ifdef SUPPORT_UTF8
1570     c >= 256 ||
1571     #endif
1572     (md->ctypes[c] & ctype_space) == 0
1573     )
1574     RRETURN(MATCH_NOMATCH);
1575     ecode++;
1576     break;
1577    
1578     case OP_NOT_WORDCHAR:
1579     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1580     GETCHARINCTEST(c, eptr);
1581     if (
1582     #ifdef SUPPORT_UTF8
1583     c < 256 &&
1584     #endif
1585     (md->ctypes[c] & ctype_word) != 0
1586     )
1587     RRETURN(MATCH_NOMATCH);
1588     ecode++;
1589     break;
1590    
1591     case OP_WORDCHAR:
1592     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1593     GETCHARINCTEST(c, eptr);
1594     if (
1595     #ifdef SUPPORT_UTF8
1596     c >= 256 ||
1597     #endif
1598     (md->ctypes[c] & ctype_word) == 0
1599     )
1600     RRETURN(MATCH_NOMATCH);
1601     ecode++;
1602     break;
1603    
1604 nigel 93 case OP_ANYNL:
1605     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1606     GETCHARINCTEST(c, eptr);
1607     switch(c)
1608     {
1609     default: RRETURN(MATCH_NOMATCH);
1610     case 0x000d:
1611     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1612     break;
1613 ph10 231
1614 nigel 93 case 0x000a:
1615 ph10 231 break;
1616    
1617 nigel 93 case 0x000b:
1618     case 0x000c:
1619     case 0x0085:
1620     case 0x2028:
1621     case 0x2029:
1622 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1623 nigel 93 break;
1624     }
1625     ecode++;
1626     break;
1627    
1628 ph10 178 case OP_NOT_HSPACE:
1629     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1630     GETCHARINCTEST(c, eptr);
1631     switch(c)
1632     {
1633     default: break;
1634     case 0x09: /* HT */
1635     case 0x20: /* SPACE */
1636     case 0xa0: /* NBSP */
1637     case 0x1680: /* OGHAM SPACE MARK */
1638     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1639     case 0x2000: /* EN QUAD */
1640     case 0x2001: /* EM QUAD */
1641     case 0x2002: /* EN SPACE */
1642     case 0x2003: /* EM SPACE */
1643     case 0x2004: /* THREE-PER-EM SPACE */
1644     case 0x2005: /* FOUR-PER-EM SPACE */
1645     case 0x2006: /* SIX-PER-EM SPACE */
1646     case 0x2007: /* FIGURE SPACE */
1647     case 0x2008: /* PUNCTUATION SPACE */
1648     case 0x2009: /* THIN SPACE */
1649     case 0x200A: /* HAIR SPACE */
1650     case 0x202f: /* NARROW NO-BREAK SPACE */
1651     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1652     case 0x3000: /* IDEOGRAPHIC SPACE */
1653     RRETURN(MATCH_NOMATCH);
1654     }
1655     ecode++;
1656     break;
1657    
1658     case OP_HSPACE:
1659     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1660     GETCHARINCTEST(c, eptr);
1661     switch(c)
1662     {
1663     default: RRETURN(MATCH_NOMATCH);
1664     case 0x09: /* HT */
1665     case 0x20: /* SPACE */
1666     case 0xa0: /* NBSP */
1667     case 0x1680: /* OGHAM SPACE MARK */
1668     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1669     case 0x2000: /* EN QUAD */
1670     case 0x2001: /* EM QUAD */
1671     case 0x2002: /* EN SPACE */
1672     case 0x2003: /* EM SPACE */
1673     case 0x2004: /* THREE-PER-EM SPACE */
1674     case 0x2005: /* FOUR-PER-EM SPACE */
1675     case 0x2006: /* SIX-PER-EM SPACE */
1676     case 0x2007: /* FIGURE SPACE */
1677     case 0x2008: /* PUNCTUATION SPACE */
1678     case 0x2009: /* THIN SPACE */
1679     case 0x200A: /* HAIR SPACE */
1680     case 0x202f: /* NARROW NO-BREAK SPACE */
1681     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1682     case 0x3000: /* IDEOGRAPHIC SPACE */
1683     break;
1684     }
1685     ecode++;
1686     break;
1687    
1688     case OP_NOT_VSPACE:
1689     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1690     GETCHARINCTEST(c, eptr);
1691     switch(c)
1692     {
1693     default: break;
1694     case 0x0a: /* LF */
1695     case 0x0b: /* VT */
1696     case 0x0c: /* FF */
1697     case 0x0d: /* CR */
1698     case 0x85: /* NEL */
1699     case 0x2028: /* LINE SEPARATOR */
1700     case 0x2029: /* PARAGRAPH SEPARATOR */
1701     RRETURN(MATCH_NOMATCH);
1702     }
1703     ecode++;
1704     break;
1705    
1706     case OP_VSPACE:
1707     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1708     GETCHARINCTEST(c, eptr);
1709     switch(c)
1710     {
1711     default: RRETURN(MATCH_NOMATCH);
1712     case 0x0a: /* LF */
1713     case 0x0b: /* VT */
1714     case 0x0c: /* FF */
1715     case 0x0d: /* CR */
1716     case 0x85: /* NEL */
1717     case 0x2028: /* LINE SEPARATOR */
1718     case 0x2029: /* PARAGRAPH SEPARATOR */
1719     break;
1720     }
1721     ecode++;
1722     break;
1723    
1724 nigel 77 #ifdef SUPPORT_UCP
1725     /* Check the next character by Unicode property. We will get here only
1726     if the support is in the binary; otherwise a compile-time error occurs. */
1727    
1728     case OP_PROP:
1729     case OP_NOTPROP:
1730     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1731     GETCHARINCTEST(c, eptr);
1732     {
1733 ph10 384 const ucd_record *prop = GET_UCD(c);
1734 nigel 77
1735 nigel 87 switch(ecode[1])
1736     {
1737     case PT_ANY:
1738     if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1739     break;
1740 nigel 77
1741 nigel 87 case PT_LAMP:
1742 ph10 349 if ((prop->chartype == ucp_Lu ||
1743     prop->chartype == ucp_Ll ||
1744     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1745 nigel 77 RRETURN(MATCH_NOMATCH);
1746 nigel 87 break;
1747    
1748     case PT_GC:
1749 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1750 nigel 77 RRETURN(MATCH_NOMATCH);
1751 nigel 87 break;
1752    
1753     case PT_PC:
1754 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1755 nigel 87 RRETURN(MATCH_NOMATCH);
1756     break;
1757    
1758     case PT_SC:
1759 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
1760 nigel 87 RRETURN(MATCH_NOMATCH);
1761     break;
1762    
1763     default:
1764     RRETURN(PCRE_ERROR_INTERNAL);
1765 nigel 77 }
1766 nigel 87
1767     ecode += 3;
1768 nigel 77 }
1769     break;
1770    
1771     /* Match an extended Unicode sequence. We will get here only if the support
1772     is in the binary; otherwise a compile-time error occurs. */
1773    
1774     case OP_EXTUNI:
1775     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1776     GETCHARINCTEST(c, eptr);
1777     {
1778 ph10 349 int category = UCD_CATEGORY(c);
1779 nigel 77 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1780     while (eptr < md->end_subject)
1781     {
1782     int len = 1;
1783     if (!utf8) c = *eptr; else
1784     {
1785     GETCHARLEN(c, eptr, len);
1786     }
1787 ph10 349 category = UCD_CATEGORY(c);
1788 nigel 77 if (category != ucp_M) break;
1789     eptr += len;
1790     }
1791     }
1792     ecode++;
1793     break;
1794     #endif
1795    
1796    
1797     /* Match a back reference, possibly repeatedly. Look past the end of the
1798     item to see if there is repeat information following. The code is similar
1799     to that for character classes, but repeated for efficiency. Then obey
1800     similar code to character type repeats - written out again for speed.
1801     However, if the referenced string is the empty string, always treat
1802     it as matched, any number of times (otherwise there could be infinite
1803     loops). */
1804    
1805     case OP_REF:
1806     {
1807     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1808 ph10 345 ecode += 3;
1809    
1810 ph10 336 /* If the reference is unset, there are two possibilities:
1811 ph10 345
1812 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
1813     than the amount of subject left; this ensures that every attempt at a
1814     match fails. We can't just fail here, because of the possibility of
1815     quantifiers with zero minima.
1816 ph10 345
1817     (b) If the JavaScript compatibility flag is set, set the length to zero
1818     so that the back reference matches an empty string.
1819    
1820     Otherwise, set the length to the length of what was matched by the
1821 ph10 336 referenced subpattern. */
1822 ph10 345
1823 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
1824 ph10 345 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1825 ph10 336 else
1826     length = md->offset_vector[offset+1] - md->offset_vector[offset];
1827 nigel 77
1828     /* Set up for repetition, or handle the non-repeated case */
1829    
1830     switch (*ecode)
1831     {
1832     case OP_CRSTAR:
1833     case OP_CRMINSTAR:
1834     case OP_CRPLUS:
1835     case OP_CRMINPLUS:
1836     case OP_CRQUERY:
1837     case OP_CRMINQUERY:
1838     c = *ecode++ - OP_CRSTAR;
1839     minimize = (c & 1) != 0;
1840     min = rep_min[c]; /* Pick up values from tables; */
1841     max = rep_max[c]; /* zero for max => infinity */
1842     if (max == 0) max = INT_MAX;
1843     break;
1844    
1845     case OP_CRRANGE:
1846     case OP_CRMINRANGE:
1847     minimize = (*ecode == OP_CRMINRANGE);
1848     min = GET2(ecode, 1);
1849     max = GET2(ecode, 3);
1850     if (max == 0) max = INT_MAX;
1851     ecode += 5;
1852     break;
1853    
1854     default: /* No repeat follows */
1855     if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1856     eptr += length;
1857     continue; /* With the main loop */
1858     }
1859    
1860     /* If the length of the reference is zero, just continue with the
1861     main loop. */
1862    
1863     if (length == 0) continue;
1864    
1865     /* First, ensure the minimum number of matches are present. We get back
1866     the length of the reference string explicitly rather than passing the
1867     address of eptr, so that eptr can be a register variable. */
1868    
1869     for (i = 1; i <= min; i++)
1870     {
1871 ph10 426 if (!match_ref(offset, eptr, length, md, ims))
1872     {
1873     CHECK_PARTIAL();
1874     RRETURN(MATCH_NOMATCH);
1875     }
1876 nigel 77 eptr += length;
1877     }
1878    
1879     /* If min = max, continue at the same level without recursion.
1880     They are not both allowed to be zero. */
1881    
1882     if (min == max) continue;
1883    
1884     /* If minimizing, keep trying and advancing the pointer */
1885    
1886     if (minimize)
1887     {
1888     for (fi = min;; fi++)
1889     {
1890 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1891 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1892     if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1893 ph10 426 {
1894     CHECK_PARTIAL();
1895 nigel 77 RRETURN(MATCH_NOMATCH);
1896 ph10 426 }
1897 nigel 77 eptr += length;
1898     }
1899     /* Control never gets here */
1900     }
1901    
1902     /* If maximizing, find the longest string and work backwards */
1903    
1904     else
1905     {
1906     pp = eptr;
1907     for (i = min; i < max; i++)
1908     {
1909     if (!match_ref(offset, eptr, length, md, ims)) break;
1910     eptr += length;
1911     }
1912 ph10 426 CHECK_PARTIAL();
1913 nigel 77 while (eptr >= pp)
1914     {
1915 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1916 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1917     eptr -= length;
1918     }
1919     RRETURN(MATCH_NOMATCH);
1920     }
1921     }
1922     /* Control never gets here */
1923    
1924    
1925    
1926     /* Match a bit-mapped character class, possibly repeatedly. This op code is
1927     used when all the characters in the class have values in the range 0-255,
1928     and either the matching is caseful, or the characters are in the range
1929     0-127 when UTF-8 processing is enabled. The only difference between
1930     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1931     encountered.
1932    
1933     First, look past the end of the item to see if there is repeat information
1934     following. Then obey similar code to character type repeats - written out
1935     again for speed. */
1936    
1937     case OP_NCLASS:
1938     case OP_CLASS:
1939     {
1940     data = ecode + 1; /* Save for matching */
1941     ecode += 33; /* Advance past the item */
1942    
1943     switch (*ecode)
1944     {
1945     case OP_CRSTAR:
1946     case OP_CRMINSTAR:
1947     case OP_CRPLUS:
1948     case OP_CRMINPLUS:
1949     case OP_CRQUERY:
1950     case OP_CRMINQUERY:
1951     c = *ecode++ - OP_CRSTAR;
1952     minimize = (c & 1) != 0;
1953     min = rep_min[c]; /* Pick up values from tables; */
1954     max = rep_max[c]; /* zero for max => infinity */
1955     if (max == 0) max = INT_MAX;
1956     break;
1957    
1958     case OP_CRRANGE:
1959     case OP_CRMINRANGE:
1960     minimize = (*ecode == OP_CRMINRANGE);
1961     min = GET2(ecode, 1);
1962     max = GET2(ecode, 3);
1963     if (max == 0) max = INT_MAX;
1964     ecode += 5;
1965     break;
1966    
1967     default: /* No repeat follows */
1968     min = max = 1;
1969     break;
1970     }
1971    
1972     /* First, ensure the minimum number of matches are present. */
1973    
1974     #ifdef SUPPORT_UTF8
1975     /* UTF-8 mode */
1976     if (utf8)
1977     {
1978     for (i = 1; i <= min; i++)
1979     {
1980 ph10 426 if (eptr >= md->end_subject)
1981     {
1982     CHECK_PARTIAL();
1983     RRETURN(MATCH_NOMATCH);
1984     }
1985 nigel 77 GETCHARINC(c, eptr);
1986     if (c > 255)
1987     {
1988     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1989     }
1990     else
1991     {
1992     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1993     }
1994     }
1995     }
1996     else
1997     #endif
1998     /* Not UTF-8 mode */
1999     {
2000     for (i = 1; i <= min; i++)
2001     {
2002 ph10 426 if (eptr >= md->end_subject)
2003     {
2004     CHECK_PARTIAL();
2005     RRETURN(MATCH_NOMATCH);
2006     }
2007 nigel 77 c = *eptr++;
2008     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2009     }
2010     }
2011    
2012     /* If max == min we can continue with the main loop without the
2013     need to recurse. */
2014    
2015     if (min == max) continue;
2016    
2017     /* If minimizing, keep testing the rest of the expression and advancing
2018     the pointer while it matches the class. */
2019    
2020     if (minimize)
2021     {
2022     #ifdef SUPPORT_UTF8
2023     /* UTF-8 mode */
2024     if (utf8)
2025     {
2026     for (fi = min;; fi++)
2027     {
2028 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2029 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2030 ph10 426 if (fi >= max)
2031     {
2032     CHECK_PARTIAL();
2033     RRETURN(MATCH_NOMATCH);
2034     }
2035     if (eptr >= md->end_subject)
2036     {
2037     SCHECK_PARTIAL();
2038     RRETURN(MATCH_NOMATCH);
2039     }
2040 nigel 77 GETCHARINC(c, eptr);
2041     if (c > 255)
2042     {
2043     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2044     }
2045     else
2046     {
2047     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2048     }
2049     }
2050     }
2051     else
2052     #endif
2053     /* Not UTF-8 mode */
2054     {
2055     for (fi = min;; fi++)
2056     {
2057 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2058 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2059 ph10 426 if (fi >= max)
2060     {
2061     CHECK_PARTIAL();
2062     RRETURN(MATCH_NOMATCH);
2063     }
2064     if (eptr >= md->end_subject)
2065     {
2066     SCHECK_PARTIAL();
2067     RRETURN(MATCH_NOMATCH);
2068     }
2069 nigel 77 c = *eptr++;
2070     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2071     }
2072     }
2073     /* Control never gets here */
2074     }
2075    
2076     /* If maximizing, find the longest possible run, then work backwards. */
2077    
2078     else
2079     {
2080     pp = eptr;
2081    
2082     #ifdef SUPPORT_UTF8
2083     /* UTF-8 mode */
2084     if (utf8)
2085     {
2086     for (i = min; i < max; i++)
2087     {
2088     int len = 1;
2089     if (eptr >= md->end_subject) break;
2090     GETCHARLEN(c, eptr, len);
2091     if (c > 255)
2092     {
2093     if (op == OP_CLASS) break;
2094     }
2095     else
2096     {
2097     if ((data[c/8] & (1 << (c&7))) == 0) break;
2098     }
2099     eptr += len;
2100     }
2101 ph10 426 CHECK_PARTIAL();
2102 nigel 77 for (;;)
2103     {
2104 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2105 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2106     if (eptr-- == pp) break; /* Stop if tried at original pos */
2107     BACKCHAR(eptr);
2108     }
2109     }
2110     else
2111     #endif
2112     /* Not UTF-8 mode */
2113     {
2114     for (i = min; i < max; i++)
2115     {
2116     if (eptr >= md->end_subject) break;
2117     c = *eptr;
2118     if ((data[c/8] & (1 << (c&7))) == 0) break;
2119     eptr++;
2120     }
2121 ph10 426 CHECK_PARTIAL();
2122 nigel 77 while (eptr >= pp)
2123     {
2124 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2125 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2126 nigel 77 eptr--;
2127     }
2128     }
2129    
2130     RRETURN(MATCH_NOMATCH);
2131     }
2132     }
2133     /* Control never gets here */
2134    
2135    
2136     /* Match an extended character class. This opcode is encountered only
2137 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2138     mode, because Unicode properties are supported in non-UTF-8 mode. */
2139 nigel 77
2140     #ifdef SUPPORT_UTF8
2141     case OP_XCLASS:
2142     {
2143     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2144     ecode += GET(ecode, 1); /* Advance past the item */
2145    
2146     switch (*ecode)
2147     {
2148     case OP_CRSTAR:
2149     case OP_CRMINSTAR:
2150     case OP_CRPLUS:
2151     case OP_CRMINPLUS:
2152     case OP_CRQUERY:
2153     case OP_CRMINQUERY:
2154     c = *ecode++ - OP_CRSTAR;
2155     minimize = (c & 1) != 0;
2156     min = rep_min[c]; /* Pick up values from tables; */
2157     max = rep_max[c]; /* zero for max => infinity */
2158     if (max == 0) max = INT_MAX;
2159     break;
2160    
2161     case OP_CRRANGE:
2162     case OP_CRMINRANGE:
2163     minimize = (*ecode == OP_CRMINRANGE);
2164     min = GET2(ecode, 1);
2165     max = GET2(ecode, 3);
2166     if (max == 0) max = INT_MAX;
2167     ecode += 5;
2168     break;
2169    
2170     default: /* No repeat follows */
2171     min = max = 1;
2172     break;
2173     }
2174    
2175     /* First, ensure the minimum number of matches are present. */
2176    
2177     for (i = 1; i <= min; i++)
2178     {
2179 ph10 426 if (eptr >= md->end_subject)
2180     {
2181     SCHECK_PARTIAL();
2182     RRETURN(MATCH_NOMATCH);
2183     }
2184 ph10 384 GETCHARINCTEST(c, eptr);
2185 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2186     }
2187    
2188     /* If max == min we can continue with the main loop without the
2189     need to recurse. */
2190    
2191     if (min == max) continue;
2192    
2193     /* If minimizing, keep testing the rest of the expression and advancing
2194     the pointer while it matches the class. */
2195    
2196     if (minimize)
2197     {
2198     for (fi = min;; fi++)
2199     {
2200 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2201 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2202 ph10 426 if (fi >= max)
2203     {
2204     CHECK_PARTIAL();
2205     RRETURN(MATCH_NOMATCH);
2206     }
2207     if (eptr >= md->end_subject)
2208     {
2209     SCHECK_PARTIAL();
2210     RRETURN(MATCH_NOMATCH);
2211     }
2212 ph10 384 GETCHARINCTEST(c, eptr);
2213 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2214     }
2215     /* Control never gets here */
2216     }
2217    
2218     /* If maximizing, find the longest possible run, then work backwards. */
2219    
2220     else
2221     {
2222     pp = eptr;
2223     for (i = min; i < max; i++)
2224     {
2225     int len = 1;
2226     if (eptr >= md->end_subject) break;
2227 ph10 384 GETCHARLENTEST(c, eptr, len);
2228 nigel 77 if (!_pcre_xclass(c, data)) break;
2229     eptr += len;
2230     }
2231 ph10 426 CHECK_PARTIAL();
2232 nigel 77 for(;;)
2233     {
2234 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2235 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2236     if (eptr-- == pp) break; /* Stop if tried at original pos */
2237 ph10 214 if (utf8) BACKCHAR(eptr);
2238 nigel 77 }
2239     RRETURN(MATCH_NOMATCH);
2240     }
2241    
2242     /* Control never gets here */
2243     }
2244     #endif /* End of XCLASS */
2245    
2246     /* Match a single character, casefully */
2247    
2248     case OP_CHAR:
2249     #ifdef SUPPORT_UTF8
2250     if (utf8)
2251     {
2252     length = 1;
2253     ecode++;
2254     GETCHARLEN(fc, ecode, length);
2255     if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2256     while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2257     }
2258     else
2259     #endif
2260    
2261     /* Non-UTF-8 mode */
2262     {
2263     if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2264     if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2265     ecode += 2;
2266     }
2267     break;
2268    
2269     /* Match a single character, caselessly */
2270    
2271     case OP_CHARNC:
2272     #ifdef SUPPORT_UTF8
2273     if (utf8)
2274     {
2275     length = 1;
2276     ecode++;
2277     GETCHARLEN(fc, ecode, length);
2278    
2279     if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2280    
2281     /* If the pattern character's value is < 128, we have only one byte, and
2282     can use the fast lookup table. */
2283    
2284     if (fc < 128)
2285     {
2286     if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2287     }
2288    
2289     /* Otherwise we must pick up the subject character */
2290    
2291     else
2292     {
2293 nigel 93 unsigned int dc;
2294 nigel 77 GETCHARINC(dc, eptr);
2295     ecode += length;
2296    
2297     /* If we have Unicode property support, we can use it to test the other
2298 nigel 87 case of the character, if there is one. */
2299 nigel 77
2300     if (fc != dc)
2301     {
2302     #ifdef SUPPORT_UCP
2303 ph10 349 if (dc != UCD_OTHERCASE(fc))
2304 nigel 77 #endif
2305     RRETURN(MATCH_NOMATCH);
2306     }
2307     }
2308     }
2309     else
2310     #endif /* SUPPORT_UTF8 */
2311    
2312     /* Non-UTF-8 mode */
2313     {
2314     if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2315     if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2316     ecode += 2;
2317     }
2318     break;
2319    
2320 nigel 93 /* Match a single character repeatedly. */
2321 nigel 77
2322     case OP_EXACT:
2323     min = max = GET2(ecode, 1);
2324     ecode += 3;
2325     goto REPEATCHAR;
2326    
2327 nigel 93 case OP_POSUPTO:
2328     possessive = TRUE;
2329     /* Fall through */
2330    
2331 nigel 77 case OP_UPTO:
2332     case OP_MINUPTO:
2333     min = 0;
2334     max = GET2(ecode, 1);
2335     minimize = *ecode == OP_MINUPTO;
2336     ecode += 3;
2337     goto REPEATCHAR;
2338    
2339 nigel 93 case OP_POSSTAR:
2340     possessive = TRUE;
2341     min = 0;
2342     max = INT_MAX;
2343     ecode++;
2344     goto REPEATCHAR;
2345    
2346     case OP_POSPLUS:
2347     possessive = TRUE;
2348     min = 1;
2349     max = INT_MAX;
2350     ecode++;
2351     goto REPEATCHAR;
2352    
2353     case OP_POSQUERY:
2354     possessive = TRUE;
2355     min = 0;
2356     max = 1;
2357     ecode++;
2358     goto REPEATCHAR;
2359    
2360 nigel 77 case OP_STAR:
2361     case OP_MINSTAR:
2362     case OP_PLUS:
2363     case OP_MINPLUS:
2364     case OP_QUERY:
2365     case OP_MINQUERY:
2366     c = *ecode++ - OP_STAR;
2367     minimize = (c & 1) != 0;
2368     min = rep_min[c]; /* Pick up values from tables; */
2369     max = rep_max[c]; /* zero for max => infinity */
2370     if (max == 0) max = INT_MAX;
2371    
2372 ph10 426 /* Common code for all repeated single-character matches. */
2373 nigel 77
2374     REPEATCHAR:
2375     #ifdef SUPPORT_UTF8
2376     if (utf8)
2377     {
2378     length = 1;
2379     charptr = ecode;
2380     GETCHARLEN(fc, ecode, length);
2381     ecode += length;
2382    
2383     /* Handle multibyte character matching specially here. There is
2384     support for caseless matching if UCP support is present. */
2385    
2386     if (length > 1)
2387     {
2388     #ifdef SUPPORT_UCP
2389 nigel 93 unsigned int othercase;
2390 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2391 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2392 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2393 ph10 115 else oclength = 0;
2394 nigel 77 #endif /* SUPPORT_UCP */
2395    
2396     for (i = 1; i <= min; i++)
2397     {
2398 ph10 426 if (eptr <= md->end_subject - length &&
2399     memcmp(eptr, charptr, length) == 0) eptr += length;
2400 ph10 123 #ifdef SUPPORT_UCP
2401 ph10 426 else if (oclength > 0 &&
2402     eptr <= md->end_subject - oclength &&
2403     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2404     #endif /* SUPPORT_UCP */
2405 nigel 77 else
2406     {
2407 ph10 426 CHECK_PARTIAL();
2408     RRETURN(MATCH_NOMATCH);
2409 nigel 77 }
2410     }
2411    
2412     if (min == max) continue;
2413    
2414     if (minimize)
2415     {
2416     for (fi = min;; fi++)
2417     {
2418 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2419 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2420 ph10 426 if (fi >= max)
2421     {
2422     CHECK_PARTIAL();
2423     RRETURN(MATCH_NOMATCH);
2424     }
2425     if (eptr <= md->end_subject - length &&
2426     memcmp(eptr, charptr, length) == 0) eptr += length;
2427 ph10 123 #ifdef SUPPORT_UCP
2428 ph10 426 else if (oclength > 0 &&
2429     eptr <= md->end_subject - oclength &&
2430     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2431     #endif /* SUPPORT_UCP */
2432 nigel 77 else
2433     {
2434 ph10 426 CHECK_PARTIAL();
2435     RRETURN(MATCH_NOMATCH);
2436 nigel 77 }
2437     }
2438     /* Control never gets here */
2439     }
2440 nigel 93
2441     else /* Maximize */
2442 nigel 77 {
2443     pp = eptr;
2444     for (i = min; i < max; i++)
2445     {
2446 ph10 426 if (eptr <= md->end_subject - length &&
2447     memcmp(eptr, charptr, length) == 0) eptr += length;
2448 ph10 123 #ifdef SUPPORT_UCP
2449 ph10 426 else if (oclength > 0 &&
2450     eptr <= md->end_subject - oclength &&
2451     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2452     #endif /* SUPPORT_UCP */
2453 ph10 115 else break;
2454 nigel 77 }
2455 nigel 93
2456 ph10 426 CHECK_PARTIAL();
2457 nigel 93 if (possessive) continue;
2458 ph10 426
2459 ph10 120 for(;;)
2460 ph10 426 {
2461     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2462     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2463     if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2464 ph10 115 #ifdef SUPPORT_UCP
2465 ph10 426 eptr--;
2466     BACKCHAR(eptr);
2467 ph10 123 #else /* without SUPPORT_UCP */
2468 ph10 426 eptr -= length;
2469 ph10 123 #endif /* SUPPORT_UCP */
2470 ph10 426 }
2471 nigel 77 }
2472     /* Control never gets here */
2473     }
2474    
2475     /* If the length of a UTF-8 character is 1, we fall through here, and
2476     obey the code as for non-UTF-8 characters below, though in this case the
2477     value of fc will always be < 128. */
2478     }
2479     else
2480     #endif /* SUPPORT_UTF8 */
2481    
2482     /* When not in UTF-8 mode, load a single-byte character. */
2483    
2484 ph10 426 fc = *ecode++;
2485    
2486 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2487     may not be in UTF-8 mode. The code is duplicated for the caseless and
2488     caseful cases, for speed, since matching characters is likely to be quite
2489     common. First, ensure the minimum number of matches are present. If min =
2490     max, continue at the same level without recursing. Otherwise, if
2491     minimizing, keep trying the rest of the expression and advancing one
2492     matching character if failing, up to the maximum. Alternatively, if
2493     maximizing, find the maximum number of characters and work backwards. */
2494    
2495     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2496     max, eptr));
2497    
2498     if ((ims & PCRE_CASELESS) != 0)
2499     {
2500     fc = md->lcc[fc];
2501     for (i = 1; i <= min; i++)
2502 ph10 426 {
2503     if (eptr >= md->end_subject)
2504     {
2505     SCHECK_PARTIAL();
2506     RRETURN(MATCH_NOMATCH);
2507     }
2508 nigel 77 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2509 ph10 426 }
2510 nigel 77 if (min == max) continue;
2511     if (minimize)
2512     {
2513     for (fi = min;; fi++)
2514     {
2515 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2516 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2517 ph10 426 if (fi >= max)
2518     {
2519     CHECK_PARTIAL();
2520 nigel 77 RRETURN(MATCH_NOMATCH);
2521 ph10 426 }
2522     if (eptr >= md->end_subject)
2523     {
2524     SCHECK_PARTIAL();
2525     RRETURN(MATCH_NOMATCH);
2526     }
2527     if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2528 nigel 77 }
2529     /* Control never gets here */
2530     }
2531 nigel 93 else /* Maximize */
2532 nigel 77 {
2533     pp = eptr;
2534     for (i = min; i < max; i++)
2535     {
2536     if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2537     eptr++;
2538     }
2539 ph10 426
2540     CHECK_PARTIAL();
2541 nigel 93 if (possessive) continue;
2542 ph10 426
2543 nigel 77 while (eptr >= pp)
2544     {
2545 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2546 nigel 77 eptr--;
2547     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2548     }
2549     RRETURN(MATCH_NOMATCH);
2550     }
2551     /* Control never gets here */
2552     }
2553    
2554     /* Caseful comparisons (includes all multi-byte characters) */
2555    
2556     else
2557     {
2558 ph10 426 for (i = 1; i <= min; i++)
2559     {
2560     if (eptr >= md->end_subject)
2561     {
2562     SCHECK_PARTIAL();
2563     RRETURN(MATCH_NOMATCH);
2564     }
2565     if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2566     }
2567 nigel 77 if (min == max) continue;
2568     if (minimize)
2569     {
2570     for (fi = min;; fi++)
2571     {
2572 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2573 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2574 ph10 426 if (fi >= max)
2575     {
2576     CHECK_PARTIAL();
2577 nigel 77 RRETURN(MATCH_NOMATCH);
2578 ph10 426 }
2579     if (eptr >= md->end_subject)
2580     {
2581     SCHECK_PARTIAL();
2582     RRETURN(MATCH_NOMATCH);
2583     }
2584     if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2585 nigel 77 }
2586     /* Control never gets here */
2587     }
2588 nigel 93 else /* Maximize */
2589 nigel 77 {
2590     pp = eptr;
2591     for (i = min; i < max; i++)
2592     {
2593     if (eptr >= md->end_subject || fc != *eptr) break;
2594     eptr++;
2595     }
2596 ph10 426 CHECK_PARTIAL();
2597 nigel 93 if (possessive) continue;
2598 nigel 77 while (eptr >= pp)
2599     {
2600 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2601 nigel 77 eptr--;
2602     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2603     }
2604     RRETURN(MATCH_NOMATCH);
2605     }
2606     }
2607     /* Control never gets here */
2608    
2609     /* Match a negated single one-byte character. The character we are
2610     checking can be multibyte. */
2611    
2612     case OP_NOT:
2613     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2614     ecode++;
2615     GETCHARINCTEST(c, eptr);
2616     if ((ims & PCRE_CASELESS) != 0)
2617     {
2618     #ifdef SUPPORT_UTF8
2619     if (c < 256)
2620     #endif
2621     c = md->lcc[c];
2622     if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2623     }
2624     else
2625     {
2626     if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2627     }
2628     break;
2629    
2630     /* Match a negated single one-byte character repeatedly. This is almost a
2631     repeat of the code for a repeated single character, but I haven't found a
2632     nice way of commoning these up that doesn't require a test of the
2633     positive/negative option for each character match. Maybe that wouldn't add
2634     very much to the time taken, but character matching *is* what this is all
2635     about... */
2636    
2637     case OP_NOTEXACT:
2638     min = max = GET2(ecode, 1);
2639     ecode += 3;
2640     goto REPEATNOTCHAR;
2641    
2642     case OP_NOTUPTO:
2643     case OP_NOTMINUPTO:
2644     min = 0;
2645     max = GET2(ecode, 1);
2646     minimize = *ecode == OP_NOTMINUPTO;
2647     ecode += 3;
2648     goto REPEATNOTCHAR;
2649    
2650 nigel 93 case OP_NOTPOSSTAR:
2651     possessive = TRUE;
2652     min = 0;
2653     max = INT_MAX;
2654     ecode++;
2655     goto REPEATNOTCHAR;
2656    
2657     case OP_NOTPOSPLUS:
2658     possessive = TRUE;
2659     min = 1;
2660     max = INT_MAX;
2661     ecode++;
2662     goto REPEATNOTCHAR;
2663    
2664     case OP_NOTPOSQUERY:
2665     possessive = TRUE;
2666     min = 0;
2667     max = 1;
2668     ecode++;
2669     goto REPEATNOTCHAR;
2670    
2671     case OP_NOTPOSUPTO:
2672     possessive = TRUE;
2673     min = 0;
2674     max = GET2(ecode, 1);
2675     ecode += 3;
2676     goto REPEATNOTCHAR;
2677    
2678 nigel 77 case OP_NOTSTAR:
2679     case OP_NOTMINSTAR:
2680     case OP_NOTPLUS:
2681     case OP_NOTMINPLUS:
2682     case OP_NOTQUERY:
2683     case OP_NOTMINQUERY:
2684     c = *ecode++ - OP_NOTSTAR;
2685     minimize = (c & 1) != 0;
2686     min = rep_min[c]; /* Pick up values from tables; */
2687     max = rep_max[c]; /* zero for max => infinity */
2688     if (max == 0) max = INT_MAX;
2689    
2690 ph10 426 /* Common code for all repeated single-byte matches. */
2691 nigel 77
2692     REPEATNOTCHAR:
2693     fc = *ecode++;
2694    
2695     /* The code is duplicated for the caseless and caseful cases, for speed,
2696     since matching characters is likely to be quite common. First, ensure the
2697     minimum number of matches are present. If min = max, continue at the same
2698     level without recursing. Otherwise, if minimizing, keep trying the rest of
2699     the expression and advancing one matching character if failing, up to the
2700     maximum. Alternatively, if maximizing, find the maximum number of
2701     characters and work backwards. */
2702    
2703     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2704     max, eptr));
2705    
2706     if ((ims & PCRE_CASELESS) != 0)
2707     {
2708     fc = md->lcc[fc];
2709    
2710     #ifdef SUPPORT_UTF8
2711     /* UTF-8 mode */
2712     if (utf8)
2713     {
2714 nigel 93 register unsigned int d;
2715 nigel 77 for (i = 1; i <= min; i++)
2716     {
2717 ph10 426 if (eptr >= md->end_subject)
2718     {
2719     SCHECK_PARTIAL();
2720     RRETURN(MATCH_NOMATCH);
2721     }
2722 nigel 77 GETCHARINC(d, eptr);
2723     if (d < 256) d = md->lcc[d];
2724     if (fc == d) RRETURN(MATCH_NOMATCH);
2725     }
2726     }
2727     else
2728     #endif
2729    
2730     /* Not UTF-8 mode */
2731     {
2732     for (i = 1; i <= min; i++)
2733 ph10 426 {
2734     if (eptr >= md->end_subject)
2735     {
2736     SCHECK_PARTIAL();
2737     RRETURN(MATCH_NOMATCH);
2738     }
2739 nigel 77 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2740 ph10 426 }
2741 nigel 77 }
2742    
2743     if (min == max) continue;
2744    
2745     if (minimize)
2746     {
2747     #ifdef SUPPORT_UTF8
2748     /* UTF-8 mode */
2749     if (utf8)
2750     {
2751 nigel 93 register unsigned int d;
2752 nigel 77 for (fi = min;; fi++)
2753     {
2754 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2755 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2756 ph10 426 if (fi >= max)
2757     {
2758     CHECK_PARTIAL();
2759     RRETURN(MATCH_NOMATCH);
2760     }
2761     if (eptr >= md->end_subject)
2762     {
2763     SCHECK_PARTIAL();
2764     RRETURN(MATCH_NOMATCH);
2765     }
2766 nigel 77 GETCHARINC(d, eptr);
2767     if (d < 256) d = md->lcc[d];
2768 ph10 366 if (fc == d) RRETURN(MATCH_NOMATCH);
2769 nigel 77 }
2770     }
2771     else
2772     #endif
2773     /* Not UTF-8 mode */
2774     {
2775     for (fi = min;; fi++)
2776     {
2777 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2778 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2779 ph10 426 if (fi >= max)
2780     {
2781     CHECK_PARTIAL();
2782 nigel 77 RRETURN(MATCH_NOMATCH);
2783 ph10 426 }
2784     if (eptr >= md->end_subject)
2785     {
2786     SCHECK_PARTIAL();
2787     RRETURN(MATCH_NOMATCH);
2788     }
2789     if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2790 nigel 77 }
2791     }
2792     /* Control never gets here */
2793     }
2794    
2795     /* Maximize case */
2796    
2797     else
2798     {
2799     pp = eptr;
2800    
2801     #ifdef SUPPORT_UTF8
2802     /* UTF-8 mode */
2803     if (utf8)
2804     {
2805 nigel 93 register unsigned int d;
2806 nigel 77 for (i = min; i < max; i++)
2807     {
2808     int len = 1;
2809     if (eptr >= md->end_subject) break;
2810     GETCHARLEN(d, eptr, len);
2811     if (d < 256) d = md->lcc[d];
2812     if (fc == d) break;
2813     eptr += len;
2814     }
2815 ph10 426 CHECK_PARTIAL();
2816 nigel 93 if (possessive) continue;
2817     for(;;)
2818 nigel 77 {
2819 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2820 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2821     if (eptr-- == pp) break; /* Stop if tried at original pos */
2822     BACKCHAR(eptr);
2823     }
2824     }
2825     else
2826     #endif
2827     /* Not UTF-8 mode */
2828     {
2829     for (i = min; i < max; i++)
2830     {
2831     if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2832     eptr++;
2833     }
2834 ph10 426 CHECK_PARTIAL();
2835 nigel 93 if (possessive) continue;
2836 nigel 77 while (eptr >= pp)
2837     {
2838 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2839 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2840     eptr--;
2841     }
2842     }
2843    
2844     RRETURN(MATCH_NOMATCH);
2845     }
2846     /* Control never gets here */
2847     }
2848    
2849     /* Caseful comparisons */
2850    
2851     else
2852     {
2853     #ifdef SUPPORT_UTF8
2854     /* UTF-8 mode */
2855     if (utf8)
2856     {
2857 nigel 93 register unsigned int d;
2858 nigel 77 for (i = 1; i <= min; i++)
2859     {
2860 ph10 426 if (eptr >= md->end_subject)
2861     {
2862     SCHECK_PARTIAL();
2863     RRETURN(MATCH_NOMATCH);
2864     }
2865 nigel 77 GETCHARINC(d, eptr);
2866     if (fc == d) RRETURN(MATCH_NOMATCH);
2867     }
2868     }
2869     else
2870     #endif
2871     /* Not UTF-8 mode */
2872     {
2873     for (i = 1; i <= min; i++)
2874 ph10 426 {
2875     if (eptr >= md->end_subject)
2876     {
2877     SCHECK_PARTIAL();
2878     RRETURN(MATCH_NOMATCH);
2879     }
2880 nigel 77 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2881 ph10 426 }
2882 nigel 77 }
2883    
2884     if (min == max) continue;
2885    
2886     if (minimize)
2887     {
2888     #ifdef SUPPORT_UTF8
2889     /* UTF-8 mode */
2890     if (utf8)
2891     {
2892 nigel 93 register unsigned int d;
2893 nigel 77 for (fi = min;; fi++)
2894     {
2895 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2896 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2897 ph10 426 if (fi >= max)
2898     {
2899     CHECK_PARTIAL();
2900     RRETURN(MATCH_NOMATCH);
2901     }
2902     if (eptr >= md->end_subject)
2903     {
2904     SCHECK_PARTIAL();
2905     RRETURN(MATCH_NOMATCH);
2906     }
2907 nigel 77 GETCHARINC(d, eptr);
2908 ph10 371 if (fc == d) RRETURN(MATCH_NOMATCH);
2909 nigel 77 }
2910     }
2911     else
2912     #endif
2913     /* Not UTF-8 mode */
2914     {
2915     for (fi = min;; fi++)
2916     {
2917 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2918 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2919 ph10 426 if (fi >= max)
2920     {
2921     CHECK_PARTIAL();
2922 nigel 77 RRETURN(MATCH_NOMATCH);
2923 ph10 426 }
2924     if (eptr >= md->end_subject)
2925     {
2926     SCHECK_PARTIAL();
2927     RRETURN(MATCH_NOMATCH);
2928     }
2929     if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2930 nigel 77 }
2931     }
2932     /* Control never gets here */
2933     }
2934    
2935     /* Maximize case */
2936    
2937     else
2938     {
2939     pp = eptr;
2940    
2941     #ifdef SUPPORT_UTF8
2942     /* UTF-8 mode */
2943     if (utf8)
2944     {
2945 nigel 93 register unsigned int d;
2946 nigel 77 for (i = min; i < max; i++)
2947     {
2948     int len = 1;
2949     if (eptr >= md->end_subject) break;
2950     GETCHARLEN(d, eptr, len);
2951     if (fc == d) break;
2952     eptr += len;
2953     }
2954 ph10 426 CHECK_PARTIAL();
2955 nigel 93 if (possessive) continue;
2956 nigel 77 for(;;)
2957     {
2958 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2959 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2960     if (eptr-- == pp) break; /* Stop if tried at original pos */
2961     BACKCHAR(eptr);
2962     }
2963     }
2964     else
2965     #endif
2966     /* Not UTF-8 mode */
2967     {
2968     for (i = min; i < max; i++)
2969     {
2970     if (eptr >= md->end_subject || fc == *eptr) break;
2971     eptr++;
2972     }
2973 ph10 426 CHECK_PARTIAL();
2974 nigel 93 if (possessive) continue;
2975 nigel 77 while (eptr >= pp)
2976     {
2977 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2978 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2979     eptr--;
2980     }
2981     }
2982    
2983     RRETURN(MATCH_NOMATCH);
2984     }
2985     }
2986     /* Control never gets here */
2987    
2988     /* Match a single character type repeatedly; several different opcodes
2989     share code. This is very similar to the code for single characters, but we
2990     repeat it in the interests of efficiency. */
2991    
2992     case OP_TYPEEXACT:
2993     min = max = GET2(ecode, 1);
2994     minimize = TRUE;
2995     ecode += 3;
2996     goto REPEATTYPE;
2997    
2998     case OP_TYPEUPTO:
2999     case OP_TYPEMINUPTO:
3000     min = 0;
3001     max = GET2(ecode, 1);
3002     minimize = *ecode == OP_TYPEMINUPTO;
3003     ecode += 3;
3004     goto REPEATTYPE;
3005    
3006 nigel 93 case OP_TYPEPOSSTAR:
3007     possessive = TRUE;
3008     min = 0;
3009     max = INT_MAX;
3010     ecode++;
3011     goto REPEATTYPE;
3012    
3013     case OP_TYPEPOSPLUS:
3014     possessive = TRUE;
3015     min = 1;
3016     max = INT_MAX;
3017     ecode++;
3018     goto REPEATTYPE;
3019    
3020     case OP_TYPEPOSQUERY:
3021     possessive = TRUE;
3022     min = 0;
3023     max = 1;
3024     ecode++;
3025     goto REPEATTYPE;
3026    
3027     case OP_TYPEPOSUPTO:
3028     possessive = TRUE;
3029     min = 0;
3030     max = GET2(ecode, 1);
3031     ecode += 3;
3032     goto REPEATTYPE;
3033    
3034 nigel 77 case OP_TYPESTAR:
3035     case OP_TYPEMINSTAR:
3036     case OP_TYPEPLUS:
3037     case OP_TYPEMINPLUS:
3038     case OP_TYPEQUERY:
3039     case OP_TYPEMINQUERY:
3040     c = *ecode++ - OP_TYPESTAR;
3041     minimize = (c & 1) != 0;
3042     min = rep_min[c]; /* Pick up values from tables; */
3043     max = rep_max[c]; /* zero for max => infinity */
3044     if (max == 0) max = INT_MAX;
3045    
3046     /* Common code for all repeated single character type matches. Note that
3047     in UTF-8 mode, '.' matches a character of any length, but for the other
3048     character types, the valid characters are all one-byte long. */
3049    
3050     REPEATTYPE:
3051     ctype = *ecode++; /* Code for the character type */
3052    
3053     #ifdef SUPPORT_UCP
3054     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3055     {
3056     prop_fail_result = ctype == OP_NOTPROP;
3057     prop_type = *ecode++;
3058 nigel 87 prop_value = *ecode++;
3059 nigel 77 }
3060     else prop_type = -1;
3061     #endif
3062    
3063     /* First, ensure the minimum number of matches are present. Use inline
3064     code for maximizing the speed, and do the type test once at the start
3065 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3066 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3067     and single-bytes. */
3068    
3069     if (min > 0)
3070     {
3071     #ifdef SUPPORT_UCP
3072 nigel 87 if (prop_type >= 0)
3073 nigel 77 {
3074 nigel 87 switch(prop_type)
3075 nigel 77 {
3076 nigel 87 case PT_ANY:
3077     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3078     for (i = 1; i <= min; i++)
3079     {
3080 ph10 426 if (eptr >= md->end_subject)
3081     {
3082     SCHECK_PARTIAL();
3083     RRETURN(MATCH_NOMATCH);
3084     }
3085 ph10 184 GETCHARINCTEST(c, eptr);
3086 nigel 87 }
3087     break;
3088    
3089     case PT_LAMP:
3090     for (i = 1; i <= min; i++)
3091     {
3092 ph10 426 if (eptr >= md->end_subject)
3093     {
3094     SCHECK_PARTIAL();
3095     RRETURN(MATCH_NOMATCH);
3096     }
3097 ph10 184 GETCHARINCTEST(c, eptr);
3098 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3099 nigel 87 if ((prop_chartype == ucp_Lu ||
3100     prop_chartype == ucp_Ll ||
3101     prop_chartype == ucp_Lt) == prop_fail_result)
3102     RRETURN(MATCH_NOMATCH);
3103     }
3104     break;
3105    
3106     case PT_GC:
3107     for (i = 1; i <= min; i++)
3108     {
3109 ph10 426 if (eptr >= md->end_subject)
3110     {
3111     SCHECK_PARTIAL();
3112     RRETURN(MATCH_NOMATCH);
3113     }
3114 ph10 184 GETCHARINCTEST(c, eptr);
3115 ph10 349 prop_category = UCD_CATEGORY(c);
3116 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3117     RRETURN(MATCH_NOMATCH);
3118     }
3119     break;
3120    
3121     case PT_PC:
3122     for (i = 1; i <= min; i++)
3123     {
3124 ph10 426 if (eptr >= md->end_subject)
3125     {
3126     SCHECK_PARTIAL();
3127     RRETURN(MATCH_NOMATCH);
3128     }
3129 ph10 184 GETCHARINCTEST(c, eptr);
3130 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3131 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3132     RRETURN(MATCH_NOMATCH);
3133     }
3134     break;
3135    
3136     case PT_SC:
3137     for (i = 1; i <= min; i++)
3138     {
3139 ph10 426 if (eptr >= md->end_subject)
3140     {
3141     SCHECK_PARTIAL();
3142     RRETURN(MATCH_NOMATCH);
3143     }
3144 ph10 184 GETCHARINCTEST(c, eptr);
3145 ph10 349 prop_script = UCD_SCRIPT(c);
3146 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3147     RRETURN(MATCH_NOMATCH);
3148     }
3149     break;
3150    
3151     default:
3152     RRETURN(PCRE_ERROR_INTERNAL);
3153 nigel 77 }
3154     }
3155    
3156     /* Match extended Unicode sequences. We will get here only if the
3157     support is in the binary; otherwise a compile-time error occurs. */
3158    
3159     else if (ctype == OP_EXTUNI)
3160     {
3161     for (i = 1; i <= min; i++)
3162     {
3163 ph10 426 if (eptr >= md->end_subject)
3164     {
3165     SCHECK_PARTIAL();
3166     RRETURN(MATCH_NOMATCH);
3167     }
3168 nigel 77 GETCHARINCTEST(c, eptr);
3169 ph10 349 prop_category = UCD_CATEGORY(c);
3170 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3171     while (eptr < md->end_subject)
3172     {
3173     int len = 1;
3174 ph10 426 if (!utf8) c = *eptr;
3175     else { GETCHARLEN(c, eptr, len); }
3176 ph10 349 prop_category = UCD_CATEGORY(c);
3177 nigel 77 if (prop_category != ucp_M) break;
3178     eptr += len;
3179     }
3180     }
3181     }
3182    
3183     else
3184     #endif /* SUPPORT_UCP */
3185    
3186     /* Handle all other cases when the coding is UTF-8 */
3187    
3188     #ifdef SUPPORT_UTF8
3189     if (utf8) switch(ctype)
3190     {
3191     case OP_ANY:
3192     for (i = 1; i <= min; i++)
3193     {
3194 ph10 426 if (eptr >= md->end_subject)
3195     {
3196     SCHECK_PARTIAL();
3197 nigel 77 RRETURN(MATCH_NOMATCH);
3198 ph10 426 }
3199     if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3200 nigel 91 eptr++;
3201 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3202     }
3203     break;
3204    
3205 ph10 341 case OP_ALLANY:
3206     for (i = 1; i <= min; i++)
3207     {
3208 ph10 426 if (eptr >= md->end_subject)
3209     {
3210     SCHECK_PARTIAL();
3211     RRETURN(MATCH_NOMATCH);
3212     }
3213 ph10 341 eptr++;
3214     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3215     }
3216     break;
3217    
3218 nigel 77 case OP_ANYBYTE:
3219 ph10 426 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3220 nigel 77 eptr += min;
3221     break;
3222    
3223 nigel 93 case OP_ANYNL:
3224     for (i = 1; i <= min; i++)
3225     {
3226 ph10 426 if (eptr >= md->end_subject)
3227     {
3228     SCHECK_PARTIAL();
3229     RRETURN(MATCH_NOMATCH);
3230     }
3231 nigel 93 GETCHARINC(c, eptr);
3232     switch(c)
3233     {
3234     default: RRETURN(MATCH_NOMATCH);
3235     case 0x000d:
3236     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3237     break;
3238 ph10 231
3239 nigel 93 case 0x000a:
3240 ph10 231 break;
3241    
3242 nigel 93 case 0x000b:
3243     case 0x000c:
3244     case 0x0085:
3245     case 0x2028:
3246     case 0x2029:
3247 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3248 nigel 93 break;
3249     }
3250     }
3251     break;
3252    
3253 ph10 178 case OP_NOT_HSPACE:
3254     for (i = 1; i <= min; i++)
3255     {
3256 ph10 426 if (eptr >= md->end_subject)
3257     {
3258     SCHECK_PARTIAL();
3259     RRETURN(MATCH_NOMATCH);
3260     }
3261 ph10 178 GETCHARINC(c, eptr);
3262     switch(c)
3263     {
3264     default: break;
3265     case 0x09: /* HT */
3266     case 0x20: /* SPACE */
3267     case 0xa0: /* NBSP */
3268     case 0x1680: /* OGHAM SPACE MARK */
3269     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3270     case 0x2000: /* EN QUAD */
3271     case 0x2001: /* EM QUAD */
3272     case 0x2002: /* EN SPACE */
3273     case 0x2003: /* EM SPACE */
3274     case 0x2004: /* THREE-PER-EM SPACE */
3275     case 0x2005: /* FOUR-PER-EM SPACE */
3276     case 0x2006: /* SIX-PER-EM SPACE */
3277     case 0x2007: /* FIGURE SPACE */
3278     case 0x2008: /* PUNCTUATION SPACE */
3279     case 0x2009: /* THIN SPACE */
3280     case 0x200A: /* HAIR SPACE */
3281     case 0x202f: /* NARROW NO-BREAK SPACE */
3282     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3283     case 0x3000: /* IDEOGRAPHIC SPACE */
3284     RRETURN(MATCH_NOMATCH);
3285     }
3286     }
3287     break;
3288 ph10 182
3289 ph10 178 case OP_HSPACE:
3290     for (i = 1; i <= min; i++)
3291     {
3292 ph10 426 if (eptr >= md->end_subject)
3293     {
3294     SCHECK_PARTIAL();
3295     RRETURN(MATCH_NOMATCH);
3296     }
3297 ph10 178 GETCHARINC(c, eptr);
3298     switch(c)
3299     {
3300     default: RRETURN(MATCH_NOMATCH);
3301     case 0x09: /* HT */
3302     case 0x20: /* SPACE */
3303     case 0xa0: /* NBSP */
3304     case 0x1680: /* OGHAM SPACE MARK */
3305     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3306     case 0x2000: /* EN QUAD */
3307     case 0x2001: /* EM QUAD */
3308     case 0x2002: /* EN SPACE */
3309     case 0x2003: /* EM SPACE */
3310     case 0x2004: /* THREE-PER-EM SPACE */
3311     case 0x2005: /* FOUR-PER-EM SPACE */
3312     case 0x2006: /* SIX-PER-EM SPACE */
3313     case 0x2007: /* FIGURE SPACE */
3314     case 0x2008: /* PUNCTUATION SPACE */
3315     case 0x2009: /* THIN SPACE */
3316     case 0x200A: /* HAIR SPACE */
3317     case 0x202f: /* NARROW NO-BREAK SPACE */
3318     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3319     case 0x3000: /* IDEOGRAPHIC SPACE */
3320     break;
3321     }
3322     }
3323     break;
3324 ph10 182
3325 ph10 178 case OP_NOT_VSPACE:
3326     for (i = 1; i <= min; i++)
3327     {
3328 ph10 426 if (eptr >= md->end_subject)
3329     {
3330     SCHECK_PARTIAL();
3331     RRETURN(MATCH_NOMATCH);
3332     }
3333 ph10 178 GETCHARINC(c, eptr);
3334     switch(c)
3335     {
3336     default: break;
3337     case 0x0a: /* LF */
3338     case 0x0b: /* VT */
3339     case 0x0c: /* FF */
3340     case 0x0d: /* CR */
3341     case 0x85: /* NEL */
3342     case 0x2028: /* LINE SEPARATOR */
3343     case 0x2029: /* PARAGRAPH SEPARATOR */
3344     RRETURN(MATCH_NOMATCH);
3345     }
3346     }
3347     break;
3348 ph10 182
3349 ph10 178 case OP_VSPACE:
3350     for (i = 1; i <= min; i++)
3351     {
3352 ph10 426 if (eptr >= md->end_subject)
3353     {
3354     SCHECK_PARTIAL();
3355     RRETURN(MATCH_NOMATCH);
3356     }
3357 ph10 178 GETCHARINC(c, eptr);
3358     switch(c)
3359     {
3360     default: RRETURN(MATCH_NOMATCH);
3361     case 0x0a: /* LF */
3362     case 0x0b: /* VT */
3363     case 0x0c: /* FF */
3364     case 0x0d: /* CR */
3365     case 0x85: /* NEL */
3366     case 0x2028: /* LINE SEPARATOR */
3367     case 0x2029: /* PARAGRAPH SEPARATOR */
3368 ph10 182 break;
3369 ph10 178 }
3370     }
3371     break;
3372    
3373 nigel 77 case OP_NOT_DIGIT:
3374     for (i = 1; i <= min; i++)
3375     {
3376 ph10 426 if (eptr >= md->end_subject)
3377     {
3378     SCHECK_PARTIAL();
3379     RRETURN(MATCH_NOMATCH);
3380     }
3381 nigel 77 GETCHARINC(c, eptr);
3382     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3383     RRETURN(MATCH_NOMATCH);
3384     }
3385     break;
3386    
3387     case OP_DIGIT:
3388     for (i = 1; i <= min; i++)
3389     {
3390 ph10 426 if (eptr >= md->end_subject)
3391     {
3392     SCHECK_PARTIAL();
3393 nigel 77 RRETURN(MATCH_NOMATCH);
3394 ph10 426 }
3395     if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3396     RRETURN(MATCH_NOMATCH);
3397 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3398     }
3399     break;
3400    
3401     case OP_NOT_WHITESPACE:
3402     for (i = 1; i <= min; i++)
3403     {
3404 ph10 426 if (eptr >= md->end_subject)
3405     {
3406     SCHECK_PARTIAL();
3407 nigel 77 RRETURN(MATCH_NOMATCH);
3408 ph10 426 }
3409     if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3410     RRETURN(MATCH_NOMATCH);
3411 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3412 nigel 77 }
3413     break;
3414    
3415     case OP_WHITESPACE:
3416     for (i = 1; i <= min; i++)
3417     {
3418 ph10 426 if (eptr >= md->end_subject)
3419     {
3420     SCHECK_PARTIAL();
3421 nigel 77 RRETURN(MATCH_NOMATCH);
3422 ph10 426 }
3423     if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3424     RRETURN(MATCH_NOMATCH);
3425 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3426     }
3427     break;
3428    
3429     case OP_NOT_WORDCHAR:
3430     for (i = 1; i <= min; i++)
3431     {
3432     if (eptr >= md->end_subject ||
3433 ph10 219 (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3434 nigel 77 RRETURN(MATCH_NOMATCH);
3435 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3436 nigel 77 }
3437     break;
3438    
3439     case OP_WORDCHAR:
3440     for (i = 1; i <= min; i++)
3441     {
3442 ph10 426 if (eptr >= md->end_subject)
3443     {
3444     SCHECK_PARTIAL();
3445 nigel 77 RRETURN(MATCH_NOMATCH);
3446 ph10 426 }
3447     if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3448     RRETURN(MATCH_NOMATCH);
3449 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3450     }
3451     break;
3452    
3453     default:
3454     RRETURN(PCRE_ERROR_INTERNAL);
3455     } /* End switch(ctype) */
3456    
3457     else
3458     #endif /* SUPPORT_UTF8 */
3459    
3460     /* Code for the non-UTF-8 case for minimum matching of operators other
3461 ph10 426 than OP_PROP and OP_NOTPROP. */
3462 nigel 77
3463     switch(ctype)
3464     {
3465     case OP_ANY:
3466 ph10 342 for (i = 1; i <= min; i++)
3467 nigel 77 {
3468 ph10 426 if (eptr >= md->end_subject)
3469     {
3470     SCHECK_PARTIAL();
3471     RRETURN(MATCH_NOMATCH);
3472     }
3473 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3474     eptr++;
3475 nigel 77 }
3476     break;
3477    
3478 ph10 341 case OP_ALLANY:
3479 ph10 426 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3480 ph10 341 eptr += min;
3481     break;
3482    
3483 nigel 77 case OP_ANYBYTE:
3484 ph10 426 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3485 nigel 77 eptr += min;
3486     break;
3487    
3488 nigel 93 case OP_ANYNL:
3489     for (i = 1; i <= min; i++)
3490     {
3491 ph10 426 if (eptr >= md->end_subject)
3492     {
3493     SCHECK_PARTIAL();
3494     RRETURN(MATCH_NOMATCH);
3495     }
3496 nigel 93 switch(*eptr++)
3497     {
3498     default: RRETURN(MATCH_NOMATCH);
3499     case 0x000d:
3500     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3501     break;
3502     case 0x000a:
3503 ph10 231 break;
3504    
3505 nigel 93 case 0x000b:
3506     case 0x000c:
3507     case 0x0085:
3508 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3509 nigel 93 break;
3510     }
3511     }
3512     break;
3513    
3514 ph10 178 case OP_NOT_HSPACE:
3515     for (i = 1; i <= min; i++)
3516     {
3517 ph10 426 if (eptr >= md->end_subject)
3518     {
3519     SCHECK_PARTIAL();
3520     RRETURN(MATCH_NOMATCH);
3521     }
3522 ph10 178 switch(*eptr++)
3523     {
3524     default: break;
3525     case 0x09: /* HT */
3526     case 0x20: /* SPACE */
3527     case 0xa0: /* NBSP */
3528     RRETURN(MATCH_NOMATCH);
3529     }
3530     }
3531     break;
3532    
3533     case OP_HSPACE:
3534     for (i = 1; i <= min; i++)
3535     {
3536 ph10 426 if (eptr >= md->end_subject)
3537     {
3538     SCHECK_PARTIAL();
3539     RRETURN(MATCH_NOMATCH);
3540     }
3541 ph10 178 switch(*eptr++)
3542     {
3543     default: RRETURN(MATCH_NOMATCH);
3544     case 0x09: /* HT */
3545     case 0x20: /* SPACE */
3546     case 0xa0: /* NBSP */
3547 ph10 182 break;
3548 ph10 178 }
3549     }
3550     break;
3551    
3552     case OP_NOT_VSPACE:
3553     for (i = 1; i <= min; i++)
3554     {
3555 ph10 426 if (eptr >= md->end_subject)
3556     {
3557     SCHECK_PARTIAL();
3558     RRETURN(MATCH_NOMATCH);
3559     }
3560 ph10 178 switch(*eptr++)
3561     {
3562     default: break;
3563     case 0x0a: /* LF */
3564     case 0x0b: /* VT */
3565     case 0x0c: /* FF */
3566     case 0x0d: /* CR */
3567     case 0x85: /* NEL */
3568     RRETURN(MATCH_NOMATCH);
3569     }
3570     }
3571     break;
3572    
3573     case OP_VSPACE:
3574     for (i = 1; i <= min; i++)
3575     {
3576 ph10 426 if (eptr >= md->end_subject)
3577     {
3578     SCHECK_PARTIAL();
3579     RRETURN(MATCH_NOMATCH);
3580     }
3581 ph10 178 switch(*eptr++)
3582     {
3583     default: RRETURN(MATCH_NOMATCH);
3584     case 0x0a: /* LF */
3585     case 0x0b: /* VT */
3586     case 0x0c: /* FF */
3587     case 0x0d: /* CR */
3588     case 0x85: /* NEL */
3589 ph10 182 break;
3590 ph10 178 }
3591     }
3592     break;
3593    
3594 nigel 77 case OP_NOT_DIGIT:
3595     for (i = 1; i <= min; i++)
3596 ph10 426 {
3597     if (eptr >= md->end_subject)
3598     {
3599     SCHECK_PARTIAL();
3600     RRETURN(MATCH_NOMATCH);
3601     }
3602 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3603 ph10 426 }
3604 nigel 77 break;
3605    
3606     case OP_DIGIT:
3607     for (i = 1; i <= min; i++)
3608 ph10 426 {
3609     if (eptr >= md->end_subject)
3610     {
3611     SCHECK_PARTIAL();
3612     RRETURN(MATCH_NOMATCH);
3613     }
3614 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3615 ph10 426 }
3616 nigel 77 break;
3617    
3618     case OP_NOT_WHITESPACE:
3619     for (i = 1; i <= min; i++)
3620 ph10 426 {
3621     if (eptr >= md->end_subject)
3622     {
3623     SCHECK_PARTIAL();
3624     RRETURN(MATCH_NOMATCH);
3625     }
3626 nigel 77 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3627 ph10 426 }
3628 nigel 77 break;
3629    
3630     case OP_WHITESPACE:
3631     for (i = 1; i <= min; i++)
3632 ph10 426 {
3633     if (eptr >= md->end_subject)
3634     {
3635     SCHECK_PARTIAL();
3636     RRETURN(MATCH_NOMATCH);
3637     }
3638 nigel 77 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3639 ph10 426 }
3640 nigel 77 break;
3641    
3642     case OP_NOT_WORDCHAR:
3643     for (i = 1; i <= min; i++)
3644 ph10 426 {
3645     if (eptr >= md->end_subject)
3646     {
3647     SCHECK_PARTIAL();
3648     RRETURN(MATCH_NOMATCH);
3649     }
3650 nigel 77 if ((md->ctypes[*eptr++] & ctype_word) != 0)
3651     RRETURN(MATCH_NOMATCH);
3652 ph10 426 }
3653 nigel 77 break;
3654    
3655     case OP_WORDCHAR:
3656     for (i = 1; i <= min; i++)
3657 ph10 426 {
3658     if (eptr >= md->end_subject)
3659     {
3660     SCHECK_PARTIAL();
3661     RRETURN(MATCH_NOMATCH);
3662     }
3663 nigel 77 if ((md->ctypes[*eptr++] & ctype_word) == 0)
3664     RRETURN(MATCH_NOMATCH);
3665 ph10 426 }
3666 nigel 77 break;
3667    
3668     default:
3669     RRETURN(PCRE_ERROR_INTERNAL);
3670     }
3671     }
3672    
3673     /* If min = max, continue at the same level without recursing */
3674    
3675     if (min == max) continue;
3676    
3677     /* If minimizing, we have to test the rest of the pattern before each
3678     subsequent match. Again, separate the UTF-8 case for speed, and also
3679     separate the UCP cases. */
3680    
3681     if (minimize)
3682     {
3683     #ifdef SUPPORT_UCP
3684 nigel 87 if (prop_type >= 0)
3685 nigel 77 {
3686 nigel 87 switch(prop_type)
3687 nigel 77 {
3688 nigel 87 case PT_ANY:
3689     for (fi = min;; fi++)
3690     {
3691 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3692 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3693 ph10 426 if (fi >= max)
3694     {
3695     CHECK_PARTIAL();
3696     RRETURN(MATCH_NOMATCH);
3697     }
3698     if (eptr >= md->end_subject)
3699     {
3700     SCHECK_PARTIAL();
3701     RRETURN(MATCH_NOMATCH);
3702     }
3703 nigel 87 GETCHARINC(c, eptr);
3704     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3705     }
3706 nigel 93 /* Control never gets here */
3707 nigel 87
3708     case PT_LAMP:
3709     for (fi = min;; fi++)
3710     {
3711 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3712 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3713 ph10 426 if (fi >= max)
3714     {
3715     CHECK_PARTIAL();
3716     RRETURN(MATCH_NOMATCH);
3717     }
3718     if (eptr >= md->end_subject)
3719     {
3720     SCHECK_PARTIAL();
3721     RRETURN(MATCH_NOMATCH);
3722     }
3723 nigel 87 GETCHARINC(c, eptr);
3724 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3725 nigel 87 if ((prop_chartype == ucp_Lu ||
3726     prop_chartype == ucp_Ll ||
3727     prop_chartype == ucp_Lt) == prop_fail_result)
3728     RRETURN(MATCH_NOMATCH);
3729     }
3730 nigel 93 /* Control never gets here */
3731 nigel 87
3732     case PT_GC:
3733     for (fi = min;; fi++)
3734     {
3735 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3736 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3737 ph10 426 if (fi >= max)
3738     {
3739     CHECK_PARTIAL();
3740     RRETURN(MATCH_NOMATCH);
3741     }
3742     if (eptr >= md->end_subject)
3743     {
3744     SCHECK_PARTIAL();
3745     RRETURN(MATCH_NOMATCH);
3746     }
3747 nigel 87 GETCHARINC(c, eptr);
3748 ph10 349 prop_category = UCD_CATEGORY(c);
3749 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3750     RRETURN(MATCH_NOMATCH);
3751     }
3752 nigel 93 /* Control never gets here */
3753 nigel 87
3754     case PT_PC:
3755     for (fi = min;; fi++)
3756     {
3757 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3758 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3759 ph10 426 if (fi >= max)
3760     {
3761     CHECK_PARTIAL();
3762     RRETURN(MATCH_NOMATCH);
3763     }
3764     if (eptr >= md->end_subject)
3765     {
3766     SCHECK_PARTIAL();
3767     RRETURN(MATCH_NOMATCH);
3768     }
3769 nigel 87 GETCHARINC(c, eptr);
3770 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3771 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3772     RRETURN(MATCH_NOMATCH);
3773     }
3774 nigel 93 /* Control never gets here */
3775 nigel 87
3776     case PT_SC:
3777     for (fi = min;; fi++)
3778     {
3779 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3780 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3781 ph10 426 if (fi >= max)
3782     {
3783     CHECK_PARTIAL();
3784     RRETURN(MATCH_NOMATCH);
3785     }
3786     if (eptr >= md->end_subject)
3787     {
3788     SCHECK_PARTIAL();
3789     RRETURN(MATCH_NOMATCH);
3790     }
3791 nigel 87 GETCHARINC(c, eptr);
3792 ph10 349 prop_script = UCD_SCRIPT(c);
3793 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3794     RRETURN(MATCH_NOMATCH);
3795     }
3796 nigel 93 /* Control never gets here */
3797 nigel 87
3798     default:
3799     RRETURN(PCRE_ERROR_INTERNAL);
3800 nigel 77 }
3801     }
3802    
3803     /* Match extended Unicode sequences. We will get here only if the
3804     support is in the binary; otherwise a compile-time error occurs. */
3805    
3806     else if (ctype == OP_EXTUNI)
3807     {
3808     for (fi = min;; fi++)
3809     {
3810 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3811 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3812 ph10 426 if (fi >= max)
3813     {
3814     CHECK_PARTIAL();
3815     RRETURN(MATCH_NOMATCH);
3816     }
3817     if (eptr >= md->end_subject)
3818     {
3819     SCHECK_PARTIAL();
3820     RRETURN(MATCH_NOMATCH);
3821     }
3822 nigel 77 GETCHARINCTEST(c, eptr);
3823 ph10 349 prop_category = UCD_CATEGORY(c);
3824 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3825     while (eptr < md->end_subject)
3826     {
3827     int len = 1;
3828 ph10 426 if (!utf8) c = *eptr;
3829     else { GETCHARLEN(c, eptr, len); }
3830 ph10 349 prop_category = UCD_CATEGORY(c);
3831 nigel 77 if (prop_category != ucp_M) break;
3832     eptr += len;
3833     }
3834     }
3835     }
3836    
3837     else
3838     #endif /* SUPPORT_UCP */
3839    
3840     #ifdef SUPPORT_UTF8
3841     /* UTF-8 mode */
3842     if (utf8)
3843     {
3844     for (fi = min;; fi++)
3845     {
3846 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3847 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3848 ph10 426 if (fi >= max)
3849     {
3850     CHECK_PARTIAL();
3851 nigel 91 RRETURN(MATCH_NOMATCH);
3852 ph10 426 }
3853     if (eptr >= md->end_subject)
3854     {
3855     SCHECK_PARTIAL();
3856     RRETURN(MATCH_NOMATCH);
3857     }
3858     if (ctype == OP_ANY && IS_NEWLINE(eptr))
3859     RRETURN(MATCH_NOMATCH);
3860 nigel 77 GETCHARINC(c, eptr);
3861     switch(ctype)
3862     {
3863 ph10 342 case OP_ANY: /* This is the non-NL case */
3864 ph10 345 case OP_ALLANY:
3865 nigel 77 case OP_ANYBYTE:
3866     break;
3867    
3868 nigel 93 case OP_ANYNL:
3869     switch(c)
3870     {
3871     default: RRETURN(MATCH_NOMATCH);
3872     case 0x000d:
3873     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3874     break;
3875     case 0x000a:
3876 ph10 231 break;
3877    
3878 nigel 93 case 0x000b:
3879     case 0x000c:
3880     case 0x0085:
3881     case 0x2028:
3882     case 0x2029:
3883 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3884 nigel 93 break;
3885     }
3886     break;
3887    
3888 ph10 178 case OP_NOT_HSPACE:
3889     switch(c)
3890     {
3891     default: break;
3892     case 0x09: /* HT */
3893     case 0x20: /* SPACE */
3894     case 0xa0: /* NBSP */
3895     case 0x1680: /* OGHAM SPACE MARK */
3896     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3897     case 0x2000: /* EN QUAD */
3898     case 0x2001: /* EM QUAD */
3899     case 0x2002: /* EN SPACE */
3900     case 0x2003: /* EM SPACE */
3901     case 0x2004: /* THREE-PER-EM SPACE */
3902     case 0x2005: /* FOUR-PER-EM SPACE */
3903     case 0x2006: /* SIX-PER-EM SPACE */
3904     case 0x2007: /* FIGURE SPACE */
3905     case 0x2008: /* PUNCTUATION SPACE */
3906     case 0x2009: /* THIN SPACE */
3907     case 0x200A: /* HAIR SPACE */
3908     case 0x202f: /* NARROW NO-BREAK SPACE */
3909     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3910     case 0x3000: /* IDEOGRAPHIC SPACE */
3911     RRETURN(MATCH_NOMATCH);
3912     }
3913     break;
3914    
3915     case OP_HSPACE:
3916     switch(c)
3917     {
3918     default: RRETURN(MATCH_NOMATCH);
3919     case 0x09: /* HT */
3920     case 0x20: /* SPACE */
3921     case 0xa0: /* NBSP */
3922     case 0x1680: /* OGHAM SPACE MARK */
3923     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3924     case 0x2000: /* EN QUAD */
3925     case 0x2001: /* EM QUAD */
3926     case 0x2002: /* EN SPACE */
3927     case 0x2003: /* EM SPACE */
3928     case 0x2004: /* THREE-PER-EM SPACE */
3929     case 0x2005: /* FOUR-PER-EM SPACE */
3930     case 0x2006: /* SIX-PER-EM SPACE */
3931     case 0x2007: /* FIGURE SPACE */
3932     case 0x2008: /* PUNCTUATION SPACE */
3933     case 0x2009: /* THIN SPACE */
3934     case 0x200A: /* HAIR SPACE */
3935     case 0x202f: /* NARROW NO-BREAK SPACE */
3936     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3937     case 0x3000: /* IDEOGRAPHIC SPACE */
3938     break;
3939     }
3940     break;
3941    
3942     case OP_NOT_VSPACE:
3943     switch(c)
3944     {
3945     default: break;
3946     case 0x0a: /* LF */
3947     case 0x0b: /* VT */
3948     case 0x0c: /* FF */
3949     case 0x0d: /* CR */
3950     case 0x85: /* NEL */
3951     case 0x2028: /* LINE SEPARATOR */
3952     case 0x2029: /* PARAGRAPH SEPARATOR */
3953     RRETURN(MATCH_NOMATCH);
3954     }
3955     break;
3956    
3957     case OP_VSPACE:
3958     switch(c)
3959     {
3960     default: RRETURN(MATCH_NOMATCH);
3961     case 0x0a: /* LF */
3962     case 0x0b: /* VT */
3963     case 0x0c: /* FF */
3964     case 0x0d: /* CR */
3965     case 0x85: /* NEL */
3966     case 0x2028: /* LINE SEPARATOR */
3967     case 0x2029: /* PARAGRAPH SEPARATOR */
3968     break;
3969     }
3970     break;
3971    
3972 nigel 77 case OP_NOT_DIGIT:
3973     if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3974     RRETURN(MATCH_NOMATCH);
3975     break;
3976    
3977     case OP_DIGIT:
3978     if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3979     RRETURN(MATCH_NOMATCH);
3980     break;
3981    
3982     case OP_NOT_WHITESPACE:
3983     if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3984     RRETURN(MATCH_NOMATCH);
3985     break;
3986    
3987     case OP_WHITESPACE:
3988     if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3989     RRETURN(MATCH_NOMATCH);
3990     break;
3991    
3992     case OP_NOT_WORDCHAR:
3993     if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3994     RRETURN(MATCH_NOMATCH);
3995     break;
3996    
3997     case OP_WORDCHAR:
3998     if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3999     RRETURN(MATCH_NOMATCH);
4000     break;
4001    
4002     default:
4003     RRETURN(PCRE_ERROR_INTERNAL);
4004     }
4005     }
4006     }
4007     else
4008     #endif
4009     /* Not UTF-8 mode */
4010     {
4011     for (fi = min;; fi++)
4012     {
4013 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4014 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4015 ph10 426 if (fi >= max)
4016     {
4017     CHECK_PARTIAL();
4018 nigel 91 RRETURN(MATCH_NOMATCH);
4019 ph10 426 }
4020     if (eptr >= md->end_subject)
4021     {
4022     SCHECK_PARTIAL();
4023     RRETURN(MATCH_NOMATCH);
4024     }
4025     if (ctype == OP_ANY && IS_NEWLINE(eptr))
4026     RRETURN(MATCH_NOMATCH);
4027 nigel 77 c = *eptr++;
4028     switch(ctype)
4029     {
4030 ph10 342 case OP_ANY: /* This is the non-NL case */
4031 ph10 345 case OP_ALLANY:
4032 nigel 77 case OP_ANYBYTE:
4033     break;
4034    
4035 nigel 93 case OP_ANYNL:
4036     switch(c)
4037     {
4038     default: RRETURN(MATCH_NOMATCH);
4039     case 0x000d:
4040     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4041     break;
4042 ph10 231
4043 nigel 93 case 0x000a:
4044 ph10 231 break;
4045    
4046 nigel 93 case 0x000b:
4047     case 0x000c:
4048     case 0x0085:
4049 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4050 nigel 93 break;
4051     }
4052     break;
4053    
4054 ph10 178 case OP_NOT_HSPACE:
4055     switch(c)
4056     {
4057     default: break;
4058     case 0x09: /* HT */
4059     case 0x20: /* SPACE */
4060     case 0xa0: /* NBSP */
4061     RRETURN(MATCH_NOMATCH);
4062     }
4063     break;
4064    
4065     case OP_HSPACE:
4066     switch(c)
4067     {
4068     default: RRETURN(MATCH_NOMATCH);
4069     case 0x09: /* HT */
4070     case 0x20: /* SPACE */
4071     case 0xa0: /* NBSP */
4072     break;
4073     }
4074     break;
4075    
4076     case OP_NOT_VSPACE:
4077     switch(c)
4078     {
4079     default: break;
4080     case 0x0a: /* LF */
4081     case 0x0b: /* VT */
4082     case 0x0c: /* FF */
4083     case 0x0d: /* CR */
4084     case 0x85: /* NEL */
4085     RRETURN(MATCH_NOMATCH);
4086     }
4087     break;
4088    
4089   &