/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 197 - (hide annotations) (download)
Tue Jul 31 10:50:18 2007 UTC (5 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 146712 byte(s)
Abolish the NULLWSLIMIT error at the expense of using more stack when an 
unlimited repeat could match an empty string. Also, doc tidies for a test 
release.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 117 Copyright (c) 1997-2007 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 nigel 93 #define NLBLOCK md /* Block containing newline information */
46     #define PSSTART start_subject /* Field containing processed string start */
47     #define PSEND end_subject /* Field containing processed string end */
48    
49 nigel 77 #include "pcre_internal.h"
50    
51 ph10 137 /* Undefine some potentially clashing cpp symbols */
52    
53     #undef min
54     #undef max
55    
56 nigel 77 /* Flag bits for the match() function */
57    
58 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
59     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
60 nigel 77
61     /* Non-error returns from the match() function. Error returns are externally
62     defined PCRE_ERROR_xxx codes, which are all negative. */
63    
64     #define MATCH_MATCH 1
65     #define MATCH_NOMATCH 0
66    
67     /* Maximum number of ints of offset to save on the stack for recursive calls.
68     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
69     because the offset vector is always a multiple of 3 long. */
70    
71     #define REC_STACK_SAVE_MAX 30
72    
73     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
74    
/* Parallel six-entry tables: rep_min[n] and rep_max[n] give the minimum and
maximum repeat counts for the n-th common repeat. A maximum of 0 stands for
"no upper limit".
NOTE(review): the entries presumably correspond to *, *?, +, +?, ?, ?? in that
order - confirm against the opcode definitions. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
77    
78    
79    
80     #ifdef DEBUG
81     /*************************************************
82     * Debugging function to print chars *
83     *************************************************/
84    
85     /* Print a sequence of chars in printable format, stopping at the end of the
86     subject if requested.
87    
88     Arguments:
89     p points to characters
90     length number to print
91     is_subject TRUE if printing from within md->start_subject
92     md pointer to matching data block, if is_subject is TRUE
93    
94     Returns: nothing
95     */
96    
97     static void
98     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
99     {
100 nigel 93 unsigned int c;
101 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
102     while (length-- > 0)
103     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
104     }
105     #endif
106    
107    
108    
109     /*************************************************
110     * Match a back-reference *
111     *************************************************/
112    
113     /* If a back reference hasn't been set, the length that is passed is greater
114     than the number of characters left in the string, so the match fails.
115    
116     Arguments:
117     offset index into the offset vector
118     eptr points into the subject
119     length length to be matched
120     md points to match data block
121     ims the ims flags
122    
123     Returns: TRUE if matched
124     */
125    
126     static BOOL
127 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
128 nigel 77 unsigned long int ims)
129     {
130 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
131 nigel 77
132     #ifdef DEBUG
133     if (eptr >= md->end_subject)
134     printf("matching subject <null>");
135     else
136     {
137     printf("matching subject ");
138     pchars(eptr, length, TRUE, md);
139     }
140     printf(" against backref ");
141     pchars(p, length, FALSE, md);
142     printf("\n");
143     #endif
144    
145     /* Always fail if not enough characters left */
146    
147     if (length > md->end_subject - eptr) return FALSE;
148    
149     /* Separate the caselesss case for speed */
150    
151     if ((ims & PCRE_CASELESS) != 0)
152     {
153     while (length-- > 0)
154     if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
155     }
156     else
157     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
158    
159     return TRUE;
160     }
161    
162    
163    
164     /***************************************************************************
165     ****************************************************************************
166     RECURSION IN THE match() FUNCTION
167    
168 nigel 87 The match() function is highly recursive, though not every recursive call
169     increases the recursive depth. Nevertheless, some regular expressions can cause
170     it to recurse to a great depth. I was writing for Unix, so I just let it call
171     itself recursively. This uses the stack for saving everything that has to be
172     saved for a recursive call. On Unix, the stack can be large, and this works
173     fine.
174 nigel 77
175 nigel 87 It turns out that on some non-Unix-like systems there are problems with
176     programs that use a lot of stack. (This despite the fact that every last chip
177     has oodles of memory these days, and techniques for extending the stack have
178     been known for decades.) So....
179 nigel 77
180     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
181     calls by keeping local variables that need to be preserved in blocks of memory
182 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
183 nigel 77 achieve this so that the actual code doesn't look very different to what it
184     always used to.
185 ph10 164
186 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
187 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
188     Switzer, the use of longjmp() has been abolished, at the cost of having to
189     provide a unique number for each call to RMATCH. There is no way of generating
190     a sequence of numbers at compile time in C. I have given them names, to make
191     them stand out more clearly.
192    
193     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
194     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
195 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
196     don't have indeterminate values; this has meant that the frame size can be
197 ph10 164 reduced because the result can be "passed back" by straight setting of the
198     variable instead of being passed in the frame.
199 nigel 77 ****************************************************************************
200     ***************************************************************************/
201    
202    
203 ph10 164 /* Numbers for RMATCH calls */
204    
/* One distinct, non-zero identifier per RMATCH call site. The heap-based
version of RMATCH stores the identifier in the calling frame's Xwhere field
and pastes it into an L_<name> label placed just after the call. */

enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
       RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50 };
210 ph10 164
211 ph10 165
212 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
213 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
214 ph10 164 actually used in this definition. */
215 nigel 77
216     #ifndef NO_RECURSE
217     #define REGISTER register
218 ph10 164
/* Stack-recursion forms of RMATCH and RRETURN. RMATCH makes a genuine
recursive call to match(), passing the caller's mstart through unchanged and
rdepth+1 as the depth, and leaves the result in rrc; its "rw" argument is
ignored. RRETURN is a plain return. The DEBUG forms additionally trace each
call and return with the source line number. */

#ifdef DEBUG
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }
#else
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
#define RRETURN(ra) return ra
#endif
236    
237 nigel 77 #else
238    
239    
240 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
241     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
242     argument of match(), which never changes. */
243 nigel 77
244     #define REGISTER
245    
/* RMATCH (heap version): simulate a recursive call to match(). A new frame is
obtained from pcre_stack_malloc(), the resume point "rw" is recorded in the
current frame, the "arguments" are copied into the new frame, and control
jumps to HEAP_RECURSE. The L_<rw> label is where execution resumes once the
simulated call has returned, with the result in rrc.

If the frame cannot be allocated, the current frame gives up with
PCRE_ERROR_NOMEMORY instead of writing through a NULL pointer. RRETURN then
unwinds as for any other error return. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* RRETURN (heap version): simulate a return from match(). The current frame
is released with pcre_stack_free() and the previous frame becomes current. If
there is a previous frame, the result is handed back in rrc and control jumps
to HEAP_RETURN; otherwise this was the top-level frame and the function really
returns. */

#define RRETURN(ra)\
  {\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
278    
279    
280     /* Structure for remembering the local variables in a private frame */
281    
282     typedef struct heapframe {
283     struct heapframe *Xprevframe;
284    
285     /* Function arguments that may change */
286    
287     const uschar *Xeptr;
288     const uschar *Xecode;
289 ph10 172 const uschar *Xmstart;
290 nigel 77 int Xoffset_top;
291     long int Xims;
292     eptrblock *Xeptrb;
293     int Xflags;
294 nigel 91 unsigned int Xrdepth;
295 nigel 77
296     /* Function local variables */
297    
298     const uschar *Xcallpat;
299     const uschar *Xcharptr;
300     const uschar *Xdata;
301     const uschar *Xnext;
302     const uschar *Xpp;
303     const uschar *Xprev;
304     const uschar *Xsaved_eptr;
305    
306     recursion_info Xnew_recursive;
307    
308     BOOL Xcur_is_word;
309     BOOL Xcondition;
310     BOOL Xprev_is_word;
311    
312     unsigned long int Xoriginal_ims;
313    
314     #ifdef SUPPORT_UCP
315     int Xprop_type;
316 nigel 87 int Xprop_value;
317 nigel 77 int Xprop_fail_result;
318     int Xprop_category;
319     int Xprop_chartype;
320 nigel 87 int Xprop_script;
321 ph10 123 int Xoclength;
322     uschar Xocchars[8];
323 nigel 77 #endif
324    
325     int Xctype;
326 nigel 93 unsigned int Xfc;
327 nigel 77 int Xfi;
328     int Xlength;
329     int Xmax;
330     int Xmin;
331     int Xnumber;
332     int Xoffset;
333     int Xop;
334     int Xsave_capture_last;
335     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
336     int Xstacksave[REC_STACK_SAVE_MAX];
337    
338     eptrblock Xnewptrb;
339    
340 ph10 164 /* Where to jump back to */
341 nigel 77
342 ph10 164 int Xwhere;
343 ph10 165
344 nigel 77 } heapframe;
345    
346     #endif
347    
348    
349     /***************************************************************************
350     ***************************************************************************/
351    
352    
353    
354     /*************************************************
355     * Match from current position *
356     *************************************************/
357    
358 nigel 93 /* This function is called recursively in many circumstances. Whenever it
359 nigel 77 returns a negative (error) response, the outer incarnation must also return the
360     same response.
361    
362     Performance note: It might be tempting to extract commonly used fields from the
363     md structure (e.g. utf8, end_subject) into individual variables to improve
364     performance. Tests using gcc on a SPARC disproved this; in the first case, it
365     made performance worse.
366    
367     Arguments:
368 nigel 93 eptr pointer to current character in subject
369     ecode pointer to current position in compiled code
370 ph10 168 mstart pointer to the current match start position (can be modified
371 ph10 172 by encountering \K)
372 nigel 77 offset_top current top pointer
373     md pointer to "static" info for the match
374     ims current /i, /m, and /s options
375     eptrb pointer to chain of blocks containing eptr at start of
376     brackets - for testing for empty matches
377     flags can contain
378     match_condassert - this is an assertion condition
379 nigel 93 match_cbegroup - this is the start of an unlimited repeat
380     group that can match an empty string
381 nigel 87 rdepth the recursion depth
382 nigel 77
383     Returns: MATCH_MATCH if matched ) these values are >= 0
384     MATCH_NOMATCH if failed to match )
385     a negative PCRE_ERROR_xxx value if aborted by an error condition
386 nigel 87 (e.g. stopped by repeated call or recursion limit)
387 nigel 77 */
388    
389     static int
390 ph10 172 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
391 nigel 77 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
392 nigel 91 int flags, unsigned int rdepth)
393 nigel 77 {
394     /* These variables do not need to be preserved over recursion in this function,
395 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
396     "register" because they are used a lot in loops. */
397 nigel 77
398 nigel 91 register int rrc; /* Returns from recursive calls */
399     register int i; /* Used for loops not involving calls to RMATCH() */
400 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
401 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
402 nigel 77
403 nigel 93 BOOL minimize, possessive; /* Quantifier options */
404    
405 nigel 77 /* When recursion is not being used, all "local" variables that have to be
406     preserved over calls to RMATCH() are part of a "frame" which is obtained from
407     heap storage. Set up the top-level frame here; others are obtained from the
408     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
409    
410     #ifdef NO_RECURSE
411     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
412     frame->Xprevframe = NULL; /* Marks the top level */
413    
414     /* Copy in the original argument variables */
415    
416     frame->Xeptr = eptr;
417     frame->Xecode = ecode;
418 ph10 168 frame->Xmstart = mstart;
419 nigel 77 frame->Xoffset_top = offset_top;
420     frame->Xims = ims;
421     frame->Xeptrb = eptrb;
422     frame->Xflags = flags;
423 nigel 87 frame->Xrdepth = rdepth;
424 nigel 77
425     /* This is where control jumps back to in order to effect "recursion" */
426    
427     HEAP_RECURSE:
428    
429     /* Macros make the argument variables come from the current frame */
430    
431     #define eptr frame->Xeptr
432     #define ecode frame->Xecode
433 ph10 168 #define mstart frame->Xmstart
434 nigel 77 #define offset_top frame->Xoffset_top
435     #define ims frame->Xims
436     #define eptrb frame->Xeptrb
437     #define flags frame->Xflags
438 nigel 87 #define rdepth frame->Xrdepth
439 nigel 77
440     /* Ditto for the local variables */
441    
442     #ifdef SUPPORT_UTF8
443     #define charptr frame->Xcharptr
444     #endif
445     #define callpat frame->Xcallpat
446     #define data frame->Xdata
447     #define next frame->Xnext
448     #define pp frame->Xpp
449     #define prev frame->Xprev
450     #define saved_eptr frame->Xsaved_eptr
451    
452     #define new_recursive frame->Xnew_recursive
453    
454     #define cur_is_word frame->Xcur_is_word
455     #define condition frame->Xcondition
456     #define prev_is_word frame->Xprev_is_word
457    
458     #define original_ims frame->Xoriginal_ims
459    
460     #ifdef SUPPORT_UCP
461     #define prop_type frame->Xprop_type
462 nigel 87 #define prop_value frame->Xprop_value
463 nigel 77 #define prop_fail_result frame->Xprop_fail_result
464     #define prop_category frame->Xprop_category
465     #define prop_chartype frame->Xprop_chartype
466 nigel 87 #define prop_script frame->Xprop_script
467 ph10 115 #define oclength frame->Xoclength
468     #define occhars frame->Xocchars
469 nigel 77 #endif
470    
471     #define ctype frame->Xctype
472     #define fc frame->Xfc
473     #define fi frame->Xfi
474     #define length frame->Xlength
475     #define max frame->Xmax
476     #define min frame->Xmin
477     #define number frame->Xnumber
478     #define offset frame->Xoffset
479     #define op frame->Xop
480     #define save_capture_last frame->Xsave_capture_last
481     #define save_offset1 frame->Xsave_offset1
482     #define save_offset2 frame->Xsave_offset2
483     #define save_offset3 frame->Xsave_offset3
484     #define stacksave frame->Xstacksave
485    
486     #define newptrb frame->Xnewptrb
487    
488     /* When recursion is being used, local variables are allocated on the stack and
489     get preserved during recursion in the normal way. In this environment, fi and
490     i, and fc and c, can be the same variables. */
491    
492 nigel 93 #else /* NO_RECURSE not defined */
493 nigel 77 #define fi i
494     #define fc c
495    
496    
497 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
498     const uschar *charptr; /* in small blocks of the code. My normal */
499     #endif /* style of coding would have declared */
500     const uschar *callpat; /* them within each of those blocks. */
501     const uschar *data; /* However, in order to accommodate the */
502     const uschar *next; /* version of this code that uses an */
503     USPTR pp; /* external "stack" implemented on the */
504     const uschar *prev; /* heap, it is easier to declare them all */
505     USPTR saved_eptr; /* here, so the declarations can be cut */
506     /* out in a block. The only declarations */
507     recursion_info new_recursive; /* within blocks below are for variables */
508     /* that do not have to be preserved over */
509     BOOL cur_is_word; /* a recursive call to RMATCH(). */
510     BOOL condition;
511 nigel 77 BOOL prev_is_word;
512    
513     unsigned long int original_ims;
514    
515     #ifdef SUPPORT_UCP
516     int prop_type;
517 nigel 87 int prop_value;
518 nigel 77 int prop_fail_result;
519     int prop_category;
520     int prop_chartype;
521 nigel 87 int prop_script;
522 ph10 115 int oclength;
523     uschar occhars[8];
524 nigel 77 #endif
525    
526     int ctype;
527     int length;
528     int max;
529     int min;
530     int number;
531     int offset;
532     int op;
533     int save_capture_last;
534     int save_offset1, save_offset2, save_offset3;
535     int stacksave[REC_STACK_SAVE_MAX];
536    
537     eptrblock newptrb;
538 nigel 93 #endif /* NO_RECURSE */
539 nigel 77
540     /* These statements are here to stop the compiler complaining about uninitialized
541     variables. */
542    
543     #ifdef SUPPORT_UCP
544 nigel 87 prop_value = 0;
545 nigel 77 prop_fail_result = 0;
546     #endif
547    
548 nigel 93
549 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
550     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
551     used. Thanks to Ian Taylor for noticing this possibility and sending the
552     original patch. */
553    
554     TAIL_RECURSE:
555    
556 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
557     are specified by the macro RMATCH and RRETURN is used to return. When
558     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
559     and a "return", respectively (possibly with some debugging if DEBUG is
560     defined). However, RMATCH isn't like a function call because it's quite a
561     complicated macro. It has to be used in one particular way. This shouldn't,
562     however, impact performance when true recursion is being used. */
563 nigel 77
564 ph10 164 #ifdef SUPPORT_UTF8
565     utf8 = md->utf8; /* Local copy of the flag */
566     #else
567     utf8 = FALSE;
568     #endif
569    
570 nigel 87 /* First check that we haven't called match() too many times, or that we
571     haven't exceeded the recursive call limit. */
572    
573 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
574 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
575 nigel 77
576     original_ims = ims; /* Save for resetting on ')' */
577 nigel 91
578 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
579     string, the match_cbegroup flag is set. When this is the case, add the current
580     subject pointer to the chain of such remembered pointers, to be checked when we
581     hit the closing ket, in order to break infinite loops that match no characters.
582 ph10 197 When match() is called in other circumstances, don't add to the chain. The
583     match_cbegroup flag must NOT be used with tail recursion, because the memory
584     block that is used is on the stack, so a new one may be required for each
585     match(). */
586 nigel 77
587 nigel 93 if ((flags & match_cbegroup) != 0)
588 nigel 77 {
589 ph10 197 newptrb.epb_saved_eptr = eptr;
590     newptrb.epb_prev = eptrb;
591     eptrb = &newptrb;
592 nigel 77 }
593    
594 nigel 93 /* Now start processing the opcodes. */
595 nigel 77
596     for (;;)
597     {
598 nigel 93 minimize = possessive = FALSE;
599 nigel 77 op = *ecode;
600    
601     /* For partial matching, remember if we ever hit the end of the subject after
602     matching at least one subject character. */
603    
604     if (md->partial &&
605     eptr >= md->end_subject &&
606 ph10 168 eptr > mstart)
607 nigel 77 md->hitend = TRUE;
608    
609 nigel 93 switch(op)
610     {
611     /* Handle a capturing bracket. If there is space in the offset vector, save
612     the current subject position in the working slot at the top of the vector.
613     We mustn't change the current values of the data slot, because they may be
614     set from a previous iteration of this group, and be referred to by a
615     reference inside the group.
616 nigel 77
617 nigel 93 If the bracket fails to match, we need to restore this value and also the
618     values of the final offsets, in case they were set by a previous iteration
619     of the same bracket.
620 nigel 77
621 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
622     a non-capturing bracket. Don't worry about setting the flag for the error
623     case here; that is handled in the code for KET. */
624 nigel 77
625 nigel 93 case OP_CBRA:
626     case OP_SCBRA:
627     number = GET2(ecode, 1+LINK_SIZE);
628 nigel 77 offset = number << 1;
629    
630     #ifdef DEBUG
631 nigel 93 printf("start bracket %d\n", number);
632     printf("subject=");
633 nigel 77 pchars(eptr, 16, TRUE, md);
634     printf("\n");
635     #endif
636    
637     if (offset < md->offset_max)
638     {
639     save_offset1 = md->offset_vector[offset];
640     save_offset2 = md->offset_vector[offset+1];
641     save_offset3 = md->offset_vector[md->offset_end - number];
642     save_capture_last = md->capture_last;
643    
644     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
645     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
646    
647 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
648 nigel 77 do
649     {
650 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
651     ims, eptrb, flags, RM1);
652 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
653     md->capture_last = save_capture_last;
654     ecode += GET(ecode, 1);
655     }
656     while (*ecode == OP_ALT);
657    
658     DPRINTF(("bracket %d failed\n", number));
659    
660     md->offset_vector[offset] = save_offset1;
661     md->offset_vector[offset+1] = save_offset2;
662     md->offset_vector[md->offset_end - number] = save_offset3;
663    
664     RRETURN(MATCH_NOMATCH);
665     }
666    
667 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
668     as a non-capturing bracket. */
669 nigel 77
670 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
671     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
672    
673 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
674 nigel 77
675 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
676     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
677    
678 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
679     final alternative within the brackets, we would return the result of a
680     recursive call to match() whatever happened. We can reduce stack usage by
681 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
682     is set.*/
683 nigel 77
684 nigel 93 case OP_BRA:
685     case OP_SBRA:
686     DPRINTF(("start non-capturing bracket\n"));
687     flags = (op >= OP_SBRA)? match_cbegroup : 0;
688 nigel 91 for (;;)
689 nigel 77 {
690 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
691 nigel 93 {
692 ph10 197 if (flags == 0) /* Not a possibly empty group */
693     {
694     ecode += _pcre_OP_lengths[*ecode];
695     DPRINTF(("bracket 0 tail recursion\n"));
696     goto TAIL_RECURSE;
697     }
698    
699     /* Possibly empty group; can't use tail recursion. */
700    
701     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
702     eptrb, flags, RM48);
703     RRETURN(rrc);
704 nigel 93 }
705 nigel 91
706     /* For non-final alternatives, continue the loop for a NOMATCH result;
707     otherwise return. */
708    
709 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
710     eptrb, flags, RM2);
711 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
712     ecode += GET(ecode, 1);
713     }
714 nigel 91 /* Control never reaches here. */
715 nigel 77
716     /* Conditional group: compilation checked that there are no more than
717     two branches. If the condition is false, skipping the first branch takes us
718     past the end if there is only one branch, but that's OK because that is
719 nigel 91 exactly what going to the ket would do. As there is only one branch to be
720     obeyed, we can use tail recursion to avoid using another stack frame. */
721 nigel 77
722     case OP_COND:
723 nigel 93 case OP_SCOND:
724     if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
725 nigel 77 {
726 nigel 93 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
727     condition = md->recursive != NULL &&
728     (offset == RREF_ANY || offset == md->recursive->group_num);
729     ecode += condition? 3 : GET(ecode, 1);
730     }
731    
732     else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */
733     {
734 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
735 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
736     ecode += condition? 3 : GET(ecode, 1);
737 nigel 77 }
738    
739 nigel 93 else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */
740     {
741     condition = FALSE;
742     ecode += GET(ecode, 1);
743     }
744    
745 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
746 nigel 93 the flags argument to match_condassert causes it to stop at the end of an
747     assertion. */
748 nigel 77
749     else
750     {
751 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
752     match_condassert, RM3);
753 nigel 77 if (rrc == MATCH_MATCH)
754     {
755 nigel 93 condition = TRUE;
756     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
757 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
758     }
759     else if (rrc != MATCH_NOMATCH)
760     {
761     RRETURN(rrc); /* Need braces because of following else */
762     }
763 nigel 93 else
764     {
765     condition = FALSE;
766     ecode += GET(ecode, 1);
767     }
768     }
769 nigel 91
770 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
771 ph10 197 we can use tail recursion to avoid using another stack frame, except when
772     match_cbegroup is required for an unlimited repeat of a possibly empty
773     group. If the second alternative doesn't exist, we can just plough on. */
774 nigel 91
775 nigel 93 if (condition || *ecode == OP_ALT)
776     {
777 nigel 91 ecode += 1 + LINK_SIZE;
778 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
779     {
780     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
781     RRETURN(rrc);
782     }
783     else /* Group must match something */
784     {
785     flags = 0;
786     goto TAIL_RECURSE;
787     }
788 nigel 77 }
789 ph10 197 else /* Condition false & no 2nd alternative */
790 nigel 93 {
791     ecode += 1 + LINK_SIZE;
792     }
793     break;
794 nigel 77
795    
796 nigel 93 /* End of the pattern. If we are in a top-level recursion, we should
797     restore the offsets appropriately and continue from after the call. */
798 nigel 77
799     case OP_END:
800     if (md->recursive != NULL && md->recursive->group_num == 0)
801     {
802     recursion_info *rec = md->recursive;
803 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
804 nigel 77 md->recursive = rec->prevrec;
805     memmove(md->offset_vector, rec->offset_save,
806     rec->saved_max * sizeof(int));
807 ph10 168 mstart = rec->save_start;
808 nigel 77 ims = original_ims;
809     ecode = rec->after_call;
810     break;
811     }
812    
813     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
814     string - backtracking will then try other alternatives, if any. */
815    
816 ph10 168 if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
817     md->end_match_ptr = eptr; /* Record where we ended */
818     md->end_offset_top = offset_top; /* and how many extracts were taken */
819     md->start_match_ptr = mstart; /* and the start (\K can modify) */
820 nigel 77 RRETURN(MATCH_MATCH);
821    
822     /* Change option settings */
823    
824     case OP_OPT:
825     ims = ecode[1];
826     ecode += 2;
827     DPRINTF(("ims set to %02lx\n", ims));
828     break;
829    
830     /* Assertion brackets. Check the alternative branches in turn - the
831     matching won't pass the KET for an assertion. If any one branch matches,
832     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
833     start of each branch to move the current point backwards, so the code at
834     this level is identical to the lookahead case. */
835    
836     case OP_ASSERT:
837     case OP_ASSERTBACK:
838     do
839     {
840 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
841     RM4);
842 nigel 77 if (rrc == MATCH_MATCH) break;
843     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
844     ecode += GET(ecode, 1);
845     }
846     while (*ecode == OP_ALT);
847     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
848    
849     /* If checking an assertion for a condition, return MATCH_MATCH. */
850    
851     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
852    
853     /* Continue from after the assertion, updating the offsets high water
854     mark, since extracts may have been taken during the assertion. */
855    
856     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
857     ecode += 1 + LINK_SIZE;
858     offset_top = md->end_offset_top;
859     continue;
860    
861     /* Negative assertion: all branches must fail to match */
862    
863     case OP_ASSERT_NOT:
864     case OP_ASSERTBACK_NOT:
865     do
866     {
867 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
868     RM5);
869 nigel 77 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
870     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
871     ecode += GET(ecode,1);
872     }
873     while (*ecode == OP_ALT);
874    
875     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
876    
877     ecode += 1 + LINK_SIZE;
878     continue;
879    
880     /* Move the subject pointer back. This occurs only at the start of
881     each branch of a lookbehind assertion. If we are too close to the start to
882     move back, this match function fails. When working with UTF-8 we move
883     back a number of characters, not bytes. */
884    
885     case OP_REVERSE:
886     #ifdef SUPPORT_UTF8
887     if (utf8)
888     {
889 nigel 93 i = GET(ecode, 1);
890     while (i-- > 0)
891 nigel 77 {
892     eptr--;
893     if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
894     BACKCHAR(eptr)
895     }
896     }
897     else
898     #endif
899    
900     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
901    
902     {
903 nigel 93 eptr -= GET(ecode, 1);
904 nigel 77 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
905     }
906    
907     /* Skip to next op code */
908    
909     ecode += 1 + LINK_SIZE;
910     break;
911    
912     /* The callout item calls an external function, if one is provided, passing
913     details of the match so far. This is mainly for debugging, though the
914     function is able to force a failure. */
915    
916     case OP_CALLOUT:
917     if (pcre_callout != NULL)
918     {
919     pcre_callout_block cb;
920     cb.version = 1; /* Version 1 of the callout block */
921     cb.callout_number = ecode[1];
922     cb.offset_vector = md->offset_vector;
923 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
924 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
925 ph10 168 cb.start_match = mstart - md->start_subject;
926 nigel 77 cb.current_position = eptr - md->start_subject;
927     cb.pattern_position = GET(ecode, 2);
928     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
929     cb.capture_top = offset_top/2;
930     cb.capture_last = md->capture_last;
931     cb.callout_data = md->callout_data;
932     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
933     if (rrc < 0) RRETURN(rrc);
934     }
935     ecode += 2 + 2*LINK_SIZE;
936     break;
937    
938     /* Recursion either matches the current regex, or some subexpression. The
939     offset data is the offset to the starting bracket from the start of the
940     whole pattern. (This is so that it works from duplicated subpatterns.)
941    
942     If there are any capturing brackets started but not finished, we have to
943     save their starting points and reinstate them after the recursion. However,
944     we don't know how many such there are (offset_top records the completed
945     total) so we just have to save all the potential data. There may be up to
946     65535 such values, which is too large to put on the stack, but using malloc
947     for small numbers seems expensive. As a compromise, the stack is used when
948     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
949     is used. If the malloc fails, the recursion is not attempted: the error
950     PCRE_ERROR_NOMEMORY is returned immediately, before any offset data has
951     been copied.
952    
953     There are also other values that have to be saved. We use a chained
954     sequence of blocks that actually live on the stack. Thanks to Robin Houston
955     for the original version of this logic. */
956    
957     case OP_RECURSE:
958     {
959     callpat = md->start_code + GET(ecode, 1);
960 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
961     GET2(callpat, 1 + LINK_SIZE);
962 nigel 77
963     /* Add to "recursing stack" */
964    
965     new_recursive.prevrec = md->recursive;
966     md->recursive = &new_recursive;
967    
968     /* Find where to continue from afterwards */
969    
970     ecode += 1 + LINK_SIZE;
971     new_recursive.after_call = ecode;
972    
973     /* Now save the offset data. */
974    
975     new_recursive.saved_max = md->offset_end;
976     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
977     new_recursive.offset_save = stacksave;
978     else
979     {
980     new_recursive.offset_save =
981     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
982     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
983     }
984    
985     memcpy(new_recursive.offset_save, md->offset_vector,
986     new_recursive.saved_max * sizeof(int));
987 ph10 168 new_recursive.save_start = mstart;
988     mstart = eptr;
989 nigel 77
990     /* OK, now we can do the recursion. For each top-level alternative we
991     restore the offset and recursion data. */
992    
993     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
994 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
995 nigel 77 do
996     {
997 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
998     md, ims, eptrb, flags, RM6);
999 nigel 77 if (rrc == MATCH_MATCH)
1000     {
1001 nigel 87 DPRINTF(("Recursion matched\n"));
1002 nigel 77 md->recursive = new_recursive.prevrec;
1003     if (new_recursive.offset_save != stacksave)
1004     (pcre_free)(new_recursive.offset_save);
1005     RRETURN(MATCH_MATCH);
1006     }
1007 nigel 87 else if (rrc != MATCH_NOMATCH)
1008     {
1009     DPRINTF(("Recursion gave error %d\n", rrc));
1010     RRETURN(rrc);
1011     }
1012 nigel 77
1013     md->recursive = &new_recursive;
1014     memcpy(md->offset_vector, new_recursive.offset_save,
1015     new_recursive.saved_max * sizeof(int));
1016     callpat += GET(callpat, 1);
1017     }
1018     while (*callpat == OP_ALT);
1019    
1020     DPRINTF(("Recursion didn't match\n"));
1021     md->recursive = new_recursive.prevrec;
1022     if (new_recursive.offset_save != stacksave)
1023     (pcre_free)(new_recursive.offset_save);
1024     RRETURN(MATCH_NOMATCH);
1025     }
1026     /* Control never reaches here */
1027    
1028     /* "Once" brackets are like assertion brackets except that after a match,
1029     the point in the subject string is not moved back. Thus there can never be
1030     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1031     Check the alternative branches in turn - the matching won't pass the KET
1032     for this kind of subpattern. If any one branch matches, we carry on as at
1033     the end of a normal bracket, leaving the subject pointer. */
1034    
1035     case OP_ONCE:
1036 nigel 91 prev = ecode;
1037     saved_eptr = eptr;
1038    
1039     do
1040 nigel 77 {
1041 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1042 nigel 91 if (rrc == MATCH_MATCH) break;
1043     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1044     ecode += GET(ecode,1);
1045     }
1046     while (*ecode == OP_ALT);
1047 nigel 77
1048 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1049 nigel 77
1050 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1051 nigel 77
1052 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1053     mark, since extracts may have been taken. */
1054 nigel 77
1055 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1056 nigel 77
1057 nigel 91 offset_top = md->end_offset_top;
1058     eptr = md->end_match_ptr;
1059 nigel 77
1060 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1061     happens for a repeating ket if no characters were matched in the group.
1062     This is the forcible breaking of infinite loops as implemented in Perl
1063     5.005. If there is an options reset, it will get obeyed in the normal
1064     course of events. */
1065 nigel 77
1066 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1067     {
1068     ecode += 1+LINK_SIZE;
1069     break;
1070     }
1071 nigel 77
1072 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1073     preceding bracket, in the appropriate order. The second "call" of match()
1074     uses tail recursion, to avoid using another stack frame. We need to reset
1075     any options that changed within the bracket before re-running it, so
1076     check the next opcode. */
1077 nigel 77
1078 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1079     {
1080     ims = (ims & ~PCRE_IMS) | ecode[4];
1081     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1082     }
1083 nigel 77
1084 nigel 91 if (*ecode == OP_KETRMIN)
1085     {
1086 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1087 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1088     ecode = prev;
1089 ph10 197 flags = 0;
1090 nigel 91 goto TAIL_RECURSE;
1091 nigel 77 }
1092 nigel 91 else /* OP_KETRMAX */
1093     {
1094 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1095 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1096     ecode += 1 + LINK_SIZE;
1097 ph10 197 flags = 0;
1098 nigel 91 goto TAIL_RECURSE;
1099     }
1100     /* Control never gets here */
1101 nigel 77
1102     /* An alternation is the end of a branch; scan along to find the end of the
1103     bracketed group and go to there. */
1104    
1105     case OP_ALT:
1106     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1107     break;
1108    
1109     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1110     that it may occur zero times. It may repeat infinitely, or not at all -
1111     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1112     repeat limits are compiled as a number of copies, with the optional ones
1113     preceded by BRAZERO or BRAMINZERO. */
1114    
1115     case OP_BRAZERO:
1116     {
1117     next = ecode+1;
1118 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1119 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1120     do next += GET(next,1); while (*next == OP_ALT);
1121 nigel 93 ecode = next + 1 + LINK_SIZE;
1122 nigel 77 }
1123     break;
1124    
1125     case OP_BRAMINZERO:
1126     {
1127     next = ecode+1;
1128 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1129 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1130 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1131     ecode++;
1132     }
1133     break;
1134    
1135 nigel 93 /* End of a group, repeated or non-repeating. */
1136 nigel 77
1137     case OP_KET:
1138     case OP_KETRMIN:
1139     case OP_KETRMAX:
1140 nigel 91 prev = ecode - GET(ecode, 1);
1141 nigel 77
1142 nigel 93 /* If this was a group that remembered the subject start, in order to break
1143     infinite repeats of empty string matches, retrieve the subject start from
1144     the chain. Otherwise, set it NULL. */
1145 nigel 77
1146 nigel 93 if (*prev >= OP_SBRA)
1147     {
1148     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1149     eptrb = eptrb->epb_prev; /* Backup to previous group */
1150     }
1151     else saved_eptr = NULL;
1152 nigel 77
1153 nigel 93 /* If we are at the end of an assertion group, stop matching and return
1154     MATCH_MATCH, but record the current high water mark for use by positive
1155     assertions. Do this also for the "once" (atomic) groups. */
1156    
1157 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1158     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1159     *prev == OP_ONCE)
1160     {
1161     md->end_match_ptr = eptr; /* For ONCE */
1162     md->end_offset_top = offset_top;
1163     RRETURN(MATCH_MATCH);
1164     }
1165 nigel 77
1166 nigel 93 /* For capturing groups we have to check the group number back at the start
1167     and if necessary complete handling an extraction by setting the offsets and
1168     bumping the high water mark. Note that whole-pattern recursion is coded as
1169     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1170     when the OP_END is reached. Other recursion is handled here. */
1171 nigel 77
1172 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1173 nigel 91 {
1174 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1175 nigel 91 offset = number << 1;
1176 nigel 77
1177     #ifdef DEBUG
1178 nigel 91 printf("end bracket %d", number);
1179     printf("\n");
1180 nigel 77 #endif
1181    
1182 nigel 93 md->capture_last = number;
1183     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1184 nigel 91 {
1185 nigel 93 md->offset_vector[offset] =
1186     md->offset_vector[md->offset_end - number];
1187     md->offset_vector[offset+1] = eptr - md->start_subject;
1188     if (offset_top <= offset) offset_top = offset + 2;
1189     }
1190 nigel 77
1191 nigel 93 /* Handle a recursively called group. Restore the offsets
1192     appropriately and continue from after the call. */
1193 nigel 77
1194 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1195     {
1196     recursion_info *rec = md->recursive;
1197     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1198     md->recursive = rec->prevrec;
1199 ph10 168 mstart = rec->save_start;
1200 nigel 93 memcpy(md->offset_vector, rec->offset_save,
1201     rec->saved_max * sizeof(int));
1202     ecode = rec->after_call;
1203     ims = original_ims;
1204     break;
1205 nigel 77 }
1206 nigel 91 }
1207 nigel 77
1208 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1209     flags, in case they got changed during the group. */
1210 nigel 77
1211 nigel 91 ims = original_ims;
1212     DPRINTF(("ims reset to %02lx\n", ims));
1213 nigel 77
1214 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1215     happens for a repeating ket if no characters were matched in the group.
1216     This is the forcible breaking of infinite loops as implemented in Perl
1217     5.005. If there is an options reset, it will get obeyed in the normal
1218     course of events. */
1219 nigel 77
1220 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1221     {
1222     ecode += 1 + LINK_SIZE;
1223     break;
1224     }
1225 nigel 77
1226 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1227     preceding bracket, in the appropriate order. In the second case, we can use
1228 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1229     unlimited repeat of a group that can match an empty string. */
1230 nigel 77
1231 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1232    
1233 nigel 91 if (*ecode == OP_KETRMIN)
1234     {
1235 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1236 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1237 ph10 197 if (flags != 0) /* Could match an empty string */
1238     {
1239     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1240     RRETURN(rrc);
1241     }
1242 nigel 91 ecode = prev;
1243     goto TAIL_RECURSE;
1244 nigel 77 }
1245 nigel 91 else /* OP_KETRMAX */
1246     {
1247 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1248 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1249     ecode += 1 + LINK_SIZE;
1250 ph10 197 flags = 0;
1251 nigel 91 goto TAIL_RECURSE;
1252     }
1253     /* Control never gets here */
1254 nigel 77
1255     /* Start of subject unless notbol, or after internal newline if multiline */
1256    
1257     case OP_CIRC:
1258     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1259     if ((ims & PCRE_MULTILINE) != 0)
1260     {
1261 nigel 91 if (eptr != md->start_subject &&
1262 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1263 nigel 77 RRETURN(MATCH_NOMATCH);
1264     ecode++;
1265     break;
1266     }
1267     /* ... else fall through */
1268    
1269     /* Start of subject assertion */
1270    
1271     case OP_SOD:
1272     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1273     ecode++;
1274     break;
1275    
1276     /* Start of match assertion */
1277    
1278     case OP_SOM:
1279     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1280     ecode++;
1281     break;
1282 ph10 172
1283 ph10 168 /* Reset the start of match point */
1284 ph10 172
1285 ph10 168 case OP_SET_SOM:
1286     mstart = eptr;
1287 ph10 172 ecode++;
1288     break;
1289 nigel 77
1290     /* Assert before internal newline if multiline, or before a terminating
1291     newline unless endonly is set, else end of subject unless noteol is set. */
1292    
1293     case OP_DOLL:
1294     if ((ims & PCRE_MULTILINE) != 0)
1295     {
1296     if (eptr < md->end_subject)
1297 nigel 91 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1298 nigel 77 else
1299     { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1300     ecode++;
1301     break;
1302     }
1303     else
1304     {
1305     if (md->noteol) RRETURN(MATCH_NOMATCH);
1306     if (!md->endonly)
1307     {
1308 nigel 91 if (eptr != md->end_subject &&
1309 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1310 nigel 77 RRETURN(MATCH_NOMATCH);
1311     ecode++;
1312     break;
1313     }
1314     }
1315 nigel 91 /* ... else fall through for endonly */
1316 nigel 77
1317     /* End of subject assertion (\z) */
1318    
1319     case OP_EOD:
1320     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1321     ecode++;
1322     break;
1323    
1324     /* End of subject or ending \n assertion (\Z) */
1325    
1326     case OP_EODN:
1327 nigel 91 if (eptr != md->end_subject &&
1328 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1329 nigel 91 RRETURN(MATCH_NOMATCH);
1330 nigel 77 ecode++;
1331     break;
1332    
1333     /* Word boundary assertions */
1334    
1335     case OP_NOT_WORD_BOUNDARY:
1336     case OP_WORD_BOUNDARY:
1337     {
1338    
1339     /* Find out if the previous and current characters are "word" characters.
1340     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1341     be "non-word" characters. */
1342    
1343     #ifdef SUPPORT_UTF8
1344     if (utf8)
1345     {
1346     if (eptr == md->start_subject) prev_is_word = FALSE; else
1347     {
1348     const uschar *lastptr = eptr - 1;
1349     while((*lastptr & 0xc0) == 0x80) lastptr--;
1350     GETCHAR(c, lastptr);
1351     prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1352     }
1353     if (eptr >= md->end_subject) cur_is_word = FALSE; else
1354     {
1355     GETCHAR(c, eptr);
1356     cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1357     }
1358     }
1359     else
1360     #endif
1361    
1362     /* More streamlined when not in UTF-8 mode */
1363    
1364     {
1365     prev_is_word = (eptr != md->start_subject) &&
1366     ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1367     cur_is_word = (eptr < md->end_subject) &&
1368     ((md->ctypes[*eptr] & ctype_word) != 0);
1369     }
1370    
1371     /* Now see if the situation is what we want */
1372    
1373     if ((*ecode++ == OP_WORD_BOUNDARY)?
1374     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1375     RRETURN(MATCH_NOMATCH);
1376     }
1377     break;
1378    
1379     /* Match a single character type; inline for speed */
1380    
1381     case OP_ANY:
1382 nigel 91 if ((ims & PCRE_DOTALL) == 0)
1383     {
1384 nigel 93 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1385 nigel 91 }
1386 nigel 77 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1387     if (utf8)
1388     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1389     ecode++;
1390     break;
1391    
1392     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1393     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1394    
1395     case OP_ANYBYTE:
1396     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1397     ecode++;
1398     break;
1399    
1400     case OP_NOT_DIGIT:
1401     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1402     GETCHARINCTEST(c, eptr);
1403     if (
1404     #ifdef SUPPORT_UTF8
1405     c < 256 &&
1406     #endif
1407     (md->ctypes[c] & ctype_digit) != 0
1408     )
1409     RRETURN(MATCH_NOMATCH);
1410     ecode++;
1411     break;
1412    
1413     case OP_DIGIT:
1414     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1415     GETCHARINCTEST(c, eptr);
1416     if (
1417     #ifdef SUPPORT_UTF8
1418     c >= 256 ||
1419     #endif
1420     (md->ctypes[c] & ctype_digit) == 0
1421     )
1422     RRETURN(MATCH_NOMATCH);
1423     ecode++;
1424     break;
1425    
1426     case OP_NOT_WHITESPACE:
1427     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1428     GETCHARINCTEST(c, eptr);
1429     if (
1430     #ifdef SUPPORT_UTF8
1431     c < 256 &&
1432     #endif
1433     (md->ctypes[c] & ctype_space) != 0
1434     )
1435     RRETURN(MATCH_NOMATCH);
1436     ecode++;
1437     break;
1438    
1439     case OP_WHITESPACE:
1440     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1441     GETCHARINCTEST(c, eptr);
1442     if (
1443     #ifdef SUPPORT_UTF8
1444     c >= 256 ||
1445     #endif
1446     (md->ctypes[c] & ctype_space) == 0
1447     )
1448     RRETURN(MATCH_NOMATCH);
1449     ecode++;
1450     break;
1451    
1452     case OP_NOT_WORDCHAR:
1453     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1454     GETCHARINCTEST(c, eptr);
1455     if (
1456     #ifdef SUPPORT_UTF8
1457     c < 256 &&
1458     #endif
1459     (md->ctypes[c] & ctype_word) != 0
1460     )
1461     RRETURN(MATCH_NOMATCH);
1462     ecode++;
1463     break;
1464    
1465     case OP_WORDCHAR:
1466     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1467     GETCHARINCTEST(c, eptr);
1468     if (
1469     #ifdef SUPPORT_UTF8
1470     c >= 256 ||
1471     #endif
1472     (md->ctypes[c] & ctype_word) == 0
1473     )
1474     RRETURN(MATCH_NOMATCH);
1475     ecode++;
1476     break;
1477    
1478 nigel 93 case OP_ANYNL:
1479     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1480     GETCHARINCTEST(c, eptr);
1481     switch(c)
1482     {
1483     default: RRETURN(MATCH_NOMATCH);
1484     case 0x000d:
1485     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1486     break;
1487     case 0x000a:
1488     case 0x000b:
1489     case 0x000c:
1490     case 0x0085:
1491     case 0x2028:
1492     case 0x2029:
1493     break;
1494     }
1495     ecode++;
1496     break;
1497    
1498 ph10 178 case OP_NOT_HSPACE:
1499     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1500     GETCHARINCTEST(c, eptr);
1501     switch(c)
1502     {
1503     default: break;
1504     case 0x09: /* HT */
1505     case 0x20: /* SPACE */
1506     case 0xa0: /* NBSP */
1507     case 0x1680: /* OGHAM SPACE MARK */
1508     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1509     case 0x2000: /* EN QUAD */
1510     case 0x2001: /* EM QUAD */
1511     case 0x2002: /* EN SPACE */
1512     case 0x2003: /* EM SPACE */
1513     case 0x2004: /* THREE-PER-EM SPACE */
1514     case 0x2005: /* FOUR-PER-EM SPACE */
1515     case 0x2006: /* SIX-PER-EM SPACE */
1516     case 0x2007: /* FIGURE SPACE */
1517     case 0x2008: /* PUNCTUATION SPACE */
1518     case 0x2009: /* THIN SPACE */
1519     case 0x200A: /* HAIR SPACE */
1520     case 0x202f: /* NARROW NO-BREAK SPACE */
1521     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1522     case 0x3000: /* IDEOGRAPHIC SPACE */
1523     RRETURN(MATCH_NOMATCH);
1524     }
1525     ecode++;
1526     break;
1527    
1528     case OP_HSPACE:
1529     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1530     GETCHARINCTEST(c, eptr);
1531     switch(c)
1532     {
1533     default: RRETURN(MATCH_NOMATCH);
1534     case 0x09: /* HT */
1535     case 0x20: /* SPACE */
1536     case 0xa0: /* NBSP */
1537     case 0x1680: /* OGHAM SPACE MARK */
1538     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1539     case 0x2000: /* EN QUAD */
1540     case 0x2001: /* EM QUAD */
1541     case 0x2002: /* EN SPACE */
1542     case 0x2003: /* EM SPACE */
1543     case 0x2004: /* THREE-PER-EM SPACE */
1544     case 0x2005: /* FOUR-PER-EM SPACE */
1545     case 0x2006: /* SIX-PER-EM SPACE */
1546     case 0x2007: /* FIGURE SPACE */
1547     case 0x2008: /* PUNCTUATION SPACE */
1548     case 0x2009: /* THIN SPACE */
1549     case 0x200A: /* HAIR SPACE */
1550     case 0x202f: /* NARROW NO-BREAK SPACE */
1551     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1552     case 0x3000: /* IDEOGRAPHIC SPACE */
1553     break;
1554     }
1555     ecode++;
1556     break;
1557    
1558     case OP_NOT_VSPACE:
1559     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1560     GETCHARINCTEST(c, eptr);
1561     switch(c)
1562     {
1563     default: break;
1564     case 0x0a: /* LF */
1565     case 0x0b: /* VT */
1566     case 0x0c: /* FF */
1567     case 0x0d: /* CR */
1568     case 0x85: /* NEL */
1569     case 0x2028: /* LINE SEPARATOR */
1570     case 0x2029: /* PARAGRAPH SEPARATOR */
1571     RRETURN(MATCH_NOMATCH);
1572     }
1573     ecode++;
1574     break;
1575    
1576     case OP_VSPACE:
1577     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1578     GETCHARINCTEST(c, eptr);
1579     switch(c)
1580     {
1581     default: RRETURN(MATCH_NOMATCH);
1582     case 0x0a: /* LF */
1583     case 0x0b: /* VT */
1584     case 0x0c: /* FF */
1585     case 0x0d: /* CR */
1586     case 0x85: /* NEL */
1587     case 0x2028: /* LINE SEPARATOR */
1588     case 0x2029: /* PARAGRAPH SEPARATOR */
1589     break;
1590     }
1591     ecode++;
1592     break;
1593    
1594 nigel 77 #ifdef SUPPORT_UCP
1595     /* Check the next character by Unicode property. We will get here only
1596     if the support is in the binary; otherwise a compile-time error occurs. */
1597    
1598     case OP_PROP:
1599     case OP_NOTPROP:
1600     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1601     GETCHARINCTEST(c, eptr);
1602     {
1603 nigel 87 int chartype, script;
1604     int category = _pcre_ucp_findprop(c, &chartype, &script);
1605 nigel 77
1606 nigel 87 switch(ecode[1])
1607     {
1608     case PT_ANY:
1609     if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1610     break;
1611 nigel 77
1612 nigel 87 case PT_LAMP:
1613     if ((chartype == ucp_Lu ||
1614     chartype == ucp_Ll ||
1615     chartype == ucp_Lt) == (op == OP_NOTPROP))
1616 nigel 77 RRETURN(MATCH_NOMATCH);
1617 nigel 87 break;
1618    
1619     case PT_GC:
1620     if ((ecode[2] != category) == (op == OP_PROP))
1621 nigel 77 RRETURN(MATCH_NOMATCH);
1622 nigel 87 break;
1623    
1624     case PT_PC:
1625     if ((ecode[2] != chartype) == (op == OP_PROP))
1626     RRETURN(MATCH_NOMATCH);
1627     break;
1628    
1629     case PT_SC:
1630     if ((ecode[2] != script) == (op == OP_PROP))
1631     RRETURN(MATCH_NOMATCH);
1632     break;
1633    
1634     default:
1635     RRETURN(PCRE_ERROR_INTERNAL);
1636 nigel 77 }
1637 nigel 87
1638     ecode += 3;
1639 nigel 77 }
1640     break;
1641    
1642     /* Match an extended Unicode sequence. We will get here only if the support
1643     is in the binary; otherwise a compile-time error occurs. */
1644    
1645     case OP_EXTUNI:
1646     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1647     GETCHARINCTEST(c, eptr);
1648     {
1649 nigel 87 int chartype, script;
1650     int category = _pcre_ucp_findprop(c, &chartype, &script);
1651 nigel 77 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1652     while (eptr < md->end_subject)
1653     {
1654     int len = 1;
1655     if (!utf8) c = *eptr; else
1656     {
1657     GETCHARLEN(c, eptr, len);
1658     }
1659 nigel 87 category = _pcre_ucp_findprop(c, &chartype, &script);
1660 nigel 77 if (category != ucp_M) break;
1661     eptr += len;
1662     }
1663     }
1664     ecode++;
1665     break;
1666     #endif
1667    
1668    
1669     /* Match a back reference, possibly repeatedly. Look past the end of the
1670     item to see if there is repeat information following. The code is similar
1671     to that for character classes, but repeated for efficiency. Then obey
1672     similar code to character type repeats - written out again for speed.
1673     However, if the referenced string is the empty string, always treat
1674     it as matched, any number of times (otherwise there could be infinite
1675     loops). */
1676    
1677     case OP_REF:
1678     {
1679     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1680     ecode += 3; /* Advance past item */
1681    
1682     /* If the reference is unset, set the length to be longer than the amount
1683     of subject left; this ensures that every attempt at a match fails. We
1684     can't just fail here, because of the possibility of quantifiers with zero
1685     minima. */
1686    
1687     length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1688     md->end_subject - eptr + 1 :
1689     md->offset_vector[offset+1] - md->offset_vector[offset];
1690    
1691     /* Set up for repetition, or handle the non-repeated case */
1692    
1693     switch (*ecode)
1694     {
1695     case OP_CRSTAR:
1696     case OP_CRMINSTAR:
1697     case OP_CRPLUS:
1698     case OP_CRMINPLUS:
1699     case OP_CRQUERY:
1700     case OP_CRMINQUERY:
1701     c = *ecode++ - OP_CRSTAR;
1702     minimize = (c & 1) != 0;
1703     min = rep_min[c]; /* Pick up values from tables; */
1704     max = rep_max[c]; /* zero for max => infinity */
1705     if (max == 0) max = INT_MAX;
1706     break;
1707    
1708     case OP_CRRANGE:
1709     case OP_CRMINRANGE:
1710     minimize = (*ecode == OP_CRMINRANGE);
1711     min = GET2(ecode, 1);
1712     max = GET2(ecode, 3);
1713     if (max == 0) max = INT_MAX;
1714     ecode += 5;
1715     break;
1716    
1717     default: /* No repeat follows */
1718     if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1719     eptr += length;
1720     continue; /* With the main loop */
1721     }
1722    
1723     /* If the length of the reference is zero, just continue with the
1724     main loop. */
1725    
1726     if (length == 0) continue;
1727    
1728     /* First, ensure the minimum number of matches are present. We get back
1729     the length of the reference string explicitly rather than passing the
1730     address of eptr, so that eptr can be a register variable. */
1731    
1732     for (i = 1; i <= min; i++)
1733     {
1734     if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1735     eptr += length;
1736     }
1737    
1738     /* If min = max, continue at the same level without recursion.
1739     They are not both allowed to be zero. */
1740    
1741     if (min == max) continue;
1742    
1743     /* If minimizing, keep trying and advancing the pointer */
1744    
1745     if (minimize)
1746     {
1747     for (fi = min;; fi++)
1748     {
1749 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1750 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1751     if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1752     RRETURN(MATCH_NOMATCH);
1753     eptr += length;
1754     }
1755     /* Control never gets here */
1756     }
1757    
1758     /* If maximizing, find the longest string and work backwards */
1759    
1760     else
1761     {
1762     pp = eptr;
1763     for (i = min; i < max; i++)
1764     {
1765     if (!match_ref(offset, eptr, length, md, ims)) break;
1766     eptr += length;
1767     }
1768     while (eptr >= pp)
1769     {
1770 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1771 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1772     eptr -= length;
1773     }
1774     RRETURN(MATCH_NOMATCH);
1775     }
1776     }
1777     /* Control never gets here */
1778    
1779    
1780    
1781     /* Match a bit-mapped character class, possibly repeatedly. This op code is
1782     used when all the characters in the class have values in the range 0-255,
1783     and either the matching is caseful, or the characters are in the range
1784     0-127 when UTF-8 processing is enabled. The only difference between
1785     OP_CLASS and OP_NCLASS occurs when a subject character greater than 255 is
1786     encountered in UTF-8 mode: OP_CLASS fails to match it, OP_NCLASS accepts it.
1787    
1788     First, look past the end of the item to see if there is repeat information
1789     following. Then obey similar code to character type repeats - written out
1790     again for speed. */
1791    
1792     case OP_NCLASS:
1793     case OP_CLASS:
1794     {
1795     data = ecode + 1; /* Save for matching */
1796     ecode += 33; /* Advance past the item */
1797    
1798     switch (*ecode)
1799     {
1800     case OP_CRSTAR:
1801     case OP_CRMINSTAR:
1802     case OP_CRPLUS:
1803     case OP_CRMINPLUS:
1804     case OP_CRQUERY:
1805     case OP_CRMINQUERY:
1806     c = *ecode++ - OP_CRSTAR;
1807     minimize = (c & 1) != 0;
1808     min = rep_min[c]; /* Pick up values from tables; */
1809     max = rep_max[c]; /* zero for max => infinity */
1810     if (max == 0) max = INT_MAX;
1811     break;
1812    
1813     case OP_CRRANGE:
1814     case OP_CRMINRANGE:
1815     minimize = (*ecode == OP_CRMINRANGE);
1816     min = GET2(ecode, 1);
1817     max = GET2(ecode, 3);
1818     if (max == 0) max = INT_MAX;
1819     ecode += 5;
1820     break;
1821    
1822     default: /* No repeat follows */
1823     min = max = 1;
1824     break;
1825     }
1826    
1827     /* First, ensure the minimum number of matches are present. */
1828    
1829     #ifdef SUPPORT_UTF8
1830     /* UTF-8 mode */
1831     if (utf8)
1832     {
1833     for (i = 1; i <= min; i++)
1834     {
1835     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1836     GETCHARINC(c, eptr);
1837     if (c > 255)
1838     {
1839     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1840     }
1841     else
1842     {
1843     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1844     }
1845     }
1846     }
1847     else
1848     #endif
1849     /* Not UTF-8 mode */
1850     {
1851     for (i = 1; i <= min; i++)
1852     {
1853     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1854     c = *eptr++;
1855     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1856     }
1857     }
1858    
1859     /* If max == min we can continue with the main loop without the
1860     need to recurse. */
1861    
1862     if (min == max) continue;
1863    
1864     /* If minimizing, keep testing the rest of the expression and advancing
1865     the pointer while it matches the class. */
1866    
1867     if (minimize)
1868     {
1869     #ifdef SUPPORT_UTF8
1870     /* UTF-8 mode */
1871     if (utf8)
1872     {
1873     for (fi = min;; fi++)
1874     {
1875 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1876 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1877     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1878     GETCHARINC(c, eptr);
1879     if (c > 255)
1880     {
1881     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1882     }
1883     else
1884     {
1885     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1886     }
1887     }
1888     }
1889     else
1890     #endif
1891     /* Not UTF-8 mode */
1892     {
1893     for (fi = min;; fi++)
1894     {
1895 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1896 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1897     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1898     c = *eptr++;
1899     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1900     }
1901     }
1902     /* Control never gets here */
1903     }
1904    
1905     /* If maximizing, find the longest possible run, then work backwards. */
1906    
1907     else
1908     {
1909     pp = eptr;
1910    
1911     #ifdef SUPPORT_UTF8
1912     /* UTF-8 mode */
1913     if (utf8)
1914     {
1915     for (i = min; i < max; i++)
1916     {
1917     int len = 1;
1918     if (eptr >= md->end_subject) break;
1919     GETCHARLEN(c, eptr, len);
1920     if (c > 255)
1921     {
1922     if (op == OP_CLASS) break;
1923     }
1924     else
1925     {
1926     if ((data[c/8] & (1 << (c&7))) == 0) break;
1927     }
1928     eptr += len;
1929     }
1930     for (;;)
1931     {
1932 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1933 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1934     if (eptr-- == pp) break; /* Stop if tried at original pos */
1935     BACKCHAR(eptr);
1936     }
1937     }
1938     else
1939     #endif
1940     /* Not UTF-8 mode */
1941     {
1942     for (i = min; i < max; i++)
1943     {
1944     if (eptr >= md->end_subject) break;
1945     c = *eptr;
1946     if ((data[c/8] & (1 << (c&7))) == 0) break;
1947     eptr++;
1948     }
1949     while (eptr >= pp)
1950     {
1951 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1952 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1953 nigel 77 eptr--;
1954     }
1955     }
1956    
1957     RRETURN(MATCH_NOMATCH);
1958     }
1959     }
1960     /* Control never gets here */
1961    
1962    
1963     /* Match an extended character class. This opcode is encountered only
1964     in UTF-8 mode, because that's the only time it is compiled. */
1965    
1966     #ifdef SUPPORT_UTF8
1967     case OP_XCLASS:
1968     {
1969     data = ecode + 1 + LINK_SIZE; /* Save for matching */
1970     ecode += GET(ecode, 1); /* Advance past the item */
1971    
1972     switch (*ecode)
1973     {
1974     case OP_CRSTAR:
1975     case OP_CRMINSTAR:
1976     case OP_CRPLUS:
1977     case OP_CRMINPLUS:
1978     case OP_CRQUERY:
1979     case OP_CRMINQUERY:
1980     c = *ecode++ - OP_CRSTAR;
1981     minimize = (c & 1) != 0;
1982     min = rep_min[c]; /* Pick up values from tables; */
1983     max = rep_max[c]; /* zero for max => infinity */
1984     if (max == 0) max = INT_MAX;
1985     break;
1986    
1987     case OP_CRRANGE:
1988     case OP_CRMINRANGE:
1989     minimize = (*ecode == OP_CRMINRANGE);
1990     min = GET2(ecode, 1);
1991     max = GET2(ecode, 3);
1992     if (max == 0) max = INT_MAX;
1993     ecode += 5;
1994     break;
1995    
1996     default: /* No repeat follows */
1997     min = max = 1;
1998     break;
1999     }
2000    
2001     /* First, ensure the minimum number of matches are present. */
2002    
2003     for (i = 1; i <= min; i++)
2004     {
2005     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2006     GETCHARINC(c, eptr);
2007     if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2008     }
2009    
2010     /* If max == min we can continue with the main loop without the
2011     need to recurse. */
2012    
2013     if (min == max) continue;
2014    
2015     /* If minimizing, keep testing the rest of the expression and advancing
2016     the pointer while it matches the class. */
2017    
2018     if (minimize)
2019     {
2020     for (fi = min;; fi++)
2021     {
2022 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2023 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2024     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2025     GETCHARINC(c, eptr);
2026     if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2027     }
2028     /* Control never gets here */
2029     }
2030    
2031     /* If maximizing, find the longest possible run, then work backwards. */
2032    
2033     else
2034     {
2035     pp = eptr;
2036     for (i = min; i < max; i++)
2037     {
2038     int len = 1;
2039     if (eptr >= md->end_subject) break;
2040     GETCHARLEN(c, eptr, len);
2041     if (!_pcre_xclass(c, data)) break;
2042     eptr += len;
2043     }
2044     for(;;)
2045     {
2046 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2047 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2048     if (eptr-- == pp) break; /* Stop if tried at original pos */
2049     BACKCHAR(eptr)
2050     }
2051     RRETURN(MATCH_NOMATCH);
2052     }
2053    
2054     /* Control never gets here */
2055     }
2056     #endif /* End of XCLASS */
2057    
2058     /* Match a single character, casefully */
2059    
2060     case OP_CHAR:
2061     #ifdef SUPPORT_UTF8
2062     if (utf8)
2063     {
2064     length = 1;
2065     ecode++;
2066     GETCHARLEN(fc, ecode, length);
2067     if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2068     while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2069     }
2070     else
2071     #endif
2072    
2073     /* Non-UTF-8 mode */
2074     {
2075     if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2076     if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2077     ecode += 2;
2078     }
2079     break;
2080    
2081     /* Match a single character, caselessly */
2082    
2083     case OP_CHARNC:
2084     #ifdef SUPPORT_UTF8
2085     if (utf8)
2086     {
2087     length = 1;
2088     ecode++;
2089     GETCHARLEN(fc, ecode, length);
2090    
2091     if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2092    
2093     /* If the pattern character's value is < 128, we have only one byte, and
2094     can use the fast lookup table. */
2095    
2096     if (fc < 128)
2097     {
2098     if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2099     }
2100    
2101     /* Otherwise we must pick up the subject character */
2102    
2103     else
2104     {
2105 nigel 93 unsigned int dc;
2106 nigel 77 GETCHARINC(dc, eptr);
2107     ecode += length;
2108    
2109     /* If we have Unicode property support, we can use it to test the other
2110 nigel 87 case of the character, if there is one. */
2111 nigel 77
2112     if (fc != dc)
2113     {
2114     #ifdef SUPPORT_UCP
2115 nigel 87 if (dc != _pcre_ucp_othercase(fc))
2116 nigel 77 #endif
2117     RRETURN(MATCH_NOMATCH);
2118     }
2119     }
2120     }
2121     else
2122     #endif /* SUPPORT_UTF8 */
2123    
2124     /* Non-UTF-8 mode */
2125     {
2126     if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2127     if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2128     ecode += 2;
2129     }
2130     break;
2131    
2132 nigel 93 /* Match a single character repeatedly. */
2133 nigel 77
2134     case OP_EXACT:
2135     min = max = GET2(ecode, 1);
2136     ecode += 3;
2137     goto REPEATCHAR;
2138    
2139 nigel 93 case OP_POSUPTO:
2140     possessive = TRUE;
2141     /* Fall through */
2142    
2143 nigel 77 case OP_UPTO:
2144     case OP_MINUPTO:
2145     min = 0;
2146     max = GET2(ecode, 1);
2147     minimize = *ecode == OP_MINUPTO;
2148     ecode += 3;
2149     goto REPEATCHAR;
2150    
2151 nigel 93 case OP_POSSTAR:
2152     possessive = TRUE;
2153     min = 0;
2154     max = INT_MAX;
2155     ecode++;
2156     goto REPEATCHAR;
2157    
2158     case OP_POSPLUS:
2159     possessive = TRUE;
2160     min = 1;
2161     max = INT_MAX;
2162     ecode++;
2163     goto REPEATCHAR;
2164    
2165     case OP_POSQUERY:
2166     possessive = TRUE;
2167     min = 0;
2168     max = 1;
2169     ecode++;
2170     goto REPEATCHAR;
2171    
2172 nigel 77 case OP_STAR:
2173     case OP_MINSTAR:
2174     case OP_PLUS:
2175     case OP_MINPLUS:
2176     case OP_QUERY:
2177     case OP_MINQUERY:
2178     c = *ecode++ - OP_STAR;
2179     minimize = (c & 1) != 0;
2180     min = rep_min[c]; /* Pick up values from tables; */
2181     max = rep_max[c]; /* zero for max => infinity */
2182     if (max == 0) max = INT_MAX;
2183    
2184     /* Common code for all repeated single-character matches. We can give
2185     up quickly if there are fewer than the minimum number of characters left in
2186     the subject. */
2187    
2188     REPEATCHAR:
2189     #ifdef SUPPORT_UTF8
2190     if (utf8)
2191     {
2192     length = 1;
2193     charptr = ecode;
2194     GETCHARLEN(fc, ecode, length);
2195     if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2196     ecode += length;
2197    
2198     /* Handle multibyte character matching specially here. There is
2199     support for caseless matching if UCP support is present. */
2200    
2201     if (length > 1)
2202     {
2203     #ifdef SUPPORT_UCP
2204 nigel 93 unsigned int othercase;
2205 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2206 nigel 93 (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2207 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2208 ph10 115 else oclength = 0;
2209 nigel 77 #endif /* SUPPORT_UCP */
2210    
2211     for (i = 1; i <= min; i++)
2212     {
2213     if (memcmp(eptr, charptr, length) == 0) eptr += length;
2214 ph10 123 #ifdef SUPPORT_UCP
2215 nigel 77 /* Need braces because of following else */
2216     else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2217     else
2218     {
2219     if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2220     eptr += oclength;
2221     }
2222 ph10 115 #else /* without SUPPORT_UCP */
2223     else { RRETURN(MATCH_NOMATCH); }
2224 ph10 123 #endif /* SUPPORT_UCP */
2225 nigel 77 }
2226    
2227     if (min == max) continue;
2228    
2229     if (minimize)
2230     {
2231     for (fi = min;; fi++)
2232     {
2233 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2234 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2235     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2236     if (memcmp(eptr, charptr, length) == 0) eptr += length;
2237 ph10 123 #ifdef SUPPORT_UCP
2238 nigel 77 /* Need braces because of following else */
2239     else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2240     else
2241     {
2242     if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2243     eptr += oclength;
2244     }
2245 ph10 115 #else /* without SUPPORT_UCP */
2246     else { RRETURN (MATCH_NOMATCH); }
2247     #endif /* SUPPORT_UCP */
2248 nigel 77 }
2249     /* Control never gets here */
2250     }
2251 nigel 93
2252     else /* Maximize */
2253 nigel 77 {
2254     pp = eptr;
2255     for (i = min; i < max; i++)
2256     {
2257     if (eptr > md->end_subject - length) break;
2258     if (memcmp(eptr, charptr, length) == 0) eptr += length;
2259 ph10 123 #ifdef SUPPORT_UCP
2260 nigel 77 else if (oclength == 0) break;
2261     else
2262     {
2263     if (memcmp(eptr, occhars, oclength) != 0) break;
2264     eptr += oclength;
2265     }
2266 ph10 115 #else /* without SUPPORT_UCP */
2267     else break;
2268 ph10 123 #endif /* SUPPORT_UCP */
2269 nigel 77 }
2270 nigel 93
2271     if (possessive) continue;
2272 ph10 120 for(;;)
2273 nigel 77 {
2274 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2275 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2276 ph10 120 if (eptr == pp) RRETURN(MATCH_NOMATCH);
2277 ph10 115 #ifdef SUPPORT_UCP
2278     eptr--;
2279     BACKCHAR(eptr);
2280 ph10 123 #else /* without SUPPORT_UCP */
2281 nigel 77 eptr -= length;
2282 ph10 123 #endif /* SUPPORT_UCP */
2283 nigel 77 }
2284     }
2285     /* Control never gets here */
2286     }
2287    
2288     /* If the length of a UTF-8 character is 1, we fall through here, and
2289     obey the code as for non-UTF-8 characters below, though in this case the
2290     value of fc will always be < 128. */
2291     }
2292     else
2293     #endif /* SUPPORT_UTF8 */
2294    
2295     /* When not in UTF-8 mode, load a single-byte character. */
2296     {
2297     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2298     fc = *ecode++;
2299     }
2300    
2301     /* The value of fc at this point is always less than 256, though we may or
2302     may not be in UTF-8 mode. The code is duplicated for the caseless and
2303     caseful cases, for speed, since matching characters is likely to be quite
2304     common. First, ensure the minimum number of matches are present. If min =
2305     max, continue at the same level without recursing. Otherwise, if
2306     minimizing, keep trying the rest of the expression and advancing one
2307     matching character if failing, up to the maximum. Alternatively, if
2308     maximizing, find the maximum number of characters and work backwards. */
2309    
2310     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2311     max, eptr));
2312    
2313     if ((ims & PCRE_CASELESS) != 0)
2314     {
2315     fc = md->lcc[fc];
2316     for (i = 1; i <= min; i++)
2317     if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2318     if (min == max) continue;
2319     if (minimize)
2320     {
2321     for (fi = min;; fi++)
2322     {
2323 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2324 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2325     if (fi >= max || eptr >= md->end_subject ||
2326     fc != md->lcc[*eptr++])
2327     RRETURN(MATCH_NOMATCH);
2328     }
2329     /* Control never gets here */
2330     }
2331 nigel 93 else /* Maximize */
2332 nigel 77 {
2333     pp = eptr;
2334     for (i = min; i < max; i++)
2335     {
2336     if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2337     eptr++;
2338     }
2339 nigel 93 if (possessive) continue;
2340 nigel 77 while (eptr >= pp)
2341     {
2342 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2343 nigel 77 eptr--;
2344     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2345     }
2346     RRETURN(MATCH_NOMATCH);
2347     }
2348     /* Control never gets here */
2349     }
2350    
2351     /* Caseful comparisons (fc is a single byte; multi-byte characters never reach here) */
2352    
2353     else
2354     {
2355     for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2356     if (min == max) continue;
2357     if (minimize)
2358     {
2359     for (fi = min;; fi++)
2360     {
2361 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2362 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2363     if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2364     RRETURN(MATCH_NOMATCH);
2365     }
2366     /* Control never gets here */
2367     }
2368 nigel 93 else /* Maximize */
2369 nigel 77 {
2370     pp = eptr;
2371     for (i = min; i < max; i++)
2372     {
2373     if (eptr >= md->end_subject || fc != *eptr) break;
2374     eptr++;
2375     }
2376 nigel 93 if (possessive) continue;
2377 nigel 77 while (eptr >= pp)
2378     {
2379 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2380 nigel 77 eptr--;
2381     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2382     }
2383     RRETURN(MATCH_NOMATCH);
2384     }
2385     }
2386     /* Control never gets here */
2387    
2388     /* Match a negated single one-byte pattern character. The subject
2389     character we are checking can be multibyte. */
2390    
2391     case OP_NOT:
2392     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2393     ecode++;
2394     GETCHARINCTEST(c, eptr);
2395     if ((ims & PCRE_CASELESS) != 0)
2396     {
2397     #ifdef SUPPORT_UTF8
2398     if (c < 256)
2399     #endif
2400     c = md->lcc[c];
2401     if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2402     }
2403     else
2404     {
2405     if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2406     }
2407     break;
2408    
2409     /* Match a negated single one-byte character repeatedly. This is almost a
2410     repeat of the code for a repeated single character, but I haven't found a
2411     nice way of commoning these up that doesn't require a test of the
2412     positive/negative option for each character match. Maybe that wouldn't add
2413     very much to the time taken, but character matching *is* what this is all
2414     about... */
2415    
2416     case OP_NOTEXACT:
2417     min = max = GET2(ecode, 1);
2418     ecode += 3;
2419     goto REPEATNOTCHAR;
2420    
2421     case OP_NOTUPTO:
2422     case OP_NOTMINUPTO:
2423     min = 0;
2424     max = GET2(ecode, 1);
2425     minimize = *ecode == OP_NOTMINUPTO;
2426     ecode += 3;
2427     goto REPEATNOTCHAR;
2428    
2429 nigel 93 case OP_NOTPOSSTAR:
2430     possessive = TRUE;
2431     min = 0;
2432     max = INT_MAX;
2433     ecode++;
2434     goto REPEATNOTCHAR;
2435    
2436     case OP_NOTPOSPLUS:
2437     possessive = TRUE;
2438     min = 1;
2439     max = INT_MAX;
2440     ecode++;
2441     goto REPEATNOTCHAR;
2442    
2443     case OP_NOTPOSQUERY:
2444     possessive = TRUE;
2445     min = 0;
2446     max = 1;
2447     ecode++;
2448     goto REPEATNOTCHAR;
2449    
2450     case OP_NOTPOSUPTO:
2451     possessive = TRUE;
2452     min = 0;
2453     max = GET2(ecode, 1);
2454     ecode += 3;
2455     goto REPEATNOTCHAR;
2456    
2457 nigel 77 case OP_NOTSTAR:
2458     case OP_NOTMINSTAR:
2459     case OP_NOTPLUS:
2460     case OP_NOTMINPLUS:
2461     case OP_NOTQUERY:
2462     case OP_NOTMINQUERY:
2463     c = *ecode++ - OP_NOTSTAR;
2464     minimize = (c & 1) != 0;
2465     min = rep_min[c]; /* Pick up values from tables; */
2466     max = rep_max[c]; /* zero for max => infinity */
2467     if (max == 0) max = INT_MAX;
2468    
2469     /* Common code for all repeated negated single-byte matches. We can give up
2470     quickly if there are fewer than the minimum number of bytes left in the
2471     subject. */
2472    
2473     REPEATNOTCHAR:
2474     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2475     fc = *ecode++;
2476    
2477     /* The code is duplicated for the caseless and caseful cases, for speed,
2478     since matching characters is likely to be quite common. First, ensure the
2479     minimum number of matches are present. If min = max, continue at the same
2480     level without recursing. Otherwise, if minimizing, keep trying the rest of
2481     the expression and advancing one matching character if failing, up to the
2482     maximum. Alternatively, if maximizing, find the maximum number of
2483     characters and work backwards. */
2484    
2485     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2486     max, eptr));
2487    
2488     if ((ims & PCRE_CASELESS) != 0)
2489     {
2490     fc = md->lcc[fc];
2491    
2492     #ifdef SUPPORT_UTF8
2493     /* UTF-8 mode */
2494     if (utf8)
2495     {
2496 nigel 93 register unsigned int d;
2497 nigel 77 for (i = 1; i <= min; i++)
2498     {
2499     GETCHARINC(d, eptr);
2500     if (d < 256) d = md->lcc[d];
2501     if (fc == d) RRETURN(MATCH_NOMATCH);
2502     }
2503     }
2504     else
2505     #endif
2506    
2507     /* Not UTF-8 mode */
2508     {
2509     for (i = 1; i <= min; i++)
2510     if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2511     }
2512    
2513     if (min == max) continue;
2514    
2515     if (minimize)
2516     {
2517     #ifdef SUPPORT_UTF8
2518     /* UTF-8 mode */
2519     if (utf8)
2520     {
2521 nigel 93 register unsigned int d;
2522 nigel 77 for (fi = min;; fi++)
2523     {
2524 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2525 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2526     GETCHARINC(d, eptr);
2527     if (d < 256) d = md->lcc[d];
2528     if (fi >= max || eptr >= md->end_subject || fc == d)
2529     RRETURN(MATCH_NOMATCH);
2530     }
2531     }
2532     else
2533     #endif
2534     /* Not UTF-8 mode */
2535     {
2536     for (fi = min;; fi++)
2537     {
2538 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2539 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2540     if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2541     RRETURN(MATCH_NOMATCH);
2542     }
2543     }
2544     /* Control never gets here */
2545     }
2546    
2547     /* Maximize case */
2548    
2549     else
2550     {
2551     pp = eptr;
2552    
2553     #ifdef SUPPORT_UTF8
2554     /* UTF-8 mode */
2555     if (utf8)
2556     {
2557 nigel 93 register unsigned int d;
2558 nigel 77 for (i = min; i < max; i++)
2559     {
2560     int len = 1;
2561     if (eptr >= md->end_subject) break;
2562     GETCHARLEN(d, eptr, len);
2563     if (d < 256) d = md->lcc[d];
2564     if (fc == d) break;
2565     eptr += len;
2566     }
2567 nigel 93 if (possessive) continue;
2568     for(;;)
2569 nigel 77 {
2570 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2571 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2572     if (eptr-- == pp) break; /* Stop if tried at original pos */
2573     BACKCHAR(eptr);
2574     }
2575     }
2576     else
2577     #endif
2578     /* Not UTF-8 mode */
2579     {
2580     for (i = min; i < max; i++)
2581     {
2582     if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2583     eptr++;
2584     }
2585 nigel 93 if (possessive) continue;
2586 nigel 77 while (eptr >= pp)
2587     {
2588 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2589 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2590     eptr--;
2591     }
2592     }
2593    
2594     RRETURN(MATCH_NOMATCH);
2595     }
2596     /* Control never gets here */
2597     }
2598    
2599     /* Caseful comparisons */
2600    
2601     else
2602     {
2603     #ifdef SUPPORT_UTF8
2604     /* UTF-8 mode */
2605     if (utf8)
2606     {
2607 nigel 93 register unsigned int d;
2608 nigel 77 for (i = 1; i <= min; i++)
2609     {
2610     GETCHARINC(d, eptr);
2611     if (fc == d) RRETURN(MATCH_NOMATCH);
2612     }
2613     }
2614     else
2615     #endif
2616     /* Not UTF-8 mode */
2617     {
2618     for (i = 1; i <= min; i++)
2619     if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2620     }
2621    
2622     if (min == max) continue;
2623    
2624     if (minimize)
2625     {
2626     #ifdef SUPPORT_UTF8
2627     /* UTF-8 mode */
2628     if (utf8)
2629     {
2630 nigel 93 register unsigned int d;
2631 nigel 77 for (fi = min;; fi++)
2632     {
2633 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2634 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2635     GETCHARINC(d, eptr);
2636     if (fi >= max || eptr >= md->end_subject || fc == d)
2637     RRETURN(MATCH_NOMATCH);
2638     }
2639     }
2640     else
2641     #endif
2642     /* Not UTF-8 mode */
2643     {
2644     for (fi = min;; fi++)
2645     {
2646 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2647 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2648     if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2649     RRETURN(MATCH_NOMATCH);
2650     }
2651     }
2652     /* Control never gets here */
2653     }
2654    
2655     /* Maximize case */
2656    
2657     else
2658     {
2659     pp = eptr;
2660    
2661     #ifdef SUPPORT_UTF8
2662     /* UTF-8 mode */
2663     if (utf8)
2664     {
2665 nigel 93 register unsigned int d;
2666 nigel 77 for (i = min; i < max; i++)
2667     {
2668     int len = 1;
2669     if (eptr >= md->end_subject) break;
2670     GETCHARLEN(d, eptr, len);
2671     if (fc == d) break;
2672     eptr += len;
2673     }
2674 nigel 93 if (possessive) continue;
2675 nigel 77 for(;;)
2676     {
2677 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2678 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2679     if (eptr-- == pp) break; /* Stop if tried at original pos */
2680     BACKCHAR(eptr);
2681     }
2682     }
2683     else
2684     #endif
2685     /* Not UTF-8 mode */
2686     {
2687     for (i = min; i < max; i++)
2688     {
2689     if (eptr >= md->end_subject || fc == *eptr) break;
2690     eptr++;
2691     }
2692 nigel 93 if (possessive) continue;
2693 nigel 77 while (eptr >= pp)
2694     {
2695 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2696 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2697     eptr--;
2698     }
2699     }
2700    
2701     RRETURN(MATCH_NOMATCH);
2702     }
2703     }
2704     /* Control never gets here */
2705    
2706     /* Match a single character type repeatedly; several different opcodes
2707     share code. This is very similar to the code for single characters, but we
2708     repeat it in the interests of efficiency. */
2709    
2710     case OP_TYPEEXACT:
2711     min = max = GET2(ecode, 1);
2712     minimize = TRUE;
2713     ecode += 3;
2714     goto REPEATTYPE;
2715    
2716     case OP_TYPEUPTO:
2717     case OP_TYPEMINUPTO:
2718     min = 0;
2719     max = GET2(ecode, 1);
2720     minimize = *ecode == OP_TYPEMINUPTO;
2721     ecode += 3;
2722     goto REPEATTYPE;
2723    
2724 nigel 93 case OP_TYPEPOSSTAR:
2725     possessive = TRUE;
2726     min = 0;
2727     max = INT_MAX;
2728     ecode++;
2729     goto REPEATTYPE;
2730    
2731     case OP_TYPEPOSPLUS:
2732     possessive = TRUE;
2733     min = 1;
2734     max = INT_MAX;
2735     ecode++;
2736     goto REPEATTYPE;
2737    
2738     case OP_TYPEPOSQUERY:
2739     possessive = TRUE;
2740     min = 0;
2741     max = 1;
2742     ecode++;
2743     goto REPEATTYPE;
2744    
2745     case OP_TYPEPOSUPTO:
2746     possessive = TRUE;
2747     min = 0;
2748     max = GET2(ecode, 1);
2749     ecode += 3;
2750     goto REPEATTYPE;
2751    
2752 nigel 77 case OP_TYPESTAR:
2753     case OP_TYPEMINSTAR:
2754     case OP_TYPEPLUS:
2755     case OP_TYPEMINPLUS:
2756     case OP_TYPEQUERY:
2757     case OP_TYPEMINQUERY:
2758     c = *ecode++ - OP_TYPESTAR;
2759     minimize = (c & 1) != 0;
2760     min = rep_min[c]; /* Pick up values from tables; */
2761     max = rep_max[c]; /* zero for max => infinity */
2762     if (max == 0) max = INT_MAX;
2763    
2764     /* Common code for all repeated single character type matches. Note that
2765     in UTF-8 mode, '.' matches a character of any length, but for the other
2766     character types, the valid characters are all one-byte long. */
2767    
2768     REPEATTYPE:
2769     ctype = *ecode++; /* Code for the character type */
2770    
2771     #ifdef SUPPORT_UCP
2772     if (ctype == OP_PROP || ctype == OP_NOTPROP)
2773     {
2774     prop_fail_result = ctype == OP_NOTPROP;
2775     prop_type = *ecode++;
2776 nigel 87 prop_value = *ecode++;
2777 nigel 77 }
2778     else prop_type = -1;
2779     #endif
2780    
2781     /* First, ensure the minimum number of matches are present. Use inline
2782     code for maximizing the speed, and do the type test once at the start
2783     (i.e. keep it out of the loop). Also we can test that there are at least
2784     the minimum number of bytes before we start. This isn't as effective in
2785     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2786     is tidier. Also separate the UCP code, which can be the same for both UTF-8
2787     and single-bytes. */
2788    
2789     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2790     if (min > 0)
2791     {
2792     #ifdef SUPPORT_UCP
2793 nigel 87 if (prop_type >= 0)
2794 nigel 77 {
2795 nigel 87 switch(prop_type)
2796 nigel 77 {
2797 nigel 87 case PT_ANY:
2798     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2799     for (i = 1; i <= min; i++)
2800     {
2801     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2802 ph10 184 GETCHARINCTEST(c, eptr);
2803 nigel 87 }
2804     break;
2805    
2806     case PT_LAMP:
2807     for (i = 1; i <= min; i++)
2808     {
2809     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2810 ph10 184 GETCHARINCTEST(c, eptr);
2811 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2812     if ((prop_chartype == ucp_Lu ||
2813     prop_chartype == ucp_Ll ||
2814     prop_chartype == ucp_Lt) == prop_fail_result)
2815     RRETURN(MATCH_NOMATCH);
2816     }
2817     break;
2818    
2819     case PT_GC:
2820     for (i = 1; i <= min; i++)
2821     {
2822     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2823 ph10 184 GETCHARINCTEST(c, eptr);
2824 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2825     if ((prop_category == prop_value) == prop_fail_result)
2826     RRETURN(MATCH_NOMATCH);
2827     }
2828     break;
2829    
2830     case PT_PC:
2831     for (i = 1; i <= min; i++)
2832     {
2833     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2834 ph10 184 GETCHARINCTEST(c, eptr);
2835 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2836     if ((prop_chartype == prop_value) == prop_fail_result)
2837     RRETURN(MATCH_NOMATCH);
2838     }
2839     break;
2840    
2841     case PT_SC:
2842     for (i = 1; i <= min; i++)
2843     {
2844     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2845 ph10 184 GETCHARINCTEST(c, eptr);
2846 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2847     if ((prop_script == prop_value) == prop_fail_result)
2848     RRETURN(MATCH_NOMATCH);
2849     }
2850     break;
2851    
2852     default:
2853     RRETURN(PCRE_ERROR_INTERNAL);
2854 nigel 77 }
2855     }
2856    
2857     /* Match extended Unicode sequences. We will get here only if the
2858     support is in the binary; otherwise a compile-time error occurs. */
2859    
2860     else if (ctype == OP_EXTUNI)
2861     {
2862     for (i = 1; i <= min; i++)
2863     {
         /* The length test made before this loop only guarantees min bytes in
         total. One extended sequence can use up several of them (a base
         character plus the marks that follow it), so check for the end of the
         subject again before reading each base character. */
         if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2864     GETCHARINCTEST(c, eptr);
2865 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2866 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
         /* Absorb every following character whose category is ucp_M. */
2867     while (eptr < md->end_subject)
2868     {
2869     int len = 1;
2870     if (!utf8) c = *eptr; else
2871     {
2872     GETCHARLEN(c, eptr, len);
2873     }
2874 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2875 nigel 77 if (prop_category != ucp_M) break;
2876     eptr += len;
2877     }
2878     }
2879     }
2880    
2881     else
2882     #endif /* SUPPORT_UCP */
2883    
2884     /* Handle all other cases when the coding is UTF-8 */
2885    
2886     #ifdef SUPPORT_UTF8
2887     if (utf8) switch(ctype)
2888     {
2889     case OP_ANY:
2890     for (i = 1; i <= min; i++)
2891     {
2892     if (eptr >= md->end_subject ||
2893 nigel 93 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2894 nigel 77 RRETURN(MATCH_NOMATCH);
2895 nigel 91 eptr++;
2896 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2897     }
2898     break;
2899    
2900     case OP_ANYBYTE:
2901     eptr += min;
2902     break;
2903    
2904 nigel 93 case OP_ANYNL:
2905     for (i = 1; i <= min; i++)
2906     {
2907     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2908     GETCHARINC(c, eptr);
2909     switch(c)
2910     {
2911     default: RRETURN(MATCH_NOMATCH);
2912     case 0x000d:
2913     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2914     break;
2915     case 0x000a:
2916     case 0x000b:
2917     case 0x000c:
2918     case 0x0085:
2919     case 0x2028:
2920     case 0x2029:
2921     break;
2922     }
2923     }
2924     break;
2925    
2926 ph10 178 case OP_NOT_HSPACE:
2927     for (i = 1; i <= min; i++)
2928     {
2929     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2930     GETCHARINC(c, eptr);
2931     switch(c)
2932     {
2933     default: break;
2934     case 0x09: /* HT */
2935     case 0x20: /* SPACE */
2936     case 0xa0: /* NBSP */
2937     case 0x1680: /* OGHAM SPACE MARK */
2938     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2939     case 0x2000: /* EN QUAD */
2940     case 0x2001: /* EM QUAD */
2941     case 0x2002: /* EN SPACE */
2942     case 0x2003: /* EM SPACE */
2943     case 0x2004: /* THREE-PER-EM SPACE */
2944     case 0x2005: /* FOUR-PER-EM SPACE */
2945     case 0x2006: /* SIX-PER-EM SPACE */
2946     case 0x2007: /* FIGURE SPACE */
2947     case 0x2008: /* PUNCTUATION SPACE */
2948     case 0x2009: /* THIN SPACE */
2949     case 0x200A: /* HAIR SPACE */
2950     case 0x202f: /* NARROW NO-BREAK SPACE */
2951     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2952     case 0x3000: /* IDEOGRAPHIC SPACE */
2953     RRETURN(MATCH_NOMATCH);
2954     }
2955     }
2956     break;
2957 ph10 182
2958 ph10 178 case OP_HSPACE:
2959     for (i = 1; i <= min; i++)
2960     {
2961     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2962     GETCHARINC(c, eptr);
2963     switch(c)
2964     {
2965     default: RRETURN(MATCH_NOMATCH);
2966     case 0x09: /* HT */
2967     case 0x20: /* SPACE */
2968     case 0xa0: /* NBSP */
2969     case 0x1680: /* OGHAM SPACE MARK */
2970     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2971     case 0x2000: /* EN QUAD */
2972     case 0x2001: /* EM QUAD */
2973     case 0x2002: /* EN SPACE */
2974     case 0x2003: /* EM SPACE */
2975     case 0x2004: /* THREE-PER-EM SPACE */
2976     case 0x2005: /* FOUR-PER-EM SPACE */
2977     case 0x2006: /* SIX-PER-EM SPACE */
2978     case 0x2007: /* FIGURE SPACE */
2979     case 0x2008: /* PUNCTUATION SPACE */
2980     case 0x2009: /* THIN SPACE */
2981     case 0x200A: /* HAIR SPACE */
2982     case 0x202f: /* NARROW NO-BREAK SPACE */
2983     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2984     case 0x3000: /* IDEOGRAPHIC SPACE */
2985     break;
2986     }
2987     }
2988     break;
2989 ph10 182
2990 ph10 178 case OP_NOT_VSPACE:
2991     for (i = 1; i <= min; i++)
2992     {
2993     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2994     GETCHARINC(c, eptr);
2995     switch(c)
2996     {
2997     default: break;
2998     case 0x0a: /* LF */
2999     case 0x0b: /* VT */
3000     case 0x0c: /* FF */
3001     case 0x0d: /* CR */
3002     case 0x85: /* NEL */
3003     case 0x2028: /* LINE SEPARATOR */
3004     case 0x2029: /* PARAGRAPH SEPARATOR */
3005     RRETURN(MATCH_NOMATCH);
3006     }
3007     }
3008     break;
3009 ph10 182
3010 ph10 178 case OP_VSPACE:
3011     for (i = 1; i <= min; i++)
3012     {
3013     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3014     GETCHARINC(c, eptr);
3015     switch(c)
3016     {
3017     default: RRETURN(MATCH_NOMATCH);
3018     case 0x0a: /* LF */
3019     case 0x0b: /* VT */
3020     case 0x0c: /* FF */
3021     case 0x0d: /* CR */
3022     case 0x85: /* NEL */
3023     case 0x2028: /* LINE SEPARATOR */
3024     case 0x2029: /* PARAGRAPH SEPARATOR */
3025 ph10 182 break;
3026 ph10 178 }
3027     }
3028     break;
3029    
3030 nigel 77 case OP_NOT_DIGIT:
3031     for (i = 1; i <= min; i++)
3032     {
3033     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3034     GETCHARINC(c, eptr);
3035     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3036     RRETURN(MATCH_NOMATCH);
3037     }
3038     break;
3039    
3040     case OP_DIGIT:
3041     for (i = 1; i <= min; i++)
3042     {
3043     if (eptr >= md->end_subject ||
3044     *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3045     RRETURN(MATCH_NOMATCH);
3046     /* No need to skip more bytes - we know it's a 1-byte character */
3047     }
3048     break;
3049    
3050     case OP_NOT_WHITESPACE:
         /* Inspect the byte without advancing. A byte >= 128 is not looked up in
         ctypes and is accepted, but the pointer must still move past it. The
         while loop steps over the byte just examined and then over any UTF-8
         continuation bytes (10xxxxxx) that follow it. */
3051     for (i = 1; i <= min; i++)
3052     {
3053     if (eptr >= md->end_subject ||
3054     (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
3055     RRETURN(MATCH_NOMATCH);
3056     while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3057     }
3058     break;
3059    
3060     case OP_WHITESPACE:
3061     for (i = 1; i <= min; i++)
3062     {
3063     if (eptr >= md->end_subject ||
3064     *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3065     RRETURN(MATCH_NOMATCH);
3066     /* No need to skip more bytes - we know it's a 1-byte character */
3067     }
3068     break;
3069    
3070     case OP_NOT_WORDCHAR:
         /* Inspect the byte without advancing. A byte >= 128 is not looked up in
         ctypes and is accepted, but the pointer must still move past it. The
         while loop steps over the byte just examined and then over any UTF-8
         continuation bytes (10xxxxxx) that follow it. */
3071     for (i = 1; i <= min; i++)
3072     {
3073     if (eptr >= md->end_subject ||
3074     (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3075     RRETURN(MATCH_NOMATCH);
3076     while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3077     }
3078     break;
3079    
3080     case OP_WORDCHAR:
3081     for (i = 1; i <= min; i++)
3082     {
3083     if (eptr >= md->end_subject ||
3084     *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3085     RRETURN(MATCH_NOMATCH);
3086     /* No need to skip more bytes - we know it's a 1-byte character */
3087     }
3088     break;
3089    
3090     default:
3091     RRETURN(PCRE_ERROR_INTERNAL);
3092     } /* End switch(ctype) */
3093    
3094     else
3095     #endif /* SUPPORT_UTF8 */
3096    
3097     /* Code for the non-UTF-8 case for minimum matching of operators other
3098 nigel 93 than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3099     number of bytes present, as this was tested above. */
3100 nigel 77
3101     switch(ctype)
3102     {
3103     case OP_ANY:
3104     if ((ims & PCRE_DOTALL) == 0)
3105     {
3106     for (i = 1; i <= min; i++)
3107 nigel 91 {
3108 nigel 93 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3109 nigel 91 eptr++;
3110     }
3111 nigel 77 }
3112     else eptr += min;
3113     break;
3114    
3115     case OP_ANYBYTE:
3116     eptr += min;
3117     break;
3118    
3119 nigel 93 /* Because of the CRLF case, we can't assume the minimum number of
3120     bytes are present in this case. */
3121    
3122     case OP_ANYNL:
3123     for (i = 1; i <= min; i++)
3124     {
3125     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3126     switch(*eptr++)
3127     {
3128     default: RRETURN(MATCH_NOMATCH);
3129     case 0x000d:
3130     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3131     break;
3132     case 0x000a:
3133     case 0x000b:
3134     case 0x000c:
3135     case 0x0085:
3136     break;
3137     }
3138     }
3139     break;
3140    
3141 ph10 178 case OP_NOT_HSPACE:
3142     for (i = 1; i <= min; i++)
3143     {
3144     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3145     switch(*eptr++)
3146     {
3147     default: break;
3148     case 0x09: /* HT */
3149     case 0x20: /* SPACE */
3150     case 0xa0: /* NBSP */
3151     RRETURN(MATCH_NOMATCH);
3152     }
3153     }
3154     break;
3155    
3156     case OP_HSPACE:
3157     for (i = 1; i <= min; i++)
3158     {
3159     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3160     switch(*eptr++)
3161     {
3162     default: RRETURN(MATCH_NOMATCH);
3163     case 0x09: /* HT */
3164     case 0x20: /* SPACE */
3165     case 0xa0: /* NBSP */
3166 ph10 182 break;
3167 ph10 178 }
3168     }
3169     break;
3170    
3171     case OP_NOT_VSPACE:
3172     for (i = 1; i <= min; i++)
3173     {
3174     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3175     switch(*eptr++)
3176     {
3177     default: break;
3178     case 0x0a: /* LF */
3179     case 0x0b: /* VT */
3180     case 0x0c: /* FF */
3181     case 0x0d: /* CR */
3182     case 0x85: /* NEL */
3183     RRETURN(MATCH_NOMATCH);
3184     }
3185     }
3186     break;
3187    
3188     case OP_VSPACE:
3189     for (i = 1; i <= min; i++)
3190     {
3191     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3192     switch(*eptr++)
3193     {
3194     default: RRETURN(MATCH_NOMATCH);
3195     case 0x0a: /* LF */
3196     case 0x0b: /* VT */
3197     case 0x0c: /* FF */
3198     case 0x0d: /* CR */
3199     case 0x85: /* NEL */
3200 ph10 182 break;
3201 ph10 178 }
3202     }
3203     break;
3204    
3205 nigel 77 case OP_NOT_DIGIT:
3206     for (i = 1; i <= min; i++)
3207     if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3208     break;
3209    
3210     case OP_DIGIT:
3211     for (i = 1; i <= min; i++)
3212     if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3213     break;
3214    
3215     case OP_NOT_WHITESPACE:
3216     for (i = 1; i <= min; i++)
3217     if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3218     break;
3219    
3220     case OP_WHITESPACE:
3221     for (i = 1; i <= min; i++)
3222     if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3223     break;
3224    
3225     case OP_NOT_WORDCHAR:
3226     for (i = 1; i <= min; i++)
3227     if ((md->ctypes[*eptr++] & ctype_word) != 0)
3228     RRETURN(MATCH_NOMATCH);
3229     break;
3230    
3231     case OP_WORDCHAR:
3232     for (i = 1; i <= min; i++)
3233     if ((md->ctypes[*eptr++] & ctype_word) == 0)
3234     RRETURN(MATCH_NOMATCH);
3235     break;
3236    
3237     default:
3238     RRETURN(PCRE_ERROR_INTERNAL);
3239     }
3240     }
3241    
3242     /* If min = max, continue at the same level without recursing */
3243    
3244     if (min == max) continue;
3245    
3246     /* If minimizing, we have to test the rest of the pattern before each
3247     subsequent match. Again, separate the UTF-8 case for speed, and also
3248     separate the UCP cases. */
3249    
3250     if (minimize)
3251     {
3252     #ifdef SUPPORT_UCP
3253 nigel 87 if (prop_type >= 0)
3254 nigel 77 {
         /* Minimizing repeat of a property test. Each loop first tries the rest
         of the pattern at the current position; only if that fails does it take
         one more character, provided the repeat limit and the end of the subject
         have not been reached and the character passes the property test.
         Characters are fetched with GETCHARINCTEST, the same macro that the
         minimum-count loops for these property types use, rather than GETCHARINC.
         NOTE(review): this relies on GETCHARINCTEST decoding a multi-byte
         sequence only when utf8 is set; its definition is outside this section -
         confirm. */
3255 nigel 87 switch(prop_type)
3256 nigel 77 {
3257 nigel 87 case PT_ANY:
3258     for (fi = min;; fi++)
3259     {
3260 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3261 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3262     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3263     GETCHARINCTEST(c, eptr);
3264     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3265     }
3266 nigel 93 /* Control never gets here */
3267 nigel 87
3268     case PT_LAMP:
3269     for (fi = min;; fi++)
3270     {
3271 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3272 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3273     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3274     GETCHARINCTEST(c, eptr);
3275     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3276     if ((prop_chartype == ucp_Lu ||
3277     prop_chartype == ucp_Ll ||
3278     prop_chartype == ucp_Lt) == prop_fail_result)
3279     RRETURN(MATCH_NOMATCH);
3280     }
3281 nigel 93 /* Control never gets here */
3282 nigel 87
3283     case PT_GC:
3284     for (fi = min;; fi++)
3285     {
3286 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3287 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3288     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3289     GETCHARINCTEST(c, eptr);
3290     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3291     if ((prop_category == prop_value) == prop_fail_result)
3292     RRETURN(MATCH_NOMATCH);
3293     }
3294 nigel 93 /* Control never gets here */
3295 nigel 87
3296     case PT_PC:
3297     for (fi = min;; fi++)
3298     {
3299 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3300 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3302     GETCHARINCTEST(c, eptr);
3303     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3304     if ((prop_chartype == prop_value) == prop_fail_result)
3305     RRETURN(MATCH_NOMATCH);
3306     }
3307 nigel 93 /* Control never gets here */
3308 nigel 87
3309     case PT_SC:
3310     for (fi = min;; fi++)
3311     {
3312 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3313 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3314     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3315     GETCHARINCTEST(c, eptr);
3316     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3317     if ((prop_script == prop_value) == prop_fail_result)
3318     RRETURN(MATCH_NOMATCH);
3319     }
3320 nigel 93 /* Control never gets here */
3321 nigel 87
3322     default:
3323     RRETURN(PCRE_ERROR_INTERNAL);
3324 nigel 77 }
3325     }
3326    
3327     /* Match extended Unicode sequences. We will get here only if the
3328     support is in the binary; otherwise a compile-time error occurs. */
3329    
3330     else if (ctype == OP_EXTUNI)
3331     {
3332     for (fi = min;; fi++)
3333     {
3334 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3335 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3336     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3337     GETCHARINCTEST(c, eptr);
3338 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3339 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3340     while (eptr < md->end_subject)
3341     {
3342     int len = 1;
3343     if (!utf8) c = *eptr; else
3344     {
3345     GETCHARLEN(c, eptr, len);
3346     }
3347 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3348 nigel 77 if (prop_category != ucp_M) break;
3349     eptr += len;
3350     }
3351     }
3352     }
3353    
3354     else
3355     #endif /* SUPPORT_UCP */
3356    
3357     #ifdef SUPPORT_UTF8
3358     /* UTF-8 mode */
3359     if (utf8)
3360     {
3361     for (fi = min;; fi++)
3362     {
3363 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3364 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3365 nigel 91 if (fi >= max || eptr >= md->end_subject ||
3366     (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3367 nigel 93 IS_NEWLINE(eptr)))
3368 nigel 91 RRETURN(MATCH_NOMATCH);
3369 nigel 77
3370     GETCHARINC(c, eptr);
3371     switch(ctype)
3372     {
3373 nigel 91 case OP_ANY: /* This is the DOTALL case */
3374 nigel 77 break;
3375    
3376     case OP_ANYBYTE:
3377     break;
3378    
3379 nigel 93 case OP_ANYNL:
3380     switch(c)
3381     {
3382     default: RRETURN(MATCH_NOMATCH);
3383     case 0x000d:
3384     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3385     break;
3386     case 0x000a:
3387     case 0x000b:
3388     case 0x000c:
3389     case 0x0085:
3390     case 0x2028:
3391     case 0x2029:
3392     break;
3393     }
3394     break;
3395    
3396 ph10 178 case OP_NOT_HSPACE:
3397     switch(c)
3398     {
3399     default: break;
3400     case 0x09: /* HT */
3401     case 0x20: /* SPACE */
3402     case 0xa0: /* NBSP */
3403     case 0x1680: /* OGHAM SPACE MARK */
3404     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3405     case 0x2000: /* EN QUAD */
3406     case 0x2001: /* EM QUAD */
3407     case 0x2002: /* EN SPACE */
3408     case 0x2003: /* EM SPACE */
3409     case 0x2004: /* THREE-PER-EM SPACE */
3410     case 0x2005: /* FOUR-PER-EM SPACE */
3411     case 0x2006: /* SIX-PER-EM SPACE */
3412     case 0x2007: /* FIGURE SPACE */
3413     case 0x2008: /* PUNCTUATION SPACE */
3414     case 0x2009: /* THIN SPACE */
3415     case 0x200A: /* HAIR SPACE */
3416     case 0x202f: /* NARROW NO-BREAK SPACE */
3417     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3418     case 0x3000: /* IDEOGRAPHIC SPACE */
3419     RRETURN(MATCH_NOMATCH);
3420     }
3421     break;
3422    
3423     case OP_HSPACE:
3424     switch(c)
3425     {
3426     default: RRETURN(MATCH_NOMATCH);
3427     case 0x09: /* HT */
3428     case 0x20: /* SPACE */
3429     case 0xa0: /* NBSP */
3430     case 0x1680: /* OGHAM SPACE MARK */
3431     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3432     case 0x2000: /* EN QUAD */
3433     case 0x2001: /* EM QUAD */
3434     case 0x2002: /* EN SPACE */
3435     case 0x2003: /* EM SPACE */
3436     case 0x2004: /* THREE-PER-EM SPACE */
3437     case 0x2005: /* FOUR-PER-EM SPACE */
3438     case 0x2006: /* SIX-PER-EM SPACE */
3439     case 0x2007: /* FIGURE SPACE */
3440     case 0x2008: /* PUNCTUATION SPACE */
3441     case 0x2009: /* THIN SPACE */
3442     case 0x200A: /* HAIR SPACE */
3443     case 0x202f: /* NARROW NO-BREAK SPACE */
3444     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3445     case 0x3000: /* IDEOGRAPHIC SPACE */
3446     break;
3447     }
3448     break;
3449    
3450     case OP_NOT_VSPACE:
3451     switch(c)
3452     {
3453     default: break;
3454     case 0x0a: /* LF */
3455     case 0x0b: /* VT */
3456     case 0x0c: /* FF */
3457     case 0x0d: /* CR */
3458     case 0x85: /* NEL */
3459     case 0x2028: /* LINE SEPARATOR */
3460     case 0x2029: /* PARAGRAPH SEPARATOR */
3461     RRETURN(MATCH_NOMATCH);
3462     }
3463     break;
3464    
3465     case OP_VSPACE:
3466     switch(c)
3467     {
3468     default: RRETURN(MATCH_NOMATCH);
3469     case 0x0a: /* LF */
3470     case 0x0b: /* VT */
3471     case 0x0c: /* FF */
3472     case 0x0d: /* CR */
3473     case 0x85: /* NEL */
3474     case 0x2028: /* LINE SEPARATOR */
3475     case 0x2029: /* PARAGRAPH SEPARATOR */
3476     break;
3477     }
3478     break;
3479    
3480 nigel 77 case OP_NOT_DIGIT:
3481     if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3482     RRETURN(MATCH_NOMATCH);
3483     break;
3484    
3485     case OP_DIGIT:
3486     if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3487     RRETURN(MATCH_NOMATCH);
3488     break;
3489    
3490     case OP_NOT_WHITESPACE:
3491     if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3492     RRETURN(MATCH_NOMATCH);
3493     break;
3494    
3495     case OP_WHITESPACE:
3496     if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3497     RRETURN(MATCH_NOMATCH);
3498     break;
3499    
3500     case OP_NOT_WORDCHAR:
3501     if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3502     RRETURN(MATCH_NOMATCH);
3503     break;
3504    
3505     case OP_WORDCHAR:
3506     if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3507     RRETURN(MATCH_NOMATCH);
3508     break;
3509    
3510     default:
3511     RRETURN(PCRE_ERROR_INTERNAL);
3512     }
3513     }
3514     }
3515     else
3516     #endif
3517     /* Not UTF-8 mode */
3518     {
3519     for (fi = min;; fi++)
3520     {
3521 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3522 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
         /* Give up when the repeat limit or the end of the subject is reached.
         The newline test applies only to '.' (OP_ANY) without PCRE_DOTALL. The
         other types handled by the switch that follows (OP_ANYNL, OP_VSPACE,
         OP_WHITESPACE, the negated types, ...) decide for themselves whether a
         newline byte is acceptable, so they must be allowed to reach it. */
3523 nigel 91 if (fi >= max || eptr >= md->end_subject ||
3524 nigel 93 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3525 nigel 91 RRETURN(MATCH_NOMATCH);
3526    
3527 nigel 77 c = *eptr++;
3528     switch(ctype)
3529     {
3530 nigel 91 case OP_ANY: /* This is the DOTALL case */
3531 nigel 77 break;
3532    
3533     case OP_ANYBYTE:
3534     break;
3535    
3536 nigel 93 case OP_ANYNL:
3537     switch(c)
3538     {
3539     default: RRETURN(MATCH_NOMATCH);
3540     case 0x000d:
3541     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3542     break;
3543     case 0x000a:
3544     case 0x000b:
3545     case 0x000c:
3546     case 0x0085:
3547     break;
3548     }
3549     break;
3550    
3551 ph10 178 case OP_NOT_HSPACE:
3552     switch(c)
3553     {
3554     default: break;
3555     case 0x09: /* HT */
3556     case 0x20: /* SPACE */
3557     case 0xa0: /* NBSP */
3558     RRETURN(MATCH_NOMATCH);
3559     }
3560     break;
3561    
3562     case OP_HSPACE:
3563     switch(c)
3564     {
3565     default: RRETURN(MATCH_NOMATCH);
3566     case 0x09: /* HT */
3567     case 0x20: /* SPACE */
3568     case 0xa0: /* NBSP */
3569     break;
3570     }
3571     break;
3572    
3573     case OP_NOT_VSPACE:
3574     switch(c)
3575     {
3576     default: break;
3577     case 0x0a: /* LF */
3578     case 0x0b: /* VT */
3579     case 0x0c: /* FF */
3580     case 0x0d: /* CR */
3581     case 0x85: /* NEL */
3582     RRETURN(MATCH_NOMATCH);
3583     }
3584     break;
3585    
3586     case OP_VSPACE:
3587     switch(c)
3588     {
3589     default: RRETURN(MATCH_NOMATCH);
3590     case 0x0a: /* LF */
3591     case 0x0b: /* VT */
3592     case 0x0c: /* FF */
3593     case 0x0d: /* CR */
3594     case 0x85: /* NEL */
3595     break;
3596     }
3597     break;
3598    
3599 nigel 77 case OP_NOT_DIGIT:
3600     if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3601     break;
3602    
3603     case OP_DIGIT:
3604     if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3605     break;
3606    
3607     case OP_NOT_WHITESPACE:
3608     if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3609     break;
3610    
3611     case OP_WHITESPACE:
3612     if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3613     break;
3614    
3615     case OP_NOT_WORDCHAR:
3616     if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3617     break;
3618    
3619     case OP_WORDCHAR:
3620     if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3621     break;
3622    
3623     default:
3624     RRETURN(PCRE_ERROR_INTERNAL);
3625     }
3626     }
3627     }
3628     /* Control never gets here */
3629     }
3630    
3631 nigel 93 /* If maximizing, it is worth using inline code for speed, doing the type
3632 nigel 77 test once at the start (i.e. keep it out of the loop). Again, keep the
3633     UTF-8 and UCP stuff separate. */
3634    
3635     else
3636     {
3637     pp = eptr; /* Remember where we started */
3638    
3639     #ifdef SUPPORT_UCP
3640 nigel 87 if (prop_type >= 0)
3641 nigel 77 {
3642 nigel 87 switch(prop_type)
3643 nigel 77 {
3644 nigel 87 case PT_ANY:
3645     for (i = min; i < max; i++)
3646     {
3647     int len = 1;
3648     if (eptr >= md->end_subject) break;
3649     GETCHARLEN(c, eptr, len);
3650     if (prop_fail_result) break;
3651     eptr+= len;
3652     }
3653     break;
3654    
3655     case PT_LAMP:
3656     for (i = min; i < max; i++)
3657     {
3658     int len = 1;
3659     if (eptr >= md->end_subject) break;
3660     GETCHARLEN(c, eptr, len);
3661     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3662     if ((prop_chartype == ucp_Lu ||
3663     prop_chartype == ucp_Ll ||
3664     prop_chartype == ucp_Lt) == prop_fail_result)
3665     break;
3666     eptr+= len;
3667     }
3668     break;
3669    
3670     case PT_GC:
3671     for (i = min; i < max; i++)
3672     {
3673     int len = 1;
3674     if (eptr >= md->end_subject) break;
3675     GETCHARLEN(c, eptr, len);
3676     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3677     if ((prop_category == prop_value) == prop_fail_result)
3678     break;
3679     eptr+= len;
3680     }
3681     break;
3682    
3683     case PT_PC:
3684     for (i = min; i < max; i++)
3685     {
3686     int len = 1;
3687     if (eptr >= md->end_subject) break;
3688     GETCHARLEN(c, eptr, len);
3689     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3690     if ((prop_chartype == prop_value) == prop_fail_result)
3691     break;
3692     eptr+= len;
3693     }
3694     break;
3695    
3696     case PT_SC:
3697     for (i = min; i < max; i++)
3698     {
3699     int len = 1;
3700     if (eptr >= md->end_subject) break;
3701     GETCHARLEN(c, eptr, len);
3702     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3703     if ((prop_script == prop_value) == prop_fail_result)
3704     break;
3705     eptr+= len;
3706     }
3707     break;
3708 nigel 77 }
3709    
3710     /* eptr is now past the end of the maximum run */
3711    
3712 nigel 93 if (possessive) continue;
3713 nigel 77 for(;;)
3714     {
3715 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3716 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3717     if (eptr-- == pp) break; /* Stop if tried at original pos */
3718     BACKCHAR(eptr);
3719     }
3720     }
3721    
3722     /* Match extended Unicode sequences. We will get here only if the
3723     support is in the binary; otherwise a compile-time error occurs. */
3724    
3725     else if (ctype == OP_EXTUNI)
3726     {
3727     for (i = min; i < max; i++)
3728     {
3729     if (eptr >= md->end_subject) break;
3730     GETCHARINCTEST(c, eptr);
3731 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3732 nigel 77 if (prop_category == ucp_M) break;
3733     while (eptr < md->end_subject)
3734     {
3735     int len = 1;
3736     if (!utf8) c = *eptr; else
3737     {
3738     GETCHARLEN(c, eptr, len);
3739     }
3740 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3741 nigel 77 if (prop_category != ucp_M) break;
3742     eptr += len;
3743     }
3744     }
3745    
3746     /* eptr is now past the end of the maximum run */
3747    
3748 nigel 93 if (possessive) continue;
3749 nigel 77 for(;;)
3750     {
3751 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3752 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3753     if (eptr-- == pp) break; /* Stop if tried at original pos */
3754     for (;;) /* Move back over one extended */
3755     {
3756     int len = 1;
3757     BACKCHAR(eptr);
3758     if (!utf8) c = *eptr; else
3759     {
3760     GETCHARLEN(c, eptr, len);
3761     }
3762 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3763 nigel 77 if (prop_category != ucp_M) break;
3764     eptr--;
3765     }
3766     }
3767     }
3768    
3769     else
3770     #endif /* SUPPORT_UCP */
3771    
3772     #ifdef SUPPORT_UTF8
3773     /* UTF-8 mode */
3774    
3775     if (utf8)
3776     {
3777     switch(ctype)
3778     {
3779     case OP_ANY:
3780     if (max < INT_MAX)
3781     {
3782     if ((ims & PCRE_DOTALL) == 0)
3783     {
3784     for (i = min; i < max; i++)
3785     {
3786 nigel 93 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3787 nigel 77 eptr++;
3788     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3789     }
3790     }
3791     else
3792     {
3793     for (i = min; i < max; i++)
3794     {
3795 nigel 91 if (eptr >= md->end_subject) break;
3796 nigel 77 eptr++;
3797     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3798     }
3799     }
3800     }
3801    
3802     /* Handle unlimited UTF-8 repeat */
3803    
3804     else
3805     {
3806     if ((ims & PCRE_DOTALL) == 0)
3807     {
3808     for (i = min; i < max; i++)
3809     {
3810 nigel 93 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3811 nigel 77 eptr++;
3812 ph10 190 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3813 nigel 77 }
3814     }
3815     else
3816     {
3817 ph10 190 eptr = md->end_subject;
3818 nigel 77 }
3819     }
3820     break;
3821    
3822     /* The byte case is the same as non-UTF8 */
3823    
3824     case OP_ANYBYTE:
3825     c = max - min;
3826 nigel 93 if (c > (unsigned int)(md->end_subject - eptr))
3827     c = md->end_subject - eptr;
3828 nigel 77 eptr += c;
3829     break;
3830    
3831 nigel 93 case OP_ANYNL:
3832     for (i = min; i < max; i++)
3833     {
3834     int len = 1;
3835     if (eptr >= md->end_subject) break;
3836     GETCHARLEN(c, eptr, len);
3837     if (c == 0x000d)
3838     {
3839     if (++eptr >= md->end_subject) break;
3840     if (*eptr == 0x000a) eptr++;
3841     }
3842     else
3843     {
3844     if (c != 0x000a && c != 0x000b && c != 0x000c &&
3845     c != 0x0085 && c != 0x2028 && c != 0x2029)
3846     break;
3847     eptr += len;
3848     }
3849     }
3850     break;
3851    
3852 ph10 178 case OP_NOT_HSPACE:
3853 ph10 182 case OP_HSPACE:
3854 ph10 178 for (i = min; i < max; i++)
3855     {
3856 ph10 182 BOOL gotspace;
3857 ph10 178 int len = 1;
3858     if (eptr >= md->end_subject) break;
3859     GETCHARLEN(c, eptr, len);
3860     switch(c)
3861 ph10 182 {
3862     default: gotspace = FALSE; break;
3863 ph10 178 case 0x09: /* HT */
3864     case 0x20: /* SPACE */
3865     case 0xa0: /* NBSP */
3866     case 0x1680: /* OGHAM SPACE MARK */
3867     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3868     case 0x2000: /* EN QUAD */
3869     case 0x2001: /* EM QUAD */
3870     case 0x2002: /* EN SPACE */
3871     case 0x2003: /* EM SPACE */
3872     case 0x2004: /* THREE-PER-EM SPACE */
3873     case 0x2005: /* FOUR-PER-EM SPACE */
3874     case 0x2006: /* SIX-PER-EM SPACE */
3875     case 0x2007: /* FIGURE SPACE */
3876     case 0x2008: /* PUNCTUATION SPACE */
3877     case 0x2009: /* THIN SPACE */
3878     case 0x200A: /* HAIR SPACE */
3879     case 0x202f: /* NARROW NO-BREAK SPACE */
3880     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3881     case 0x3000: /* IDEOGRAPHIC SPACE */
3882     gotspace = TRUE;
3883 ph10 182 break;
3884 ph10 178 }
3885     if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3886     eptr += len;
3887     }
3888     break;
3889    
3890     case OP_NOT_VSPACE:
3891 ph10 182 case OP_VSPACE:
3892 ph10 178 for (i = min; i < max; i++)
3893     {
3894 ph10 182 BOOL gotspace;
3895 ph10 178 int len = 1;
3896     if (eptr >= md->end_subject) break;
3897     GETCHARLEN(c, eptr, len);
3898     switch(c)
3899     {
3900 ph10 182 default: gotspace = FALSE; break;
3901 ph10 178 case 0x0a: /* LF */
3902     case 0x0b: /* VT */
3903     case 0x0c: /* FF */
3904     case 0x0d: /* CR */
3905     case 0x85: /* NEL */
3906     case 0x2028: /* LINE SEPARATOR */
3907     case 0x2029: /* PARAGRAPH SEPARATOR */
3908     gotspace = TRUE;
3909     break;
3910     }
3911 ph10 182 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3912 ph10 178 eptr += len;
3913     }
3914     break;
3915    
3916 nigel 77 case OP_NOT_DIGIT:
3917     for (i = min; i < max; i++)
3918     {
3919     int len = 1;
3920     if (eptr >= md->end_subject) break;
3921     GETCHARLEN(c, eptr, len);
3922     if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3923     eptr+= len;
3924     }
3925     break;
3926    
3927     case OP_DIGIT:
3928     for (i = min; i < max; i++)
3929     {
3930     int len = 1;
3931     if (eptr >= md->end_subject) break;
3932     GETCHARLEN(c, eptr, len);
3933     if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3934     eptr+= len;
3935     }
3936     break;
3937    
3938     case OP_NOT_WHITESPACE:
3939     for (i = min; i < max; i++)
3940     {
3941     int len = 1;
3942     if (eptr >= md->end_subject) break;
3943     GETCHARLEN(c, eptr, len);
3944     if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3945     eptr+= len;
3946     }
3947     break;
3948    
3949     case OP_WHITESPACE:
3950     for (i = min; i < max; i++)
3951     {
3952     int len = 1;
3953     if (eptr >= md->end_subject) break;
3954     GETCHARLEN(c, eptr, len);
3955     if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3956     eptr+= len;
3957     }
3958     break;
3959    
3960     case OP_NOT_WORDCHAR:
3961     for (i = min; i < max; i++)
3962     {
3963     int len = 1;
3964     if (eptr >= md->end_subject) break;
3965     GETCHARLEN(c, eptr, len);
3966     if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3967     eptr+= len;
3968     }
3969     break;
3970    
3971     case OP_WORDCHAR:
3972     for (i = min; i < max; i++)
3973     {
3974     int len = 1;
3975     if (eptr >= md->end_subject) break;
3976     GETCHARLEN(c, eptr, len);
3977     if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3978     eptr+= len;
3979     }
3980     break;
3981    
3982     default:
3983     RRETURN(PCRE_ERROR_INTERNAL);
3984     }
3985    
3986     /* eptr is now past the end of the maximum run */
3987    
3988 nigel 93 if (possessive) continue;
3989 nigel 77 for(;;)
3990     {
3991 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3992 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3993     if (eptr-- == pp) break; /* Stop if tried at original pos */
3994     BACKCHAR(eptr);
3995     }
3996     }
3997     else
3998     #endif
3999    
4000     /* Not UTF-8 mode */
4001     {
4002     switch(ctype)
4003     {
4004     case OP_ANY:
4005     if ((ims & PCRE_DOTALL) == 0)
4006     {
4007     for (i = min; i < max; i++)
4008     {
4009 nigel 93 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4010 nigel 77 eptr++;
4011     }
4012     break;
4013     }
4014     /* For DOTALL case, fall through and treat as \C */
4015    
4016     case OP_ANYBYTE:
4017     c = max - min;
4018 nigel 93 if (c > (unsigned int)(md->end_subject - eptr))
4019     c = md->end_subject - eptr;
4020 nigel 77 eptr += c;
4021     break;
4022    
4023 nigel 93 case OP_ANYNL:
4024     for (i = min; i < max; i++)
4025     {
4026     if (eptr >= md->end_subject) break;
4027     c = *eptr;
4028     if (c == 0x000d)
4029     {
4030     if (++eptr >= md->end_subject) break;
4031     if (*eptr == 0x000a) eptr++;
4032     }
4033     else
4034     {
4035     if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
4036     break;
4037     eptr++;
4038     }
4039     }
4040     break;
4041    
4042 ph10 178 case OP_NOT_HSPACE:
4043     for (i = min; i < max; i++)
4044     {
4045     if (eptr >= md->end_subject) break;
4046     c = *eptr;
4047     if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4048 ph10 182 eptr++;
4049 ph10 178 }
4050     break;
4051    
4052     case OP_HSPACE:
4053     for (i = min; i < max; i++)
4054     {
4055     if (eptr >= md->end_subject) break;
4056     c = *eptr;
4057     if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4058 ph10 182 eptr++;
4059 ph10 178 }
4060     break;
4061    
4062     case OP_NOT_VSPACE:
4063     for (i = min; i < max; i++)
4064     {
4065     if (eptr >= md->end_subject) break;
4066     c = *eptr;
4067     if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4068     break;
4069 ph10 182 eptr++;
4070 ph10 178 }
4071     break;
4072    
4073     case OP_VSPACE:
4074     for (i = min; i < max; i++)
4075     {
4076     if (eptr >= md->end_subject) break;
4077     c = *eptr;
4078     if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4079     break;
4080     eptr++;
4081     }
4082     break;
4083    
4084 nigel 77 case OP_NOT_DIGIT:
4085     for (i = min; i < max; i++)
4086     {
4087     if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4088     break;
4089     eptr++;
4090     }
4091     break;
4092    
4093     case OP_DIGIT:
4094     for (i = min; i < max; i++)
4095     {
4096     if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4097     break;
4098     eptr++;
4099     }
4100     break;
4101    
4102     case OP_NOT_WHITESPACE:
4103     for (i = min; i < max; i++)
4104     {
4105     if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4106     break;
4107     eptr++;
4108     }
4109     break;
4110    
4111     case OP_WHITESPACE:
4112     for (i = min; i < max; i++)
4113     {
4114     if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4115     break;
4116     eptr++;
4117     }
4118     break;
4119    
4120     case OP_NOT_WORDCHAR:
4121     for (i = min; i < max; i++)
4122     {
4123     if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4124     break;
4125     eptr++;
4126     }
4127     break;
4128    
4129     case OP_WORDCHAR:
4130     for (i = min; i < max; i++)
4131     {
4132     if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4133     break;
4134     eptr++;
4135     }
4136     break;
4137    
4138     default:
4139     RRETURN(PCRE_ERROR_INTERNAL);
4140     }
4141    
4142     /* eptr is now past the end of the maximum run */
4143    
4144 nigel 93 if (possessive) continue;
4145 nigel 77 while (eptr >= pp)
4146     {
4147 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4148 nigel 77 eptr--;
4149     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4150     }
4151     }
4152    
4153     /* Get here if we can't make it match with any permitted repetitions */
4154    
4155     RRETURN(MATCH_NOMATCH);
4156     }
4157     /* Control never gets here */
4158    
4159 nigel 93 /* There's been some horrible disaster. Arrival here can only mean there is
4160     something seriously wrong in the code above or the OP_xxx definitions. */
4161 nigel 77
4162