/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 345 - (hide annotations) (download)
Mon Apr 28 15:10:02 2008 UTC (6 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 150741 byte(s)
Tidies for the 7.7-RC1 distribution.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 305 Copyright (c) 1997-2008 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74     #define MATCH_COMMIT (-999)
75     #define MATCH_PRUNE (-998)
76     #define MATCH_SKIP (-997)
77     #define MATCH_THEN (-996)
78    
79 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
80     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81     because the offset vector is always a multiple of 3 long. */
82    
83     #define REC_STACK_SAVE_MAX 30
84    
85     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
86    
87     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
88     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
89    
90    
91    
92     #ifdef DEBUG
93     /*************************************************
94     * Debugging function to print chars *
95     *************************************************/
96    
97     /* Print a sequence of chars in printable format, stopping at the end of the
98     subject if requested.
99    
100     Arguments:
101     p points to characters
102     length number to print
103     is_subject TRUE if printing from within md->start_subject
104     md pointer to matching data block, if is_subject is TRUE
105    
106     Returns: nothing
107     */
108    
109     static void
110     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111     {
112 nigel 93 unsigned int c;
113 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114     while (length-- > 0)
115     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
116     }
117     #endif
118    
119    
120    
121     /*************************************************
122     * Match a back-reference *
123     *************************************************/
124    
125     /* If a back reference hasn't been set, the length that is passed is greater
126     than the number of characters left in the string, so the match fails.
127    
128     Arguments:
129     offset index into the offset vector
130     eptr points into the subject
131     length length to be matched
132     md points to match data block
133     ims the ims flags
134    
135     Returns: TRUE if matched
136     */
137    
138     static BOOL
139 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
140 nigel 77 unsigned long int ims)
141     {
142 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
143 nigel 77
144     #ifdef DEBUG
145     if (eptr >= md->end_subject)
146     printf("matching subject <null>");
147     else
148     {
149     printf("matching subject ");
150     pchars(eptr, length, TRUE, md);
151     }
152     printf(" against backref ");
153     pchars(p, length, FALSE, md);
154     printf("\n");
155     #endif
156    
157     /* Always fail if not enough characters left */
158    
159     if (length > md->end_subject - eptr) return FALSE;
160    
161     /* Separate the caselesss case for speed */
162    
163     if ((ims & PCRE_CASELESS) != 0)
164     {
165     while (length-- > 0)
166     if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
167     }
168     else
169     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
170    
171     return TRUE;
172     }
173    
174    
175    
176     /***************************************************************************
177     ****************************************************************************
178     RECURSION IN THE match() FUNCTION
179    
180 nigel 87 The match() function is highly recursive, though not every recursive call
181     increases the recursive depth. Nevertheless, some regular expressions can cause
182     it to recurse to a great depth. I was writing for Unix, so I just let it call
183     itself recursively. This uses the stack for saving everything that has to be
184     saved for a recursive call. On Unix, the stack can be large, and this works
185     fine.
186 nigel 77
187 nigel 87 It turns out that on some non-Unix-like systems there are problems with
188     programs that use a lot of stack. (This despite the fact that every last chip
189     has oodles of memory these days, and techniques for extending the stack have
190     been known for decades.) So....
191 nigel 77
192     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
193     calls by keeping local variables that need to be preserved in blocks of memory
194 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
195 nigel 77 achieve this so that the actual code doesn't look very different to what it
196     always used to.
197 ph10 164
198 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
199 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
200     Switzer, the use of longjmp() has been abolished, at the cost of having to
201     provide a unique number for each call to RMATCH. There is no way of generating
202     a sequence of numbers at compile time in C. I have given them names, to make
203     them stand out more clearly.
204    
205     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
206     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
207 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
208     don't have indeterminate values; this has meant that the frame size can be
209 ph10 164 reduced because the result can be "passed back" by straight setting of the
210     variable instead of being passed in the frame.
211 nigel 77 ****************************************************************************
212     ***************************************************************************/
213    
214 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
215     below must be updated in sync. */
216 nigel 77
217 ph10 164 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
218     RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
219     RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
220     RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
221 ph10 210 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
222 ph10 212 RM51, RM52, RM53, RM54 };
223 ph10 164
224 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
225 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
226 ph10 164 actually used in this definition. */
227 nigel 77
228     #ifndef NO_RECURSE
229     #define REGISTER register
230 ph10 164
231 nigel 87 #ifdef DEBUG
/* Debugging RMATCH: print the calling line, call match() one level deeper
(rdepth+1) with the result left in rrc, then print the line returned to. Note
that mstart is not a macro argument; it is picked up from the enclosing scope.
The rw argument is ignored here. */
232 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
233 nigel 87 { \
234     printf("match() called in line %d\n", __LINE__); \
235 ph10 168 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
236 nigel 87 printf("to line %d\n", __LINE__); \
237     }
/* Debugging RRETURN: print the value and the returning line (no newline is
written here), then return the value. */
238     #define RRETURN(ra) \
239     { \
240     printf("match() returned %d from line %d ", ra, __LINE__); \
241     return ra; \
242     }
243     #else
/* Production versions: a plain recursive call whose result is stored in rrc,
and a plain return. RMATCH expands to a single expression statement without
braces or a trailing semicolon. */
244 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
245 ph10 168 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
246 nigel 77 #define RRETURN(ra) return ra
247 nigel 87 #endif
248    
249 nigel 77 #else
250    
251    
252 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
253     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
254     argument of match(), which never changes. */
255 nigel 77
256     #define REGISTER
257    
/* Heap-frame version of RMATCH. Instead of calling match() recursively, this
obtains a new heapframe from pcre_stack_malloc(), records in the current frame
the number (rw) of this call site, fills the new frame with the argument
values, links it back to the current frame, makes it current, and jumps to
HEAP_RECURSE. The label L_<rw> that follows is where execution resumes for
this call site (presumably via the dispatch at HEAP_RETURN, which is not part
of this macro). The rd argument (md) is not stored because it never changes.

If the new frame cannot be allocated, the match is abandoned by way of
RRETURN(PCRE_ERROR_NOMEMORY), which releases the current frame and passes the
error back, instead of writing through a NULL pointer. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
277    
/* Heap-frame version of RRETURN. Pop the current frame off the private stack
and release it with pcre_stack_free(). If that was the outermost frame (its
Xprevframe was NULL), really return from match() with the given value;
otherwise hand the value back in rrc and jump to HEAP_RETURN to resume the
frame that is now current. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  (pcre_stack_free)(oldframe);\
  if (frame == NULL) return ra;\
  rrc = ra;\
  goto HEAP_RETURN;\
  }
290    
291    
292     /* Structure for remembering the local variables in a private frame */
293    
/* One frame of the private heap "stack" used when NO_RECURSE is defined. Each
RMATCH allocates a frame, copies the call arguments into it and links it to
the frame that was current; RRETURN unlinks and frees it. Inside match() every
field is reached through a same-named macro without the X prefix (for example
eptr is frame->Xeptr). */
294     typedef struct heapframe {
/* Link to the frame that was current when this one was created; NULL marks
the top-level frame. */
295     struct heapframe *Xprevframe;
296    
297     /* Function arguments that may change */
298    
299     const uschar *Xeptr;
300     const uschar *Xecode;
301 ph10 172 const uschar *Xmstart;
302 nigel 77 int Xoffset_top;
/* NOTE(review): Xims is signed here, whereas the ims parameter of match() and
Xoriginal_ims below are unsigned long int - looks harmless for option bits,
but confirm before relying on it. */
303     long int Xims;
304     eptrblock *Xeptrb;
305     int Xflags;
/* Set to the creating frame's Xrdepth + 1 by RMATCH. */
306 nigel 91 unsigned int Xrdepth;
307 nigel 77
308     /* Function local variables */
309    
310     const uschar *Xcallpat;
/* NOTE(review): Xcharptr is declared unconditionally, but its charptr alias
in match() is only defined under SUPPORT_UTF8. */
311     const uschar *Xcharptr;
312     const uschar *Xdata;
313     const uschar *Xnext;
314     const uschar *Xpp;
315     const uschar *Xprev;
316     const uschar *Xsaved_eptr;
317    
318     recursion_info Xnew_recursive;
319    
320     BOOL Xcur_is_word;
321     BOOL Xcondition;
322     BOOL Xprev_is_word;
323    
324     unsigned long int Xoriginal_ims;
325    
326     #ifdef SUPPORT_UCP
327     int Xprop_type;
328 nigel 87 int Xprop_value;
329 nigel 77 int Xprop_fail_result;
330     int Xprop_category;
331     int Xprop_chartype;
332 nigel 87 int Xprop_script;
333 ph10 123 int Xoclength;
334     uschar Xocchars[8];
335 nigel 77 #endif
336    
337     int Xctype;
/* Xfc and Xfi are separate frame fields here; in the recursive build fc and
fi are simply aliases for the locals c and i. */
338 nigel 93 unsigned int Xfc;
339 nigel 77 int Xfi;
340     int Xlength;
341     int Xmax;
342     int Xmin;
343     int Xnumber;
344     int Xoffset;
345     int Xop;
346     int Xsave_capture_last;
347     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
348     int Xstacksave[REC_STACK_SAVE_MAX];
349    
/* Storage for the eptrblock that match() chains onto eptrb when the
match_cbegroup flag is set. */
350     eptrblock Xnewptrb;
351    
352 ph10 164 /* Where to jump back to */
353 nigel 77
/* Holds the RMn number stored by RMATCH in the calling frame before it
"recurses". */
354 ph10 164 int Xwhere;
355 ph10 165
356 nigel 77 } heapframe;
357    
358     #endif
359    
360    
361     /***************************************************************************
362     ***************************************************************************/
363    
364    
365    
366     /*************************************************
367     * Match from current position *
368     *************************************************/
369    
370 nigel 93 /* This function is called recursively in many circumstances. Whenever it
371 nigel 77 returns a negative (error) response, the outer incarnation must also return the
372     same response.
373    
374     Performance note: It might be tempting to extract commonly used fields from the
375     md structure (e.g. utf8, end_subject) into individual variables to improve
376     performance. Tests using gcc on a SPARC disproved this; in the first case, it
377     made performance worse.
378    
379     Arguments:
380 nigel 93 eptr pointer to current character in subject
381     ecode pointer to current position in compiled code
382 ph10 168 mstart pointer to the current match start position (can be modified
383 ph10 172 by encountering \K)
384 nigel 77 offset_top current top pointer
385     md pointer to "static" info for the match
386     ims current /i, /m, and /s options
387     eptrb pointer to chain of blocks containing eptr at start of
388     brackets - for testing for empty matches
389     flags can contain
390     match_condassert - this is an assertion condition
391 nigel 93 match_cbegroup - this is the start of an unlimited repeat
392     group that can match an empty string
393 nigel 87 rdepth the recursion depth
394 nigel 77
395     Returns: MATCH_MATCH if matched ) these values are >= 0
396     MATCH_NOMATCH if failed to match )
397     a negative PCRE_ERROR_xxx value if aborted by an error condition
398 nigel 87 (e.g. stopped by repeated call or recursion limit)
399 nigel 77 */
400    
401     static int
402 ph10 172 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
403 nigel 77 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
404 nigel 91 int flags, unsigned int rdepth)
405 nigel 77 {
406     /* These variables do not need to be preserved over recursion in this function,
407 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
408     "register" because they are used a lot in loops. */
409 nigel 77
410 nigel 91 register int rrc; /* Returns from recursive calls */
411     register int i; /* Used for loops not involving calls to RMATCH() */
412 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
413 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
414 nigel 77
415 nigel 93 BOOL minimize, possessive; /* Quantifier options */
416    
417 nigel 77 /* When recursion is not being used, all "local" variables that have to be
418     preserved over calls to RMATCH() are part of a "frame" which is obtained from
419     heap storage. Set up the top-level frame here; others are obtained from the
420     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
421    
422     #ifdef NO_RECURSE
423     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
424     frame->Xprevframe = NULL; /* Marks the top level */
425    
426     /* Copy in the original argument variables */
427    
428     frame->Xeptr = eptr;
429     frame->Xecode = ecode;
430 ph10 168 frame->Xmstart = mstart;
431 nigel 77 frame->Xoffset_top = offset_top;
432     frame->Xims = ims;
433     frame->Xeptrb = eptrb;
434     frame->Xflags = flags;
435 nigel 87 frame->Xrdepth = rdepth;
436 nigel 77
437     /* This is where control jumps back to to effect "recursion" */
438    
439     HEAP_RECURSE:
440    
441     /* Macros make the argument variables come from the current frame */
442    
443     #define eptr frame->Xeptr
444     #define ecode frame->Xecode
445 ph10 168 #define mstart frame->Xmstart
446 nigel 77 #define offset_top frame->Xoffset_top
447     #define ims frame->Xims
448     #define eptrb frame->Xeptrb
449     #define flags frame->Xflags
450 nigel 87 #define rdepth frame->Xrdepth
451 nigel 77
452     /* Ditto for the local variables */
453    
454     #ifdef SUPPORT_UTF8
455     #define charptr frame->Xcharptr
456     #endif
457     #define callpat frame->Xcallpat
458     #define data frame->Xdata
459     #define next frame->Xnext
460     #define pp frame->Xpp
461     #define prev frame->Xprev
462     #define saved_eptr frame->Xsaved_eptr
463    
464     #define new_recursive frame->Xnew_recursive
465    
466     #define cur_is_word frame->Xcur_is_word
467     #define condition frame->Xcondition
468     #define prev_is_word frame->Xprev_is_word
469    
470     #define original_ims frame->Xoriginal_ims
471    
472     #ifdef SUPPORT_UCP
473     #define prop_type frame->Xprop_type
474 nigel 87 #define prop_value frame->Xprop_value
475 nigel 77 #define prop_fail_result frame->Xprop_fail_result
476     #define prop_category frame->Xprop_category
477     #define prop_chartype frame->Xprop_chartype
478 nigel 87 #define prop_script frame->Xprop_script
479 ph10 115 #define oclength frame->Xoclength
480     #define occhars frame->Xocchars
481 nigel 77 #endif
482    
483     #define ctype frame->Xctype
484     #define fc frame->Xfc
485     #define fi frame->Xfi
486     #define length frame->Xlength
487     #define max frame->Xmax
488     #define min frame->Xmin
489     #define number frame->Xnumber
490     #define offset frame->Xoffset
491     #define op frame->Xop
492     #define save_capture_last frame->Xsave_capture_last
493     #define save_offset1 frame->Xsave_offset1
494     #define save_offset2 frame->Xsave_offset2
495     #define save_offset3 frame->Xsave_offset3
496     #define stacksave frame->Xstacksave
497    
498     #define newptrb frame->Xnewptrb
499    
500     /* When recursion is being used, local variables are allocated on the stack and
501     get preserved during recursion in the normal way. In this environment, fi and
502     i, and fc and c, can be the same variables. */
503    
504 nigel 93 #else /* NO_RECURSE not defined */
505 nigel 77 #define fi i
506     #define fc c
507    
508    
509 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
510     const uschar *charptr; /* in small blocks of the code. My normal */
511     #endif /* style of coding would have declared */
512     const uschar *callpat; /* them within each of those blocks. */
513     const uschar *data; /* However, in order to accommodate the */
514     const uschar *next; /* version of this code that uses an */
515     USPTR pp; /* external "stack" implemented on the */
516     const uschar *prev; /* heap, it is easier to declare them all */
517     USPTR saved_eptr; /* here, so the declarations can be cut */
518     /* out in a block. The only declarations */
519     recursion_info new_recursive; /* within blocks below are for variables */
520     /* that do not have to be preserved over */
521     BOOL cur_is_word; /* a recursive call to RMATCH(). */
522     BOOL condition;
523 nigel 77 BOOL prev_is_word;
524    
525     unsigned long int original_ims;
526    
527     #ifdef SUPPORT_UCP
528     int prop_type;
529 nigel 87 int prop_value;
530 nigel 77 int prop_fail_result;
531     int prop_category;
532     int prop_chartype;
533 nigel 87 int prop_script;
534 ph10 115 int oclength;
535     uschar occhars[8];
536 nigel 77 #endif
537    
538     int ctype;
539     int length;
540     int max;
541     int min;
542     int number;
543     int offset;
544     int op;
545     int save_capture_last;
546     int save_offset1, save_offset2, save_offset3;
547     int stacksave[REC_STACK_SAVE_MAX];
548    
549     eptrblock newptrb;
550 nigel 93 #endif /* NO_RECURSE */
551 nigel 77
552     /* These statements are here to stop the compiler complaining about uninitialized
553     variables. */
554    
555     #ifdef SUPPORT_UCP
556 nigel 87 prop_value = 0;
557 nigel 77 prop_fail_result = 0;
558     #endif
559    
560 nigel 93
561 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
562     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
563     used. Thanks to Ian Taylor for noticing this possibility and sending the
564     original patch. */
565    
566     TAIL_RECURSE:
567    
568 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
569     are specified by the macro RMATCH and RRETURN is used to return. When
570     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
571     and a "return", respectively (possibly with some debugging if DEBUG is
572     defined). However, RMATCH isn't like a function call because it's quite a
573     complicated macro. It has to be used in one particular way. This shouldn't,
574     however, impact performance when true recursion is being used. */
575 nigel 77
576 ph10 164 #ifdef SUPPORT_UTF8
577     utf8 = md->utf8; /* Local copy of the flag */
578     #else
579     utf8 = FALSE;
580     #endif
581    
582 nigel 87 /* First check that we haven't called match() too many times, or that we
583     haven't exceeded the recursive call limit. */
584    
585 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
586 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
587 nigel 77
588     original_ims = ims; /* Save for resetting on ')' */
589 nigel 91
590 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
591     string, the match_cbegroup flag is set. When this is the case, add the current
592     subject pointer to the chain of such remembered pointers, to be checked when we
593     hit the closing ket, in order to break infinite loops that match no characters.
594 ph10 197 When match() is called in other circumstances, don't add to the chain. The
595     match_cbegroup flag must NOT be used with tail recursion, because the memory
596     block that is used is on the stack, so a new one may be required for each
597     match(). */
598 nigel 77
599 nigel 93 if ((flags & match_cbegroup) != 0)
600 nigel 77 {
601 ph10 197 newptrb.epb_saved_eptr = eptr;
602     newptrb.epb_prev = eptrb;
603     eptrb = &newptrb;
604 nigel 77 }
605    
606 nigel 93 /* Now start processing the opcodes. */
607 nigel 77
608     for (;;)
609     {
610 nigel 93 minimize = possessive = FALSE;
611 nigel 77 op = *ecode;
612    
613     /* For partial matching, remember if we ever hit the end of the subject after
614     matching at least one subject character. */
615    
616     if (md->partial &&
617     eptr >= md->end_subject &&
618 ph10 168 eptr > mstart)
619 nigel 77 md->hitend = TRUE;
620 ph10 208
621 nigel 93 switch(op)
622     {
623 ph10 210 case OP_FAIL:
624 ph10 212 RRETURN(MATCH_NOMATCH);
625 ph10 211
626 ph10 210 case OP_PRUNE:
627     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
628     ims, eptrb, flags, RM51);
629     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
630 ph10 212 RRETURN(MATCH_PRUNE);
631 ph10 211
632 ph10 210 case OP_COMMIT:
633     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
634     ims, eptrb, flags, RM52);
635     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
636 ph10 212 RRETURN(MATCH_COMMIT);
637 ph10 211
638 ph10 210 case OP_SKIP:
639     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
640     ims, eptrb, flags, RM53);
641     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
642 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
643 ph10 212 RRETURN(MATCH_SKIP);
644 ph10 211
645 ph10 210 case OP_THEN:
646     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
647 ph10 212 ims, eptrb, flags, RM54);
648 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
649 ph10 212 RRETURN(MATCH_THEN);
650 ph10 211
651 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
652     the current subject position in the working slot at the top of the vector.
653     We mustn't change the current values of the data slot, because they may be
654     set from a previous iteration of this group, and be referred to by a
655     reference inside the group.
656 nigel 77
657 nigel 93 If the bracket fails to match, we need to restore this value and also the
658     values of the final offsets, in case they were set by a previous iteration
659     of the same bracket.
660 nigel 77
661 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
662     a non-capturing bracket. Don't worry about setting the flag for the error
663     case here; that is handled in the code for KET. */
664 nigel 77
665 nigel 93 case OP_CBRA:
666     case OP_SCBRA:
667     number = GET2(ecode, 1+LINK_SIZE);
668 nigel 77 offset = number << 1;
669    
670     #ifdef DEBUG
671 nigel 93 printf("start bracket %d\n", number);
672     printf("subject=");
673 nigel 77 pchars(eptr, 16, TRUE, md);
674     printf("\n");
675     #endif
676    
677     if (offset < md->offset_max)
678     {
679     save_offset1 = md->offset_vector[offset];
680     save_offset2 = md->offset_vector[offset+1];
681     save_offset3 = md->offset_vector[md->offset_end - number];
682     save_capture_last = md->capture_last;
683    
684     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
685     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
686    
687 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
688 nigel 77 do
689     {
690 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
691     ims, eptrb, flags, RM1);
692 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
693 nigel 77 md->capture_last = save_capture_last;
694     ecode += GET(ecode, 1);
695     }
696     while (*ecode == OP_ALT);
697    
698     DPRINTF(("bracket %d failed\n", number));
699    
700     md->offset_vector[offset] = save_offset1;
701     md->offset_vector[offset+1] = save_offset2;
702     md->offset_vector[md->offset_end - number] = save_offset3;
703    
704     RRETURN(MATCH_NOMATCH);
705     }
706    
707 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
708     as a non-capturing bracket. */
709 nigel 77
710 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
711     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
712    
713 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
714 nigel 77
715 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
716     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
717    
718 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
719     final alternative within the brackets, we would return the result of a
720     recursive call to match() whatever happened. We can reduce stack usage by
721 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
722     is set.*/
723 nigel 77
724 nigel 93 case OP_BRA:
725     case OP_SBRA:
726     DPRINTF(("start non-capturing bracket\n"));
727     flags = (op >= OP_SBRA)? match_cbegroup : 0;
728 nigel 91 for (;;)
729 nigel 77 {
730 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
731 nigel 93 {
732 ph10 197 if (flags == 0) /* Not a possibly empty group */
733     {
734     ecode += _pcre_OP_lengths[*ecode];
735     DPRINTF(("bracket 0 tail recursion\n"));
736     goto TAIL_RECURSE;
737     }
738    
739     /* Possibly empty group; can't use tail recursion. */
740    
741     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
742     eptrb, flags, RM48);
743     RRETURN(rrc);
744 nigel 93 }
745 nigel 91
746     /* For non-final alternatives, continue the loop for a NOMATCH result;
747     otherwise return. */
748    
749 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
750     eptrb, flags, RM2);
751 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
752 nigel 77 ecode += GET(ecode, 1);
753     }
754 nigel 91 /* Control never reaches here. */
755 nigel 77
756     /* Conditional group: compilation checked that there are no more than
757     two branches. If the condition is false, skipping the first branch takes us
758     past the end if there is only one branch, but that's OK because that is
759 nigel 91 exactly what going to the ket would do. As there is only one branch to be
760     obeyed, we can use tail recursion to avoid using another stack frame. */
761 nigel 77
762     case OP_COND:
763 nigel 93 case OP_SCOND:
764     if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
765 nigel 77 {
766 nigel 93 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
767     condition = md->recursive != NULL &&
768     (offset == RREF_ANY || offset == md->recursive->group_num);
769     ecode += condition? 3 : GET(ecode, 1);
770     }
771    
772     else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */
773     {
774 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
775 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
776     ecode += condition? 3 : GET(ecode, 1);
777 nigel 77 }
778    
779 nigel 93 else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */
780     {
781     condition = FALSE;
782     ecode += GET(ecode, 1);
783     }
784    
785 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
786 nigel 93 the final argument match_condassert causes it to stop at the end of an
787     assertion. */
788 nigel 77
789     else
790     {
791 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
792     match_condassert, RM3);
793 nigel 77 if (rrc == MATCH_MATCH)
794     {
795 nigel 93 condition = TRUE;
796     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
797 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
798     }
799 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
800 nigel 77 {
801     RRETURN(rrc); /* Need braces because of following else */
802     }
803 nigel 93 else
804     {
805     condition = FALSE;
806     ecode += GET(ecode, 1);
807     }
808     }
809 nigel 91
810 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
811 ph10 197 we can use tail recursion to avoid using another stack frame, except when
812     match_cbegroup is required for an unlimited repeat of a possibly empty
813     group. If the second alternative doesn't exist, we can just plough on. */
814 nigel 91
815 nigel 93 if (condition || *ecode == OP_ALT)
816     {
817 nigel 91 ecode += 1 + LINK_SIZE;
818 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
819     {
820     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
821     RRETURN(rrc);
822     }
823     else /* Group must match something */
824     {
825     flags = 0;
826     goto TAIL_RECURSE;
827     }
828 nigel 77 }
829 ph10 197 else /* Condition false & no 2nd alternative */
830 nigel 93 {
831     ecode += 1 + LINK_SIZE;
832     }
833     break;
834 nigel 77
835    
836 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
837     recursion, we should restore the offsets appropriately and continue from
838     after the call. */
839 nigel 77
840 ph10 210 case OP_ACCEPT:
841 nigel 77 case OP_END:
842     if (md->recursive != NULL && md->recursive->group_num == 0)
843     {
844     recursion_info *rec = md->recursive;
845 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
846 nigel 77 md->recursive = rec->prevrec;
847     memmove(md->offset_vector, rec->offset_save,
848     rec->saved_max * sizeof(int));
849 ph10 168 mstart = rec->save_start;
850 nigel 77 ims = original_ims;
851     ecode = rec->after_call;
852     break;
853     }
854    
855     /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
856     string - backtracking will then try other alternatives, if any. */
857    
858 ph10 168 if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
859     md->end_match_ptr = eptr; /* Record where we ended */
860     md->end_offset_top = offset_top; /* and how many extracts were taken */
861 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
862 nigel 77 RRETURN(MATCH_MATCH);
863    
864     /* Change option settings */
865    
866     case OP_OPT:
867     ims = ecode[1];
868     ecode += 2;
869     DPRINTF(("ims set to %02lx\n", ims));
870     break;
871    
872     /* Assertion brackets. Check the alternative branches in turn - the
873     matching won't pass the KET for an assertion. If any one branch matches,
874     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
875     start of each branch to move the current point backwards, so the code at
876     this level is identical to the lookahead case. */
877    
878     case OP_ASSERT:
879     case OP_ASSERTBACK:
880     do
881     {
882 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
883     RM4);
884 nigel 77 if (rrc == MATCH_MATCH) break;
885 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
886 nigel 77 ecode += GET(ecode, 1);
887     }
888     while (*ecode == OP_ALT);
889     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
890    
891     /* If checking an assertion for a condition, return MATCH_MATCH. */
892    
893     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
894    
895     /* Continue from after the assertion, updating the offsets high water
896     mark, since extracts may have been taken during the assertion. */
897    
898     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
899     ecode += 1 + LINK_SIZE;
900     offset_top = md->end_offset_top;
901     continue;
902    
903     /* Negative assertion: all branches must fail to match */
904    
905     case OP_ASSERT_NOT:
906     case OP_ASSERTBACK_NOT:
907     do
908     {
909 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
910     RM5);
911 nigel 77 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
912 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
913 nigel 77 ecode += GET(ecode,1);
914     }
915     while (*ecode == OP_ALT);
916    
917     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
918    
919     ecode += 1 + LINK_SIZE;
920     continue;
921    
922     /* Move the subject pointer back. This occurs only at the start of
923     each branch of a lookbehind assertion. If we are too close to the start to
924     move back, this match function fails. When working with UTF-8 we move
925     back a number of characters, not bytes. */
926    
927     case OP_REVERSE:
928     #ifdef SUPPORT_UTF8
929     if (utf8)
930     {
931 nigel 93 i = GET(ecode, 1);
932     while (i-- > 0)
933 nigel 77 {
934     eptr--;
935     if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
936 ph10 207 BACKCHAR(eptr);
937 nigel 77 }
938     }
939     else
940     #endif
941    
942     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
943    
944     {
945 nigel 93 eptr -= GET(ecode, 1);
946 nigel 77 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
947     }
948    
949     /* Skip to next op code */
950    
951     ecode += 1 + LINK_SIZE;
952     break;
953    
954     /* The callout item calls an external function, if one is provided, passing
955     details of the match so far. This is mainly for debugging, though the
956     function is able to force a failure. */
957    
958     case OP_CALLOUT:
959     if (pcre_callout != NULL)
960     {
961     pcre_callout_block cb;
962     cb.version = 1; /* Version 1 of the callout block */
963     cb.callout_number = ecode[1];
964     cb.offset_vector = md->offset_vector;
965 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
966 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
967 ph10 168 cb.start_match = mstart - md->start_subject;
968 nigel 77 cb.current_position = eptr - md->start_subject;
969     cb.pattern_position = GET(ecode, 2);
970     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
971     cb.capture_top = offset_top/2;
972     cb.capture_last = md->capture_last;
973     cb.callout_data = md->callout_data;
974     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
975     if (rrc < 0) RRETURN(rrc);
976     }
977     ecode += 2 + 2*LINK_SIZE;
978     break;
979    
980     /* Recursion either matches the current regex, or some subexpression. The
981     offset data is the offset to the starting bracket from the start of the
982     whole pattern. (This is so that it works from duplicated subpatterns.)
983    
984     If there are any capturing brackets started but not finished, we have to
985     save their starting points and reinstate them after the recursion. However,
986     we don't know how many such there are (offset_top records the completed
987     total) so we just have to save all the potential data. There may be up to
988     65535 such values, which is too large to put on the stack, but using malloc
989     for small numbers seems expensive. As a compromise, the stack is used when
990     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
991     is used. If the malloc fails, nothing can be saved, so the recursion is not
992     attempted and PCRE_ERROR_NOMEMORY is returned from this call of match()
993     instead.
994    
995     There are also other values that have to be saved. We use a chained
996     sequence of blocks that actually live on the stack. Thanks to Robin Houston
997     for the original version of this logic. */
998    
999     case OP_RECURSE:
1000     {
1001     callpat = md->start_code + GET(ecode, 1);
1002 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1003     GET2(callpat, 1 + LINK_SIZE);
1004 nigel 77
1005     /* Add to "recursing stack" */
1006    
1007     new_recursive.prevrec = md->recursive;
1008     md->recursive = &new_recursive;
1009    
1010     /* Find where to continue from afterwards */
1011    
1012     ecode += 1 + LINK_SIZE;
1013     new_recursive.after_call = ecode;
1014    
1015     /* Now save the offset data. */
1016    
1017     new_recursive.saved_max = md->offset_end;
1018     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1019     new_recursive.offset_save = stacksave;
1020     else
1021     {
1022     new_recursive.offset_save =
1023     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1024     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1025     }
1026    
1027     memcpy(new_recursive.offset_save, md->offset_vector,
1028     new_recursive.saved_max * sizeof(int));
1029 ph10 168 new_recursive.save_start = mstart;
1030     mstart = eptr;
1031 nigel 77
1032     /* OK, now we can do the recursion. For each top-level alternative we
1033     restore the offset and recursion data. */
1034    
1035     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1036 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1037 nigel 77 do
1038     {
1039 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1040     md, ims, eptrb, flags, RM6);
1041 nigel 77 if (rrc == MATCH_MATCH)
1042     {
1043 nigel 87 DPRINTF(("Recursion matched\n"));
1044 nigel 77 md->recursive = new_recursive.prevrec;
1045     if (new_recursive.offset_save != stacksave)
1046     (pcre_free)(new_recursive.offset_save);
1047     RRETURN(MATCH_MATCH);
1048     }
1049 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1050 nigel 87 {
1051     DPRINTF(("Recursion gave error %d\n", rrc));
1052     RRETURN(rrc);
1053     }
1054 nigel 77
1055     md->recursive = &new_recursive;
1056     memcpy(md->offset_vector, new_recursive.offset_save,
1057     new_recursive.saved_max * sizeof(int));
1058     callpat += GET(callpat, 1);
1059     }
1060     while (*callpat == OP_ALT);
1061    
1062     DPRINTF(("Recursion didn't match\n"));
1063     md->recursive = new_recursive.prevrec;
1064     if (new_recursive.offset_save != stacksave)
1065     (pcre_free)(new_recursive.offset_save);
1066     RRETURN(MATCH_NOMATCH);
1067     }
1068     /* Control never reaches here */
1069    
1070     /* "Once" brackets are like assertion brackets except that after a match,
1071     the point in the subject string is not moved back. Thus there can never be
1072     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1073     Check the alternative branches in turn - the matching won't pass the KET
1074     for this kind of subpattern. If any one branch matches, we carry on as at
1075     the end of a normal bracket, leaving the subject pointer. */
1076    
1077     case OP_ONCE:
1078 nigel 91 prev = ecode;
1079     saved_eptr = eptr;
1080    
1081     do
1082 nigel 77 {
1083 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1084 nigel 91 if (rrc == MATCH_MATCH) break;
1085 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1086 nigel 91 ecode += GET(ecode,1);
1087     }
1088     while (*ecode == OP_ALT);
1089 nigel 77
1090 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1091 nigel 77
1092 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1093 nigel 77
1094 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1095     mark, since extracts may have been taken. */
1096 nigel 77
1097 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1098 nigel 77
1099 nigel 91 offset_top = md->end_offset_top;
1100     eptr = md->end_match_ptr;
1101 nigel 77
1102 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1103     happens for a repeating ket if no characters were matched in the group.
1104     This is the forcible breaking of infinite loops as implemented in Perl
1105     5.005. If there is an options reset, it will get obeyed in the normal
1106     course of events. */
1107 nigel 77
1108 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1109     {
1110     ecode += 1+LINK_SIZE;
1111     break;
1112     }
1113 nigel 77
1114 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1115     preceding bracket, in the appropriate order. The second "call" of match()
1116     uses tail recursion, to avoid using another stack frame. We need to reset
1117     any options that changed within the bracket before re-running it, so
1118     check the next opcode. */
1119 nigel 77
1120 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1121     {
1122     ims = (ims & ~PCRE_IMS) | ecode[4];
1123     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1124     }
1125 nigel 77
1126 nigel 91 if (*ecode == OP_KETRMIN)
1127     {
1128 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1129 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1130     ecode = prev;
1131 ph10 197 flags = 0;
1132 nigel 91 goto TAIL_RECURSE;
1133 nigel 77 }
1134 nigel 91 else /* OP_KETRMAX */
1135     {
1136 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1137 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1138     ecode += 1 + LINK_SIZE;
1139 ph10 197 flags = 0;
1140 nigel 91 goto TAIL_RECURSE;
1141     }
1142     /* Control never gets here */
1143 nigel 77
1144     /* An alternation is the end of a branch; scan along to find the end of the
1145     bracketed group and go to there. */
1146    
1147     case OP_ALT:
1148     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1149     break;
1150    
1151 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1152     indicating that it may occur zero times. It may repeat infinitely, or not
1153     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1154     with fixed upper repeat limits are compiled as a number of copies, with the
1155     optional ones preceded by BRAZERO or BRAMINZERO. */
1156 nigel 77
1157     case OP_BRAZERO:
1158     {
1159     next = ecode+1;
1160 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1161 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1162     do next += GET(next,1); while (*next == OP_ALT);
1163 nigel 93 ecode = next + 1 + LINK_SIZE;
1164 nigel 77 }
1165     break;
1166    
1167     case OP_BRAMINZERO:
1168     {
1169     next = ecode+1;
1170 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1171 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1172 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1173     ecode++;
1174     }
1175     break;
1176    
1177 ph10 335 case OP_SKIPZERO:
1178     {
1179     next = ecode+1;
1180     do next += GET(next,1); while (*next == OP_ALT);
1181     ecode = next + 1 + LINK_SIZE;
1182     }
1183     break;
1184    
1185 nigel 93 /* End of a group, repeated or non-repeating. */
1186 nigel 77
1187     case OP_KET:
1188     case OP_KETRMIN:
1189     case OP_KETRMAX:
1190 nigel 91 prev = ecode - GET(ecode, 1);
1191 nigel 77
1192 nigel 93 /* If this was a group that remembered the subject start, in order to break
1193     infinite repeats of empty string matches, retrieve the subject start from
1194     the chain. Otherwise, set it NULL. */
1195 nigel 77
1196 nigel 93 if (*prev >= OP_SBRA)
1197     {
1198     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1199     eptrb = eptrb->epb_prev; /* Backup to previous group */
1200     }
1201     else saved_eptr = NULL;
1202 nigel 77
1203 nigel 93 /* If we are at the end of an assertion group, stop matching and return
1204     MATCH_MATCH, but record the current high water mark for use by positive
1205     assertions. Do this also for the "once" (atomic) groups. */
1206    
1207 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1208     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1209     *prev == OP_ONCE)
1210     {
1211     md->end_match_ptr = eptr; /* For ONCE */
1212     md->end_offset_top = offset_top;
1213     RRETURN(MATCH_MATCH);
1214     }
1215 nigel 77
1216 nigel 93 /* For capturing groups we have to check the group number back at the start
1217     and if necessary complete handling an extraction by setting the offsets and
1218     bumping the high water mark. Note that whole-pattern recursion is coded as
1219     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1220     when the OP_END is reached. Other recursion is handled here. */
1221 nigel 77
1222 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1223 nigel 91 {
1224 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1225 nigel 91 offset = number << 1;
1226 nigel 77
1227     #ifdef DEBUG
1228 nigel 91 printf("end bracket %d", number);
1229     printf("\n");
1230 nigel 77 #endif
1231    
1232 nigel 93 md->capture_last = number;
1233     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1234 nigel 91 {
1235 nigel 93 md->offset_vector[offset] =
1236     md->offset_vector[md->offset_end - number];
1237     md->offset_vector[offset+1] = eptr - md->start_subject;
1238     if (offset_top <= offset) offset_top = offset + 2;
1239     }
1240 nigel 77
1241 nigel 93 /* Handle a recursively called group. Restore the offsets
1242     appropriately and continue from after the call. */
1243 nigel 77
1244 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1245     {
1246     recursion_info *rec = md->recursive;
1247     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1248     md->recursive = rec->prevrec;
1249 ph10 168 mstart = rec->save_start;
1250 nigel 93 memcpy(md->offset_vector, rec->offset_save,
1251     rec->saved_max * sizeof(int));
1252     ecode = rec->after_call;
1253     ims = original_ims;
1254     break;
1255 nigel 77 }
1256 nigel 91 }
1257 nigel 77
1258 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1259     flags, in case they got changed during the group. */
1260 nigel 77
1261 nigel 91 ims = original_ims;
1262     DPRINTF(("ims reset to %02lx\n", ims));
1263 nigel 77
1264 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1265     happens for a repeating ket if no characters were matched in the group.
1266     This is the forcible breaking of infinite loops as implemented in Perl
1267     5.005. If there is an options reset, it will get obeyed in the normal
1268     course of events. */
1269 nigel 77
1270 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1271     {
1272     ecode += 1 + LINK_SIZE;
1273     break;
1274     }
1275 nigel 77
1276 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1277     preceding bracket, in the appropriate order. In the second case, we can use
1278 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1279     unlimited repeat of a group that can match an empty string. */
1280 nigel 77
1281 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1282    
1283 nigel 91 if (*ecode == OP_KETRMIN)
1284     {
1285 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1286 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1287 ph10 197 if (flags != 0) /* Could match an empty string */
1288     {
1289     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1290     RRETURN(rrc);
1291     }
1292 nigel 91 ecode = prev;
1293     goto TAIL_RECURSE;
1294 nigel 77 }
1295 nigel 91 else /* OP_KETRMAX */
1296     {
1297 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1298 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1299     ecode += 1 + LINK_SIZE;
1300 ph10 197 flags = 0;
1301 nigel 91 goto TAIL_RECURSE;
1302     }
1303     /* Control never gets here */
1304 nigel 77
1305     /* Start of subject unless notbol, or after internal newline if multiline */
1306    
1307     case OP_CIRC:
1308     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1309     if ((ims & PCRE_MULTILINE) != 0)
1310     {
1311 nigel 91 if (eptr != md->start_subject &&
1312 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1313 nigel 77 RRETURN(MATCH_NOMATCH);
1314     ecode++;
1315     break;
1316     }
1317     /* ... else fall through */
1318    
1319     /* Start of subject assertion */
1320    
1321     case OP_SOD:
1322     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1323     ecode++;
1324     break;
1325    
1326     /* Start of match assertion */
1327    
1328     case OP_SOM:
1329     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1330     ecode++;
1331     break;
1332 ph10 172
1333 ph10 168 /* Reset the start of match point */
1334 ph10 172
1335 ph10 168 case OP_SET_SOM:
1336     mstart = eptr;
1337 ph10 172 ecode++;
1338     break;
1339 nigel 77
1340     /* Assert before internal newline if multiline, or before a terminating
1341     newline unless endonly is set, else end of subject unless noteol is set. */
1342    
1343     case OP_DOLL:
1344     if ((ims & PCRE_MULTILINE) != 0)
1345     {
1346     if (eptr < md->end_subject)
1347 nigel 91 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1348 nigel 77 else
1349     { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1350     ecode++;
1351     break;
1352     }
1353     else
1354     {
1355     if (md->noteol) RRETURN(MATCH_NOMATCH);
1356     if (!md->endonly)
1357     {
1358 nigel 91 if (eptr != md->end_subject &&
1359 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1360 nigel 77 RRETURN(MATCH_NOMATCH);
1361     ecode++;
1362     break;
1363     }
1364     }
1365 nigel 91 /* ... else fall through for endonly */
1366 nigel 77
1367     /* End of subject assertion (\z) */
1368    
1369     case OP_EOD:
1370     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1371     ecode++;
1372     break;
1373    
1374     /* End of subject or ending \n assertion (\Z) */
1375    
1376     case OP_EODN:
1377 nigel 91 if (eptr != md->end_subject &&
1378 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1379 nigel 91 RRETURN(MATCH_NOMATCH);
1380 nigel 77 ecode++;
1381     break;
1382    
1383     /* Word boundary assertions */
1384    
1385     case OP_NOT_WORD_BOUNDARY:
1386     case OP_WORD_BOUNDARY:
1387     {
1388    
1389     /* Find out if the previous and current characters are "word" characters.
1390     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1391     be "non-word" characters. */
1392    
1393     #ifdef SUPPORT_UTF8
1394     if (utf8)
1395     {
1396     if (eptr == md->start_subject) prev_is_word = FALSE; else
1397     {
1398     const uschar *lastptr = eptr - 1;
1399     while((*lastptr & 0xc0) == 0x80) lastptr--;
1400     GETCHAR(c, lastptr);
1401     prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1402     }
1403     if (eptr >= md->end_subject) cur_is_word = FALSE; else
1404     {
1405     GETCHAR(c, eptr);
1406     cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1407     }
1408     }
1409     else
1410     #endif
1411    
1412     /* More streamlined when not in UTF-8 mode */
1413    
1414     {
1415     prev_is_word = (eptr != md->start_subject) &&
1416     ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1417     cur_is_word = (eptr < md->end_subject) &&
1418     ((md->ctypes[*eptr] & ctype_word) != 0);
1419     }
1420    
1421     /* Now see if the situation is what we want */
1422    
1423     if ((*ecode++ == OP_WORD_BOUNDARY)?
1424     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1425     RRETURN(MATCH_NOMATCH);
1426     }
1427     break;
1428    
1429     /* Match a single character type; inline for speed */
1430    
1431     case OP_ANY:
1432 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1433 ph10 345 /* Fall through */
1434    
1435 ph10 341 case OP_ALLANY:
1436 nigel 77 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1437 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1438 nigel 77 ecode++;
1439     break;
1440    
1441     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1442     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1443    
1444     case OP_ANYBYTE:
1445     if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1446     ecode++;
1447     break;
1448    
1449     case OP_NOT_DIGIT:
1450     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1451     GETCHARINCTEST(c, eptr);
1452     if (
1453     #ifdef SUPPORT_UTF8
1454     c < 256 &&
1455     #endif
1456     (md->ctypes[c] & ctype_digit) != 0
1457     )
1458     RRETURN(MATCH_NOMATCH);
1459     ecode++;
1460     break;
1461    
1462     case OP_DIGIT:
1463     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1464     GETCHARINCTEST(c, eptr);
1465     if (
1466     #ifdef SUPPORT_UTF8
1467     c >= 256 ||
1468     #endif
1469     (md->ctypes[c] & ctype_digit) == 0
1470     )
1471     RRETURN(MATCH_NOMATCH);
1472     ecode++;
1473     break;
1474    
1475     case OP_NOT_WHITESPACE:
1476     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1477     GETCHARINCTEST(c, eptr);
1478     if (
1479     #ifdef SUPPORT_UTF8
1480     c < 256 &&
1481     #endif
1482     (md->ctypes[c] & ctype_space) != 0
1483     )
1484     RRETURN(MATCH_NOMATCH);
1485     ecode++;
1486     break;
1487    
1488     case OP_WHITESPACE:
1489     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1490     GETCHARINCTEST(c, eptr);
1491     if (
1492     #ifdef SUPPORT_UTF8
1493     c >= 256 ||
1494     #endif
1495     (md->ctypes[c] & ctype_space) == 0
1496     )
1497     RRETURN(MATCH_NOMATCH);
1498     ecode++;
1499     break;
1500    
1501     case OP_NOT_WORDCHAR:
1502     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1503     GETCHARINCTEST(c, eptr);
1504     if (
1505     #ifdef SUPPORT_UTF8
1506     c < 256 &&
1507     #endif
1508     (md->ctypes[c] & ctype_word) != 0
1509     )
1510     RRETURN(MATCH_NOMATCH);
1511     ecode++;
1512     break;
1513    
1514     case OP_WORDCHAR:
1515     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1516     GETCHARINCTEST(c, eptr);
1517     if (
1518     #ifdef SUPPORT_UTF8
1519     c >= 256 ||
1520     #endif
1521     (md->ctypes[c] & ctype_word) == 0
1522     )
1523     RRETURN(MATCH_NOMATCH);
1524     ecode++;
1525     break;
1526    
1527 nigel 93 case OP_ANYNL:
1528     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1529     GETCHARINCTEST(c, eptr);
1530     switch(c)
1531     {
1532     default: RRETURN(MATCH_NOMATCH);
1533     case 0x000d:
1534     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1535     break;
1536 ph10 231
1537 nigel 93 case 0x000a:
1538 ph10 231 break;
1539    
1540 nigel 93 case 0x000b:
1541     case 0x000c:
1542     case 0x0085:
1543     case 0x2028:
1544     case 0x2029:
1545 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1546 nigel 93 break;
1547     }
1548     ecode++;
1549     break;
1550    
1551 ph10 178 case OP_NOT_HSPACE:
1552     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1553     GETCHARINCTEST(c, eptr);
1554     switch(c)
1555     {
1556     default: break;
1557     case 0x09: /* HT */
1558     case 0x20: /* SPACE */
1559     case 0xa0: /* NBSP */
1560     case 0x1680: /* OGHAM SPACE MARK */
1561     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1562     case 0x2000: /* EN QUAD */
1563     case 0x2001: /* EM QUAD */
1564     case 0x2002: /* EN SPACE */
1565     case 0x2003: /* EM SPACE */
1566     case 0x2004: /* THREE-PER-EM SPACE */
1567     case 0x2005: /* FOUR-PER-EM SPACE */
1568     case 0x2006: /* SIX-PER-EM SPACE */
1569     case 0x2007: /* FIGURE SPACE */
1570     case 0x2008: /* PUNCTUATION SPACE */
1571     case 0x2009: /* THIN SPACE */
1572     case 0x200A: /* HAIR SPACE */
1573     case 0x202f: /* NARROW NO-BREAK SPACE */
1574     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1575     case 0x3000: /* IDEOGRAPHIC SPACE */
1576     RRETURN(MATCH_NOMATCH);
1577     }
1578     ecode++;
1579     break;
1580    
1581     case OP_HSPACE:
1582     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1583     GETCHARINCTEST(c, eptr);
1584     switch(c)
1585     {
1586     default: RRETURN(MATCH_NOMATCH);
1587     case 0x09: /* HT */
1588     case 0x20: /* SPACE */
1589     case 0xa0: /* NBSP */
1590     case 0x1680: /* OGHAM SPACE MARK */
1591     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1592     case 0x2000: /* EN QUAD */
1593     case 0x2001: /* EM QUAD */
1594     case 0x2002: /* EN SPACE */
1595     case 0x2003: /* EM SPACE */
1596     case 0x2004: /* THREE-PER-EM SPACE */
1597     case 0x2005: /* FOUR-PER-EM SPACE */
1598     case 0x2006: /* SIX-PER-EM SPACE */
1599     case 0x2007: /* FIGURE SPACE */
1600     case 0x2008: /* PUNCTUATION SPACE */
1601     case 0x2009: /* THIN SPACE */
1602     case 0x200A: /* HAIR SPACE */
1603     case 0x202f: /* NARROW NO-BREAK SPACE */
1604     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1605     case 0x3000: /* IDEOGRAPHIC SPACE */
1606     break;
1607     }
1608     ecode++;
1609     break;
1610    
1611     case OP_NOT_VSPACE:
1612     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1613     GETCHARINCTEST(c, eptr);
1614     switch(c)
1615     {
1616     default: break;
1617     case 0x0a: /* LF */
1618     case 0x0b: /* VT */
1619     case 0x0c: /* FF */
1620     case 0x0d: /* CR */
1621     case 0x85: /* NEL */
1622     case 0x2028: /* LINE SEPARATOR */
1623     case 0x2029: /* PARAGRAPH SEPARATOR */
1624     RRETURN(MATCH_NOMATCH);
1625     }
1626     ecode++;
1627     break;
1628    
1629     case OP_VSPACE:
1630     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1631     GETCHARINCTEST(c, eptr);
1632     switch(c)
1633     {
1634     default: RRETURN(MATCH_NOMATCH);
1635     case 0x0a: /* LF */
1636     case 0x0b: /* VT */
1637     case 0x0c: /* FF */
1638     case 0x0d: /* CR */
1639     case 0x85: /* NEL */
1640     case 0x2028: /* LINE SEPARATOR */
1641     case 0x2029: /* PARAGRAPH SEPARATOR */
1642     break;
1643     }
1644     ecode++;
1645     break;
1646    
1647 nigel 77 #ifdef SUPPORT_UCP
1648     /* Check the next character by Unicode property. We will get here only
1649     if the support is in the binary; otherwise a compile-time error occurs. */
1650    
1651     case OP_PROP:
1652     case OP_NOTPROP:
1653     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1654     GETCHARINCTEST(c, eptr);
1655     {
1656 nigel 87 int chartype, script;
1657     int category = _pcre_ucp_findprop(c, &chartype, &script);
1658 nigel 77
1659 nigel 87 switch(ecode[1])
1660     {
1661     case PT_ANY:
1662     if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1663     break;
1664 nigel 77
1665 nigel 87 case PT_LAMP:
1666     if ((chartype == ucp_Lu ||
1667     chartype == ucp_Ll ||
1668     chartype == ucp_Lt) == (op == OP_NOTPROP))
1669 nigel 77 RRETURN(MATCH_NOMATCH);
1670 nigel 87 break;
1671    
1672     case PT_GC:
1673     if ((ecode[2] != category) == (op == OP_PROP))
1674 nigel 77 RRETURN(MATCH_NOMATCH);
1675 nigel 87 break;
1676    
1677     case PT_PC:
1678     if ((ecode[2] != chartype) == (op == OP_PROP))
1679     RRETURN(MATCH_NOMATCH);
1680     break;
1681    
1682     case PT_SC:
1683     if ((ecode[2] != script) == (op == OP_PROP))
1684     RRETURN(MATCH_NOMATCH);
1685     break;
1686    
1687     default:
1688     RRETURN(PCRE_ERROR_INTERNAL);
1689 nigel 77 }
1690 nigel 87
1691     ecode += 3;
1692 nigel 77 }
1693     break;
1694    
1695     /* Match an extended Unicode sequence. We will get here only if the support
1696     is in the binary; otherwise a compile-time error occurs. */
1697    
1698     case OP_EXTUNI:
1699     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1700     GETCHARINCTEST(c, eptr);
1701     {
1702 nigel 87 int chartype, script;
1703     int category = _pcre_ucp_findprop(c, &chartype, &script);
1704 nigel 77 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1705     while (eptr < md->end_subject)
1706     {
1707     int len = 1;
1708     if (!utf8) c = *eptr; else
1709     {
1710     GETCHARLEN(c, eptr, len);
1711     }
1712 nigel 87 category = _pcre_ucp_findprop(c, &chartype, &script);
1713 nigel 77 if (category != ucp_M) break;
1714     eptr += len;
1715     }
1716     }
1717     ecode++;
1718     break;
1719     #endif
1720    
1721    
1722     /* Match a back reference, possibly repeatedly. Look past the end of the
1723     item to see if there is repeat information following. The code is similar
1724     to that for character classes, but repeated for efficiency. Then obey
1725     similar code to character type repeats - written out again for speed.
1726     However, if the referenced string is the empty string, always treat
1727     it as matched, any number of times (otherwise there could be infinite
1728     loops). */
1729    
1730     case OP_REF:
1731     {
1732     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1733 ph10 345 ecode += 3;
1734    
1735 ph10 336 /* If the reference is unset, there are two possibilities:
1736 ph10 345
1737 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
1738     than the amount of subject left; this ensures that every attempt at a
1739     match fails. We can't just fail here, because of the possibility of
1740     quantifiers with zero minima.
1741 ph10 345
1742     (b) If the JavaScript compatibility flag is set, set the length to zero
1743     so that the back reference matches an empty string.
1744    
1745     Otherwise, set the length to the length of what was matched by the
1746 ph10 336 referenced subpattern. */
1747 ph10 345
1748 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
1749 ph10 345 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1750 ph10 336 else
1751     length = md->offset_vector[offset+1] - md->offset_vector[offset];
1752 nigel 77
1753     /* Set up for repetition, or handle the non-repeated case */
1754    
1755     switch (*ecode)
1756     {
1757     case OP_CRSTAR:
1758     case OP_CRMINSTAR:
1759     case OP_CRPLUS:
1760     case OP_CRMINPLUS:
1761     case OP_CRQUERY:
1762     case OP_CRMINQUERY:
1763     c = *ecode++ - OP_CRSTAR;
1764     minimize = (c & 1) != 0;
1765     min = rep_min[c]; /* Pick up values from tables; */
1766     max = rep_max[c]; /* zero for max => infinity */
1767     if (max == 0) max = INT_MAX;
1768     break;
1769    
1770     case OP_CRRANGE:
1771     case OP_CRMINRANGE:
1772     minimize = (*ecode == OP_CRMINRANGE);
1773     min = GET2(ecode, 1);
1774     max = GET2(ecode, 3);
1775     if (max == 0) max = INT_MAX;
1776     ecode += 5;
1777     break;
1778    
1779     default: /* No repeat follows */
1780     if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1781     eptr += length;
1782     continue; /* With the main loop */
1783     }
1784    
1785     /* If the length of the reference is zero, just continue with the
1786     main loop. */
1787    
1788     if (length == 0) continue;
1789    
1790     /* First, ensure the minimum number of matches are present. We get back
1791     the length of the reference string explicitly rather than passing the
1792     address of eptr, so that eptr can be a register variable. */
1793    
1794     for (i = 1; i <= min; i++)
1795     {
1796     if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1797     eptr += length;
1798     }
1799    
1800     /* If min = max, continue at the same level without recursion.
1801     They are not both allowed to be zero. */
1802    
1803     if (min == max) continue;
1804    
1805     /* If minimizing, keep trying and advancing the pointer */
1806    
1807     if (minimize)
1808     {
1809     for (fi = min;; fi++)
1810     {
1811 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1812 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1813     if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1814     RRETURN(MATCH_NOMATCH);
1815     eptr += length;
1816     }
1817     /* Control never gets here */
1818     }
1819    
1820     /* If maximizing, find the longest string and work backwards */
1821    
1822     else
1823     {
1824     pp = eptr;
1825     for (i = min; i < max; i++)
1826     {
1827     if (!match_ref(offset, eptr, length, md, ims)) break;
1828     eptr += length;
1829     }
1830     while (eptr >= pp)
1831     {
1832 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1833 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1834     eptr -= length;
1835     }
1836     RRETURN(MATCH_NOMATCH);
1837     }
1838     }
1839     /* Control never gets here */
1840    
1841    
1842    
1843     /* Match a bit-mapped character class, possibly repeatedly. This op code is
1844     used when all the characters in the class have values in the range 0-255,
1845     and either the matching is caseful, or the characters are in the range
1846     0-127 when UTF-8 processing is enabled. The only difference between
1847     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1848     encountered.
1849    
1850     First, look past the end of the item to see if there is repeat information
1851     following. Then obey similar code to character type repeats - written out
1852     again for speed. */
1853    
1854     case OP_NCLASS:
1855     case OP_CLASS:
1856     {
1857     data = ecode + 1; /* Save for matching */
1858     ecode += 33; /* Advance past the item */
1859    
1860     switch (*ecode)
1861     {
1862     case OP_CRSTAR:
1863     case OP_CRMINSTAR:
1864     case OP_CRPLUS:
1865     case OP_CRMINPLUS:
1866     case OP_CRQUERY:
1867     case OP_CRMINQUERY:
1868     c = *ecode++ - OP_CRSTAR;
1869     minimize = (c & 1) != 0;
1870     min = rep_min[c]; /* Pick up values from tables; */
1871     max = rep_max[c]; /* zero for max => infinity */
1872     if (max == 0) max = INT_MAX;
1873     break;
1874    
1875     case OP_CRRANGE:
1876     case OP_CRMINRANGE:
1877     minimize = (*ecode == OP_CRMINRANGE);
1878     min = GET2(ecode, 1);
1879     max = GET2(ecode, 3);
1880     if (max == 0) max = INT_MAX;
1881     ecode += 5;
1882     break;
1883    
1884     default: /* No repeat follows */
1885     min = max = 1;
1886     break;
1887     }
1888    
1889     /* First, ensure the minimum number of matches are present. */
1890    
1891     #ifdef SUPPORT_UTF8
1892     /* UTF-8 mode */
1893     if (utf8)
1894     {
1895     for (i = 1; i <= min; i++)
1896     {
1897     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1898     GETCHARINC(c, eptr);
1899     if (c > 255)
1900     {
1901     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1902     }
1903     else
1904     {
1905     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1906     }
1907     }
1908     }
1909     else
1910     #endif
1911     /* Not UTF-8 mode */
1912     {
1913     for (i = 1; i <= min; i++)
1914     {
1915     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1916     c = *eptr++;
1917     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1918     }
1919     }
1920    
1921     /* If max == min we can continue with the main loop without the
1922     need to recurse. */
1923    
1924     if (min == max) continue;
1925    
1926     /* If minimizing, keep testing the rest of the expression and advancing
1927     the pointer while it matches the class. */
1928    
1929     if (minimize)
1930     {
1931     #ifdef SUPPORT_UTF8
1932     /* UTF-8 mode */
1933     if (utf8)
1934     {
1935     for (fi = min;; fi++)
1936     {
1937 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1938 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1939     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1940     GETCHARINC(c, eptr);
1941     if (c > 255)
1942     {
1943     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1944     }
1945     else
1946     {
1947     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1948     }
1949     }
1950     }
1951     else
1952     #endif
1953     /* Not UTF-8 mode */
1954     {
1955     for (fi = min;; fi++)
1956     {
1957 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1958 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1959     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1960     c = *eptr++;
1961     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1962     }
1963     }
1964     /* Control never gets here */
1965     }
1966    
1967     /* If maximizing, find the longest possible run, then work backwards. */
1968    
1969     else
1970     {
1971     pp = eptr;
1972    
1973     #ifdef SUPPORT_UTF8
1974     /* UTF-8 mode */
1975     if (utf8)
1976     {
1977     for (i = min; i < max; i++)
1978     {
1979     int len = 1;
1980     if (eptr >= md->end_subject) break;
1981     GETCHARLEN(c, eptr, len);
1982     if (c > 255)
1983     {
1984     if (op == OP_CLASS) break;
1985     }
1986     else
1987     {
1988     if ((data[c/8] & (1 << (c&7))) == 0) break;
1989     }
1990     eptr += len;
1991     }
1992     for (;;)
1993     {
1994 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1995 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1996     if (eptr-- == pp) break; /* Stop if tried at original pos */
1997     BACKCHAR(eptr);
1998     }
1999     }
2000     else
2001     #endif
2002     /* Not UTF-8 mode */
2003     {
2004     for (i = min; i < max; i++)
2005     {
2006     if (eptr >= md->end_subject) break;
2007     c = *eptr;
2008     if ((data[c/8] & (1 << (c&7))) == 0) break;
2009     eptr++;
2010     }
2011     while (eptr >= pp)
2012     {
2013 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2014 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2015 nigel 77 eptr--;
2016     }
2017     }
2018    
2019     RRETURN(MATCH_NOMATCH);
2020     }
2021     }
2022     /* Control never gets here */
2023    
2024    
2025     /* Match an extended character class. This opcode is encountered only
2026     in UTF-8 mode, because that's the only time it is compiled. */
2027    
2028     #ifdef SUPPORT_UTF8
2029     case OP_XCLASS:
2030     {
2031     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2032     ecode += GET(ecode, 1); /* Advance past the item */
2033    
2034     switch (*ecode)
2035     {
2036     case OP_CRSTAR:
2037     case OP_CRMINSTAR:
2038     case OP_CRPLUS:
2039     case OP_CRMINPLUS:
2040     case OP_CRQUERY:
2041     case OP_CRMINQUERY:
2042     c = *ecode++ - OP_CRSTAR;
2043     minimize = (c & 1) != 0;
2044     min = rep_min[c]; /* Pick up values from tables; */
2045     max = rep_max[c]; /* zero for max => infinity */
2046     if (max == 0) max = INT_MAX;
2047     break;
2048    
2049     case OP_CRRANGE:
2050     case OP_CRMINRANGE:
2051     minimize = (*ecode == OP_CRMINRANGE);
2052     min = GET2(ecode, 1);
2053     max = GET2(ecode, 3);
2054     if (max == 0) max = INT_MAX;
2055     ecode += 5;
2056     break;
2057    
2058     default: /* No repeat follows */
2059     min = max = 1;
2060     break;
2061     }
2062    
2063     /* First, ensure the minimum number of matches are present. */
2064    
2065     for (i = 1; i <= min; i++)
2066     {
2067     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2068     GETCHARINC(c, eptr);
2069     if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2070     }
2071    
2072     /* If max == min we can continue with the main loop without the
2073     need to recurse. */
2074    
2075     if (min == max) continue;
2076    
2077     /* If minimizing, keep testing the rest of the expression and advancing
2078     the pointer while it matches the class. */
2079    
2080     if (minimize)
2081     {
2082     for (fi = min;; fi++)
2083     {
2084 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2085 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2086     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2087     GETCHARINC(c, eptr);
2088     if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2089     }
2090     /* Control never gets here */
2091     }
2092    
2093     /* If maximizing, find the longest possible run, then work backwards. */
2094    
2095     else
2096     {
2097     pp = eptr;
2098     for (i = min; i < max; i++)
2099     {
2100     int len = 1;
2101     if (eptr >= md->end_subject) break;
2102     GETCHARLEN(c, eptr, len);
2103     if (!_pcre_xclass(c, data)) break;
2104     eptr += len;
2105     }
2106     for(;;)
2107     {
2108 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2109 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2110     if (eptr-- == pp) break; /* Stop if tried at original pos */
2111 ph10 214 if (utf8) BACKCHAR(eptr);
2112 nigel 77 }
2113     RRETURN(MATCH_NOMATCH);
2114     }
2115    
2116     /* Control never gets here */
2117     }
2118     #endif /* End of XCLASS */
2119    
2120     /* Match a single character, casefully */
2121    
2122     case OP_CHAR:
2123     #ifdef SUPPORT_UTF8
2124     if (utf8)
2125     {
2126     length = 1;
2127     ecode++;
2128     GETCHARLEN(fc, ecode, length);
2129     if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2130     while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2131     }
2132     else
2133     #endif
2134    
2135     /* Non-UTF-8 mode */
2136     {
2137     if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2138     if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2139     ecode += 2;
2140     }
2141     break;
2142    
2143     /* Match a single character, caselessly */
2144    
2145     case OP_CHARNC:
2146     #ifdef SUPPORT_UTF8
2147     if (utf8)
2148     {
2149     length = 1;
2150     ecode++;
2151     GETCHARLEN(fc, ecode, length);
2152    
2153     if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2154    
2155     /* If the pattern character's value is < 128, we have only one byte, and
2156     can use the fast lookup table. */
2157    
2158     if (fc < 128)
2159     {
2160     if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2161     }
2162    
2163     /* Otherwise we must pick up the subject character */
2164    
2165     else
2166     {
2167 nigel 93 unsigned int dc;
2168 nigel 77 GETCHARINC(dc, eptr);
2169     ecode += length;
2170    
2171     /* If we have Unicode property support, we can use it to test the other
2172 nigel 87 case of the character, if there is one. */
2173 nigel 77
2174     if (fc != dc)
2175     {
2176     #ifdef SUPPORT_UCP
2177 nigel 87 if (dc != _pcre_ucp_othercase(fc))
2178 nigel 77 #endif
2179     RRETURN(MATCH_NOMATCH);
2180     }
2181     }
2182     }
2183     else
2184     #endif /* SUPPORT_UTF8 */
2185    
2186     /* Non-UTF-8 mode */
2187     {
2188     if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2189     if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2190     ecode += 2;
2191     }
2192     break;
2193    
2194 nigel 93 /* Match a single character repeatedly. */
2195 nigel 77
2196     case OP_EXACT:
2197     min = max = GET2(ecode, 1);
2198     ecode += 3;
2199     goto REPEATCHAR;
2200    
2201 nigel 93 case OP_POSUPTO:
2202     possessive = TRUE;
2203     /* Fall through */
2204    
2205 nigel 77 case OP_UPTO:
2206     case OP_MINUPTO:
2207     min = 0;
2208     max = GET2(ecode, 1);
2209     minimize = *ecode == OP_MINUPTO;
2210     ecode += 3;
2211     goto REPEATCHAR;
2212    
2213 nigel 93 case OP_POSSTAR:
2214     possessive = TRUE;
2215     min = 0;
2216     max = INT_MAX;
2217     ecode++;
2218     goto REPEATCHAR;
2219    
2220     case OP_POSPLUS:
2221     possessive = TRUE;
2222     min = 1;
2223     max = INT_MAX;
2224     ecode++;
2225     goto REPEATCHAR;
2226    
2227     case OP_POSQUERY:
2228     possessive = TRUE;
2229     min = 0;
2230     max = 1;
2231     ecode++;
2232     goto REPEATCHAR;
2233    
2234 nigel 77 case OP_STAR:
2235     case OP_MINSTAR:
2236     case OP_PLUS:
2237     case OP_MINPLUS:
2238     case OP_QUERY:
2239     case OP_MINQUERY:
2240     c = *ecode++ - OP_STAR;
2241     minimize = (c & 1) != 0;
2242     min = rep_min[c]; /* Pick up values from tables; */
2243     max = rep_max[c]; /* zero for max => infinity */
2244     if (max == 0) max = INT_MAX;
2245    
2246     /* Common code for all repeated single-character matches. We can give
2247     up quickly if there are fewer than the minimum number of characters left in
2248     the subject. */
2249    
2250     REPEATCHAR:
2251     #ifdef SUPPORT_UTF8
2252     if (utf8)
2253     {
2254     length = 1;
2255     charptr = ecode;
2256     GETCHARLEN(fc, ecode, length);
2257     if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2258     ecode += length;
2259    
2260     /* Handle multibyte character matching specially here. There is
2261     support for caseless matching if UCP support is present. */
2262    
2263     if (length > 1)
2264     {
2265     #ifdef SUPPORT_UCP
2266 nigel 93 unsigned int othercase;
2267 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2268 nigel 93 (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2269 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2270 ph10 115 else oclength = 0;
2271 nigel 77 #endif /* SUPPORT_UCP */
2272    
2273     for (i = 1; i <= min; i++)
2274     {
2275     if (memcmp(eptr, charptr, length) == 0) eptr += length;
2276 ph10 123 #ifdef SUPPORT_UCP
2277 nigel 77 /* Need braces because of following else */
2278     else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2279     else
2280     {
2281     if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2282     eptr += oclength;
2283     }
2284 ph10 115 #else /* without SUPPORT_UCP */
2285     else { RRETURN(MATCH_NOMATCH); }
2286 ph10 123 #endif /* SUPPORT_UCP */
2287 nigel 77 }
2288    
2289     if (min == max) continue;
2290    
2291     if (minimize)
2292     {
2293     for (fi = min;; fi++)
2294     {
2295 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2296 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2297     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2298     if (memcmp(eptr, charptr, length) == 0) eptr += length;
2299 ph10 123 #ifdef SUPPORT_UCP
2300 nigel 77 /* Need braces because of following else */
2301     else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2302     else
2303     {
2304     if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2305     eptr += oclength;
2306     }
2307 ph10 115 #else /* without SUPPORT_UCP */
2308     else { RRETURN (MATCH_NOMATCH); }
2309     #endif /* SUPPORT_UCP */
2310 nigel 77 }
2311     /* Control never gets here */
2312     }
2313 nigel 93
2314     else /* Maximize */
2315 nigel 77 {
2316     pp = eptr;
2317     for (i = min; i < max; i++)
2318     {
2319     if (eptr > md->end_subject - length) break;
2320     if (memcmp(eptr, charptr, length) == 0) eptr += length;
2321 ph10 123 #ifdef SUPPORT_UCP
2322 nigel 77 else if (oclength == 0) break;
2323     else
2324     {
2325     if (memcmp(eptr, occhars, oclength) != 0) break;
2326     eptr += oclength;
2327     }
2328 ph10 115 #else /* without SUPPORT_UCP */
2329     else break;
2330 ph10 123 #endif /* SUPPORT_UCP */
2331 nigel 77 }
2332 nigel 93
2333     if (possessive) continue;
2334 ph10 120 for(;;)
2335 nigel 77 {
2336 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2337 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2338 ph10 120 if (eptr == pp) RRETURN(MATCH_NOMATCH);
2339 ph10 115 #ifdef SUPPORT_UCP
2340     eptr--;
2341     BACKCHAR(eptr);
2342 ph10 123 #else /* without SUPPORT_UCP */
2343 nigel 77 eptr -= length;
2344 ph10 123 #endif /* SUPPORT_UCP */
2345 nigel 77 }
2346     }
2347     /* Control never gets here */
2348     }
2349    
2350     /* If the length of a UTF-8 character is 1, we fall through here, and
2351     obey the code as for non-UTF-8 characters below, though in this case the
2352     value of fc will always be < 128. */
2353     }
2354     else
2355     #endif /* SUPPORT_UTF8 */
2356    
2357     /* When not in UTF-8 mode, load a single-byte character. */
2358     {
2359     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2360     fc = *ecode++;
2361     }
2362    
2363     /* The value of fc at this point is always less than 256, though we may or
2364     may not be in UTF-8 mode. The code is duplicated for the caseless and
2365     caseful cases, for speed, since matching characters is likely to be quite
2366     common. First, ensure the minimum number of matches are present. If min =
2367     max, continue at the same level without recursing. Otherwise, if
2368     minimizing, keep trying the rest of the expression and advancing one
2369     matching character if failing, up to the maximum. Alternatively, if
2370     maximizing, find the maximum number of characters and work backwards. */
2371    
2372     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2373     max, eptr));
2374    
2375     if ((ims & PCRE_CASELESS) != 0)
2376     {
2377     fc = md->lcc[fc];
2378     for (i = 1; i <= min; i++)
2379     if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2380     if (min == max) continue;
2381     if (minimize)
2382     {
2383     for (fi = min;; fi++)
2384     {
2385 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2386 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2387     if (fi >= max || eptr >= md->end_subject ||
2388     fc != md->lcc[*eptr++])
2389     RRETURN(MATCH_NOMATCH);
2390     }
2391     /* Control never gets here */
2392     }
2393 nigel 93 else /* Maximize */
2394 nigel 77 {
2395     pp = eptr;
2396     for (i = min; i < max; i++)
2397     {
2398     if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2399     eptr++;
2400     }
2401 nigel 93 if (possessive) continue;
2402 nigel 77 while (eptr >= pp)
2403     {
2404 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2405 nigel 77 eptr--;
2406     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2407     }
2408     RRETURN(MATCH_NOMATCH);
2409     }
2410     /* Control never gets here */
2411     }
2412    
2413     /* Caseful comparisons (includes all multi-byte characters) */
2414    
2415     else
2416     {
2417     for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2418     if (min == max) continue;
2419     if (minimize)
2420     {
2421     for (fi = min;; fi++)
2422     {
2423 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2424 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2425     if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2426     RRETURN(MATCH_NOMATCH);
2427     }
2428     /* Control never gets here */
2429     }
2430 nigel 93 else /* Maximize */
2431 nigel 77 {
2432     pp = eptr;
2433     for (i = min; i < max; i++)
2434     {
2435     if (eptr >= md->end_subject || fc != *eptr) break;
2436     eptr++;
2437     }
2438 nigel 93 if (possessive) continue;
2439 nigel 77 while (eptr >= pp)
2440     {
2441 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2442 nigel 77 eptr--;
2443     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2444     }
2445     RRETURN(MATCH_NOMATCH);
2446     }
2447     }
2448     /* Control never gets here */
2449    
2450     /* Match a negated single one-byte character. The character we are
2451     checking can be multibyte. */
2452    
2453     case OP_NOT:
2454     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2455     ecode++;
2456     GETCHARINCTEST(c, eptr);
2457     if ((ims & PCRE_CASELESS) != 0)
2458     {
2459     #ifdef SUPPORT_UTF8
2460     if (c < 256)
2461     #endif
2462     c = md->lcc[c];
2463     if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2464     }
2465     else
2466     {
2467     if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2468     }
2469     break;
2470    
2471     /* Match a negated single one-byte character repeatedly. This is almost a
2472     repeat of the code for a repeated single character, but I haven't found a
2473     nice way of commoning these up that doesn't require a test of the
2474     positive/negative option for each character match. Maybe that wouldn't add
2475     very much to the time taken, but character matching *is* what this is all
2476     about... */
2477    
2478     case OP_NOTEXACT:
2479     min = max = GET2(ecode, 1);
2480     ecode += 3;
2481     goto REPEATNOTCHAR;
2482    
2483     case OP_NOTUPTO:
2484     case OP_NOTMINUPTO:
2485     min = 0;
2486     max = GET2(ecode, 1);
2487     minimize = *ecode == OP_NOTMINUPTO;
2488     ecode += 3;
2489     goto REPEATNOTCHAR;
2490    
2491 nigel 93 case OP_NOTPOSSTAR:
2492     possessive = TRUE;
2493     min = 0;
2494     max = INT_MAX;
2495     ecode++;
2496     goto REPEATNOTCHAR;
2497    
2498     case OP_NOTPOSPLUS:
2499     possessive = TRUE;
2500     min = 1;
2501     max = INT_MAX;
2502     ecode++;
2503     goto REPEATNOTCHAR;
2504    
2505     case OP_NOTPOSQUERY:
2506     possessive = TRUE;
2507     min = 0;
2508     max = 1;
2509     ecode++;
2510     goto REPEATNOTCHAR;
2511    
2512     case OP_NOTPOSUPTO:
2513     possessive = TRUE;
2514     min = 0;
2515     max = GET2(ecode, 1);
2516     ecode += 3;
2517     goto REPEATNOTCHAR;
2518    
2519 nigel 77 case OP_NOTSTAR:
2520     case OP_NOTMINSTAR:
2521     case OP_NOTPLUS:
2522     case OP_NOTMINPLUS:
2523     case OP_NOTQUERY:
2524     case OP_NOTMINQUERY:
2525     c = *ecode++ - OP_NOTSTAR;
2526     minimize = (c & 1) != 0;
2527     min = rep_min[c]; /* Pick up values from tables; */
2528     max = rep_max[c]; /* zero for max => infinity */
2529     if (max == 0) max = INT_MAX;
2530    
2531     /* Common code for all repeated single-byte matches. We can give up quickly
2532     if there are fewer than the minimum number of bytes left in the
2533     subject. */
2534    
2535     REPEATNOTCHAR:
2536     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2537     fc = *ecode++;
2538    
2539     /* The code is duplicated for the caseless and caseful cases, for speed,
2540     since matching characters is likely to be quite common. First, ensure the
2541     minimum number of matches are present. If min = max, continue at the same
2542     level without recursing. Otherwise, if minimizing, keep trying the rest of
2543     the expression and advancing one matching character if failing, up to the
2544     maximum. Alternatively, if maximizing, find the maximum number of
2545     characters and work backwards. */
2546    
2547     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2548     max, eptr));
2549    
2550     if ((ims & PCRE_CASELESS) != 0)
2551     {
2552     fc = md->lcc[fc];
2553    
2554     #ifdef SUPPORT_UTF8
2555     /* UTF-8 mode */
2556     if (utf8)
2557     {
2558 nigel 93 register unsigned int d;
2559 nigel 77 for (i = 1; i <= min; i++)
2560     {
2561     GETCHARINC(d, eptr);
2562     if (d < 256) d = md->lcc[d];
2563     if (fc == d) RRETURN(MATCH_NOMATCH);
2564     }
2565     }
2566     else
2567     #endif
2568    
2569     /* Not UTF-8 mode */
2570     {
2571     for (i = 1; i <= min; i++)
2572     if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2573     }
2574    
2575     if (min == max) continue;
2576    
2577     if (minimize)
2578     {
2579     #ifdef SUPPORT_UTF8
2580     /* UTF-8 mode */
2581     if (utf8)
2582     {
2583 nigel 93 register unsigned int d;
2584 nigel 77 for (fi = min;; fi++)
2585     {
2586 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2587 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2588     GETCHARINC(d, eptr);
2589     if (d < 256) d = md->lcc[d];
2590     if (fi >= max || eptr >= md->end_subject || fc == d)
2591     RRETURN(MATCH_NOMATCH);
2592     }
2593     }
2594     else
2595     #endif
2596     /* Not UTF-8 mode */
2597     {
2598     for (fi = min;; fi++)
2599     {
2600 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2601 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2602     if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2603     RRETURN(MATCH_NOMATCH);
2604     }
2605     }
2606     /* Control never gets here */
2607     }
2608    
2609     /* Maximize case */
2610    
2611     else
2612     {
2613     pp = eptr;
2614    
2615     #ifdef SUPPORT_UTF8
2616     /* UTF-8 mode */
2617     if (utf8)
2618     {
2619 nigel 93 register unsigned int d;
2620 nigel 77 for (i = min; i < max; i++)
2621     {
2622     int len = 1;
2623     if (eptr >= md->end_subject) break;
2624     GETCHARLEN(d, eptr, len);
2625     if (d < 256) d = md->lcc[d];
2626     if (fc == d) break;
2627     eptr += len;
2628     }
2629 nigel 93 if (possessive) continue;
2630     for(;;)
2631 nigel 77 {
2632 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2633 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2634     if (eptr-- == pp) break; /* Stop if tried at original pos */
2635     BACKCHAR(eptr);
2636     }
2637     }
2638     else
2639     #endif
2640     /* Not UTF-8 mode */
2641     {
2642     for (i = min; i < max; i++)
2643     {
2644     if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2645     eptr++;
2646     }
2647 nigel 93 if (possessive) continue;
2648 nigel 77 while (eptr >= pp)
2649     {
2650 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2651 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2652     eptr--;
2653     }
2654     }
2655    
2656     RRETURN(MATCH_NOMATCH);
2657     }
2658     /* Control never gets here */
2659     }
2660    
2661     /* Caseful comparisons */
2662    
2663     else
2664     {
2665     #ifdef SUPPORT_UTF8
2666     /* UTF-8 mode */
2667     if (utf8)
2668     {
2669 nigel 93 register unsigned int d;
2670 nigel 77 for (i = 1; i <= min; i++)
2671     {
2672     GETCHARINC(d, eptr);
2673     if (fc == d) RRETURN(MATCH_NOMATCH);
2674     }
2675     }
2676     else
2677     #endif
2678     /* Not UTF-8 mode */
2679     {
2680     for (i = 1; i <= min; i++)
2681     if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2682     }
2683    
2684     if (min == max) continue;
2685    
2686     if (minimize)
2687     {
2688     #ifdef SUPPORT_UTF8
2689     /* UTF-8 mode */
2690     if (utf8)
2691     {
2692 nigel 93 register unsigned int d;
2693 nigel 77 for (fi = min;; fi++)
2694     {
2695 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2696 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2697     GETCHARINC(d, eptr);
2698     if (fi >= max || eptr >= md->end_subject || fc == d)
2699     RRETURN(MATCH_NOMATCH);
2700     }
2701     }
2702     else
2703     #endif
2704     /* Not UTF-8 mode */
2705     {
2706     for (fi = min;; fi++)
2707     {
2708 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2709 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2710     if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2711     RRETURN(MATCH_NOMATCH);
2712     }
2713     }
2714     /* Control never gets here */
2715     }
2716    
2717     /* Maximize case */
2718    
2719     else
2720     {
2721     pp = eptr;
2722    
2723     #ifdef SUPPORT_UTF8
2724     /* UTF-8 mode */
2725     if (utf8)
2726     {
2727 nigel 93 register unsigned int d;
2728 nigel 77 for (i = min; i < max; i++)
2729     {
2730     int len = 1;
2731     if (eptr >= md->end_subject) break;
2732     GETCHARLEN(d, eptr, len);
2733     if (fc == d) break;
2734     eptr += len;
2735     }
2736 nigel 93 if (possessive) continue;
2737 nigel 77 for(;;)
2738     {
2739 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2740 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2741     if (eptr-- == pp) break; /* Stop if tried at original pos */
2742     BACKCHAR(eptr);
2743     }
2744     }
2745     else
2746     #endif
2747     /* Not UTF-8 mode */
2748     {
2749     for (i = min; i < max; i++)
2750     {
2751     if (eptr >= md->end_subject || fc == *eptr) break;
2752     eptr++;
2753     }
2754 nigel 93 if (possessive) continue;
2755 nigel 77 while (eptr >= pp)
2756     {
2757 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2758 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2759     eptr--;
2760     }
2761     }
2762    
2763     RRETURN(MATCH_NOMATCH);
2764     }
2765     }
2766     /* Control never gets here */
2767    
2768     /* Match a single character type repeatedly; several different opcodes
2769     share code. This is very similar to the code for single characters, but we
2770     repeat it in the interests of efficiency. */
2771    
2772     case OP_TYPEEXACT:
2773     min = max = GET2(ecode, 1);
2774     minimize = TRUE;
2775     ecode += 3;
2776     goto REPEATTYPE;
2777    
2778     case OP_TYPEUPTO:
2779     case OP_TYPEMINUPTO:
2780     min = 0;
2781     max = GET2(ecode, 1);
2782     minimize = *ecode == OP_TYPEMINUPTO;
2783     ecode += 3;
2784     goto REPEATTYPE;
2785    
2786 nigel 93 case OP_TYPEPOSSTAR:
2787     possessive = TRUE;
2788     min = 0;
2789     max = INT_MAX;
2790     ecode++;
2791     goto REPEATTYPE;
2792    
2793     case OP_TYPEPOSPLUS:
2794     possessive = TRUE;
2795     min = 1;
2796     max = INT_MAX;
2797     ecode++;
2798     goto REPEATTYPE;
2799    
2800     case OP_TYPEPOSQUERY:
2801     possessive = TRUE;
2802     min = 0;
2803     max = 1;
2804     ecode++;
2805     goto REPEATTYPE;
2806    
2807     case OP_TYPEPOSUPTO:
2808     possessive = TRUE;
2809     min = 0;
2810     max = GET2(ecode, 1);
2811     ecode += 3;
2812     goto REPEATTYPE;
2813    
2814 nigel 77 case OP_TYPESTAR:
2815     case OP_TYPEMINSTAR:
2816     case OP_TYPEPLUS:
2817     case OP_TYPEMINPLUS:
2818     case OP_TYPEQUERY:
2819     case OP_TYPEMINQUERY:
2820     c = *ecode++ - OP_TYPESTAR;
2821     minimize = (c & 1) != 0;
2822     min = rep_min[c]; /* Pick up values from tables; */
2823     max = rep_max[c]; /* zero for max => infinity */
2824     if (max == 0) max = INT_MAX;
2825    
2826     /* Common code for all repeated single character type matches. Note that
2827     in UTF-8 mode, '.' matches a character of any length, but for the other
2828     character types, the valid characters are all one-byte long. */
2829    
2830     REPEATTYPE:
2831     ctype = *ecode++; /* Code for the character type */
2832    
2833     #ifdef SUPPORT_UCP
2834     if (ctype == OP_PROP || ctype == OP_NOTPROP)
2835     {
2836     prop_fail_result = ctype == OP_NOTPROP;
2837     prop_type = *ecode++;
2838 nigel 87 prop_value = *ecode++;
2839 nigel 77 }
2840     else prop_type = -1;
2841     #endif
2842    
2843     /* First, ensure the minimum number of matches are present. Use inline
2844     code for maximizing the speed, and do the type test once at the start
2845     (i.e. keep it out of the loop). Also we can test that there are at least
2846     the minimum number of bytes before we start. This isn't as effective in
2847     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2848     is tidier. Also separate the UCP code, which can be the same for both UTF-8
2849     and single-bytes. */
2850    
2851     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2852     if (min > 0)
2853     {
2854     #ifdef SUPPORT_UCP
2855 nigel 87 if (prop_type >= 0)
2856 nigel 77 {
2857 nigel 87 switch(prop_type)
2858 nigel 77 {
2859 nigel 87 case PT_ANY:
2860     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2861     for (i = 1; i <= min; i++)
2862     {
2863     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2864 ph10 184 GETCHARINCTEST(c, eptr);
2865 nigel 87 }
2866     break;
2867    
2868     case PT_LAMP:
2869     for (i = 1; i <= min; i++)
2870     {
2871     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2872 ph10 184 GETCHARINCTEST(c, eptr);
2873 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2874     if ((prop_chartype == ucp_Lu ||
2875     prop_chartype == ucp_Ll ||
2876     prop_chartype == ucp_Lt) == prop_fail_result)
2877     RRETURN(MATCH_NOMATCH);
2878     }
2879     break;
2880    
2881     case PT_GC:
2882     for (i = 1; i <= min; i++)
2883     {
2884     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2885 ph10 184 GETCHARINCTEST(c, eptr);
2886 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2887     if ((prop_category == prop_value) == prop_fail_result)
2888     RRETURN(MATCH_NOMATCH);
2889     }
2890     break;
2891    
2892     case PT_PC:
2893     for (i = 1; i <= min; i++)
2894     {
2895     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2896 ph10 184 GETCHARINCTEST(c, eptr);
2897 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2898     if ((prop_chartype == prop_value) == prop_fail_result)
2899     RRETURN(MATCH_NOMATCH);
2900     }
2901     break;
2902    
2903     case PT_SC:
2904     for (i = 1; i <= min; i++)
2905     {
2906     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2907 ph10 184 GETCHARINCTEST(c, eptr);
2908 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2909     if ((prop_script == prop_value) == prop_fail_result)
2910     RRETURN(MATCH_NOMATCH);
2911     }
2912     break;
2913    
2914     default:
2915     RRETURN(PCRE_ERROR_INTERNAL);
2916 nigel 77 }
2917     }
2918    
2919     /* Match extended Unicode sequences. We will get here only if the
2920     support is in the binary; otherwise a compile-time error occurs. */
2921    
2922     else if (ctype == OP_EXTUNI)
2923     {
2924     for (i = 1; i <= min; i++)
2925     {
2926     GETCHARINCTEST(c, eptr);
2927 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2928 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2929     while (eptr < md->end_subject)
2930     {
2931     int len = 1;
2932     if (!utf8) c = *eptr; else
2933     {
2934     GETCHARLEN(c, eptr, len);
2935     }
2936 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2937 nigel 77 if (prop_category != ucp_M) break;
2938     eptr += len;
2939     }
2940     }
2941     }
2942    
2943     else
2944     #endif /* SUPPORT_UCP */
2945    
2946     /* Handle all other cases when the coding is UTF-8 */
2947    
2948     #ifdef SUPPORT_UTF8
2949     if (utf8) switch(ctype)
2950     {
2951     case OP_ANY:
2952     for (i = 1; i <= min; i++)
2953     {
2954 ph10 342 if (eptr >= md->end_subject || IS_NEWLINE(eptr))
2955 nigel 77 RRETURN(MATCH_NOMATCH);
2956 nigel 91 eptr++;
2957 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2958     }
2959     break;
2960    
2961 ph10 341 case OP_ALLANY:
2962     for (i = 1; i <= min; i++)
2963     {
2964     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2965     eptr++;
2966     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2967     }
2968     break;
2969    
2970 nigel 77 case OP_ANYBYTE:
2971     eptr += min;
2972     break;
2973    
2974 nigel 93 case OP_ANYNL:
2975     for (i = 1; i <= min; i++)
2976     {
2977     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2978     GETCHARINC(c, eptr);
2979     switch(c)
2980     {
2981     default: RRETURN(MATCH_NOMATCH);
2982     case 0x000d:
2983     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2984     break;
2985 ph10 231
2986 nigel 93 case 0x000a:
2987 ph10 231 break;
2988    
2989 nigel 93 case 0x000b:
2990     case 0x000c:
2991     case 0x0085:
2992     case 0x2028:
2993     case 0x2029:
2994 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2995 nigel 93 break;
2996     }
2997     }
2998     break;
2999    
3000 ph10 178 case OP_NOT_HSPACE:
3001     for (i = 1; i <= min; i++)
3002     {
3003     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3004     GETCHARINC(c, eptr);
3005     switch(c)
3006     {
3007     default: break;
3008     case 0x09: /* HT */
3009     case 0x20: /* SPACE */
3010     case 0xa0: /* NBSP */
3011     case 0x1680: /* OGHAM SPACE MARK */
3012     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3013     case 0x2000: /* EN QUAD */
3014     case 0x2001: /* EM QUAD */
3015     case 0x2002: /* EN SPACE */
3016     case 0x2003: /* EM SPACE */
3017     case 0x2004: /* THREE-PER-EM SPACE */
3018     case 0x2005: /* FOUR-PER-EM SPACE */
3019     case 0x2006: /* SIX-PER-EM SPACE */
3020     case 0x2007: /* FIGURE SPACE */
3021     case 0x2008: /* PUNCTUATION SPACE */
3022     case 0x2009: /* THIN SPACE */
3023     case 0x200A: /* HAIR SPACE */
3024     case 0x202f: /* NARROW NO-BREAK SPACE */
3025     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3026     case 0x3000: /* IDEOGRAPHIC SPACE */
3027     RRETURN(MATCH_NOMATCH);
3028     }
3029     }
3030     break;
3031 ph10 182
3032 ph10 178 case OP_HSPACE:
3033     for (i = 1; i <= min; i++)
3034     {
3035     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3036     GETCHARINC(c, eptr);
3037     switch(c)
3038     {
3039     default: RRETURN(MATCH_NOMATCH);
3040     case 0x09: /* HT */
3041     case 0x20: /* SPACE */
3042     case 0xa0: /* NBSP */
3043     case 0x1680: /* OGHAM SPACE MARK */
3044     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3045     case 0x2000: /* EN QUAD */
3046     case 0x2001: /* EM QUAD */
3047     case 0x2002: /* EN SPACE */
3048     case 0x2003: /* EM SPACE */
3049     case 0x2004: /* THREE-PER-EM SPACE */
3050     case 0x2005: /* FOUR-PER-EM SPACE */
3051     case 0x2006: /* SIX-PER-EM SPACE */
3052     case 0x2007: /* FIGURE SPACE */
3053     case 0x2008: /* PUNCTUATION SPACE */
3054     case 0x2009: /* THIN SPACE */
3055     case 0x200A: /* HAIR SPACE */
3056     case 0x202f: /* NARROW NO-BREAK SPACE */
3057     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3058     case 0x3000: /* IDEOGRAPHIC SPACE */
3059     break;
3060     }
3061     }
3062     break;
3063 ph10 182
3064 ph10 178 case OP_NOT_VSPACE:
3065     for (i = 1; i <= min; i++)
3066     {
3067     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3068     GETCHARINC(c, eptr);
3069     switch(c)
3070     {
3071     default: break;
3072     case 0x0a: /* LF */
3073     case 0x0b: /* VT */
3074     case 0x0c: /* FF */
3075     case 0x0d: /* CR */
3076     case 0x85: /* NEL */
3077     case 0x2028: /* LINE SEPARATOR */
3078     case 0x2029: /* PARAGRAPH SEPARATOR */
3079     RRETURN(MATCH_NOMATCH);
3080     }
3081     }
3082     break;
3083 ph10 182
3084 ph10 178 case OP_VSPACE:
3085     for (i = 1; i <= min; i++)
3086     {
3087     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3088     GETCHARINC(c, eptr);
3089     switch(c)
3090     {
3091     default: RRETURN(MATCH_NOMATCH);
3092     case 0x0a: /* LF */
3093     case 0x0b: /* VT */
3094     case 0x0c: /* FF */
3095     case 0x0d: /* CR */
3096     case 0x85: /* NEL */
3097     case 0x2028: /* LINE SEPARATOR */
3098     case 0x2029: /* PARAGRAPH SEPARATOR */
3099 ph10 182 break;
3100 ph10 178 }
3101     }
3102     break;
3103    
3104 nigel 77 case OP_NOT_DIGIT:
3105     for (i = 1; i <= min; i++)
3106     {
3107     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3108     GETCHARINC(c, eptr);
3109     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3110     RRETURN(MATCH_NOMATCH);
3111     }
3112     break;
3113    
3114     case OP_DIGIT:
3115     for (i = 1; i <= min; i++)
3116     {
3117     if (eptr >= md->end_subject ||
3118     *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3119     RRETURN(MATCH_NOMATCH);
3120     /* No need to skip more bytes - we know it's a 1-byte character */
3121     }
3122     break;
3123    
3124     case OP_NOT_WHITESPACE:
3125     for (i = 1; i <= min; i++)
3126     {
3127     if (eptr >= md->end_subject ||
3128 ph10 219 (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
3129 nigel 77 RRETURN(MATCH_NOMATCH);
3130 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3131 nigel 77 }
3132     break;
3133    
3134     case OP_WHITESPACE:
3135     for (i = 1; i <= min; i++)
3136     {
3137     if (eptr >= md->end_subject ||
3138     *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3139     RRETURN(MATCH_NOMATCH);
3140     /* No need to skip more bytes - we know it's a 1-byte character */
3141     }
3142     break;
3143    
3144     case OP_NOT_WORDCHAR:
3145     for (i = 1; i <= min; i++)
3146     {
3147     if (eptr >= md->end_subject ||
3148 ph10 219 (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3149 nigel 77 RRETURN(MATCH_NOMATCH);
3150 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3151 nigel 77 }
3152     break;
3153    
3154     case OP_WORDCHAR:
3155     for (i = 1; i <= min; i++)
3156     {
3157     if (eptr >= md->end_subject ||
3158     *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3159     RRETURN(MATCH_NOMATCH);
3160     /* No need to skip more bytes - we know it's a 1-byte character */
3161     }
3162     break;
3163    
3164     default:
3165     RRETURN(PCRE_ERROR_INTERNAL);
3166     } /* End switch(ctype) */
3167    
3168     else
3169     #endif /* SUPPORT_UTF8 */
3170    
3171     /* Code for the non-UTF-8 case for minimum matching of operators other
3172 nigel 93 than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3173     number of bytes present, as this was tested above. */
3174 nigel 77
3175     switch(ctype)
3176     {
3177     case OP_ANY:
3178 ph10 342 for (i = 1; i <= min; i++)
3179 nigel 77 {
3180 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3181     eptr++;
3182 nigel 77 }
3183     break;
3184    
3185 ph10 341 case OP_ALLANY:
3186     eptr += min;
3187     break;
3188    
3189 nigel 77 case OP_ANYBYTE:
3190     eptr += min;
3191     break;
3192    
3193 nigel 93 /* Because of the CRLF case, we can't assume the minimum number of
3194     bytes are present in this case. */
3195    
3196     case OP_ANYNL:
3197     for (i = 1; i <= min; i++)
3198     {
3199     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3200     switch(*eptr++)
3201     {
3202     default: RRETURN(MATCH_NOMATCH);
3203     case 0x000d:
3204     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3205     break;
3206     case 0x000a:
3207 ph10 231 break;
3208    
3209 nigel 93 case 0x000b:
3210     case 0x000c:
3211     case 0x0085:
3212 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3213 nigel 93 break;
3214     }
3215     }
3216     break;
3217    
3218 ph10 178 case OP_NOT_HSPACE:
3219     for (i = 1; i <= min; i++)
3220     {
3221     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3222     switch(*eptr++)
3223     {
3224     default: break;
3225     case 0x09: /* HT */
3226     case 0x20: /* SPACE */
3227     case 0xa0: /* NBSP */
3228     RRETURN(MATCH_NOMATCH);
3229     }
3230     }
3231     break;
3232    
3233     case OP_HSPACE:
3234     for (i = 1; i <= min; i++)
3235     {
3236     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3237     switch(*eptr++)
3238     {
3239     default: RRETURN(MATCH_NOMATCH);
3240     case 0x09: /* HT */
3241     case 0x20: /* SPACE */
3242     case 0xa0: /* NBSP */
3243 ph10 182 break;
3244 ph10 178 }
3245     }
3246     break;
3247    
3248     case OP_NOT_VSPACE:
3249     for (i = 1; i <= min; i++)
3250     {
3251     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3252     switch(*eptr++)
3253     {
3254     default: break;
3255     case 0x0a: /* LF */
3256     case 0x0b: /* VT */
3257     case 0x0c: /* FF */
3258     case 0x0d: /* CR */
3259     case 0x85: /* NEL */
3260     RRETURN(MATCH_NOMATCH);
3261     }
3262     }
3263     break;
3264    
3265     case OP_VSPACE:
3266     for (i = 1; i <= min; i++)
3267     {
3268     if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3269     switch(*eptr++)
3270     {
3271     default: RRETURN(MATCH_NOMATCH);
3272     case 0x0a: /* LF */
3273     case 0x0b: /* VT */
3274     case 0x0c: /* FF */
3275     case 0x0d: /* CR */
3276     case 0x85: /* NEL */
3277 ph10 182 break;
3278 ph10 178 }
3279     }
3280     break;
3281    
3282 nigel 77 case OP_NOT_DIGIT:
3283     for (i = 1; i <= min; i++)
3284     if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3285     break;
3286    
3287     case OP_DIGIT:
3288     for (i = 1; i <= min; i++)
3289     if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3290     break;
3291    
3292     case OP_NOT_WHITESPACE:
3293     for (i = 1; i <= min; i++)
3294     if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3295     break;
3296    
3297     case OP_WHITESPACE:
3298     for (i = 1; i <= min; i++)
3299     if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3300     break;
3301    
3302     case OP_NOT_WORDCHAR:
3303     for (i = 1; i <= min; i++)
3304     if ((md->ctypes[*eptr++] & ctype_word) != 0)
3305     RRETURN(MATCH_NOMATCH);
3306     break;
3307    
3308     case OP_WORDCHAR:
3309     for (i = 1; i <= min; i++)
3310     if ((md->ctypes[*eptr++] & ctype_word) == 0)
3311     RRETURN(MATCH_NOMATCH);
3312     break;
3313    
3314     default:
3315     RRETURN(PCRE_ERROR_INTERNAL);
3316     }
3317     }
3318    
3319     /* If min = max, continue at the same level without recursing */
3320    
3321     if (min == max) continue;
3322    
3323     /* If minimizing, we have to test the rest of the pattern before each
3324     subsequent match. Again, separate the UTF-8 case for speed, and also
3325     separate the UCP cases. */
3326    
3327     if (minimize)
3328     {
3329     #ifdef SUPPORT_UCP
3330 nigel 87 if (prop_type >= 0)
3331 nigel 77 {
3332 nigel 87 switch(prop_type)
3333 nigel 77 {
3334 nigel 87 case PT_ANY:
3335     for (fi = min;; fi++)
3336     {
3337 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3338 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3339     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3340     GETCHARINC(c, eptr);
3341     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3342     }
3343 nigel 93 /* Control never gets here */
3344 nigel 87
3345     case PT_LAMP:
3346     for (fi = min;; fi++)
3347     {
3348 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3349 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3350     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3351     GETCHARINC(c, eptr);
3352     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3353     if ((prop_chartype == ucp_Lu ||
3354     prop_chartype == ucp_Ll ||
3355     prop_chartype == ucp_Lt) == prop_fail_result)
3356     RRETURN(MATCH_NOMATCH);
3357     }
3358 nigel 93 /* Control never gets here */
3359 nigel 87
3360     case PT_GC:
3361     for (fi = min;; fi++)
3362     {
3363 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3364 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3365     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3366     GETCHARINC(c, eptr);
3367     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3368     if ((prop_category == prop_value) == prop_fail_result)
3369     RRETURN(MATCH_NOMATCH);
3370     }
3371 nigel 93 /* Control never gets here */
3372 nigel 87
3373     case PT_PC:
3374     for (fi = min;; fi++)
3375     {
3376 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3377 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3378     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3379     GETCHARINC(c, eptr);
3380     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3381     if ((prop_chartype == prop_value) == prop_fail_result)
3382     RRETURN(MATCH_NOMATCH);
3383     }
3384 nigel 93 /* Control never gets here */
3385 nigel 87
3386     case PT_SC:
3387     for (fi = min;; fi++)
3388     {
3389 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3390 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3391     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3392     GETCHARINC(c, eptr);
3393     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3394     if ((prop_script == prop_value) == prop_fail_result)
3395     RRETURN(MATCH_NOMATCH);
3396     }
3397 nigel 93 /* Control never gets here */
3398 nigel 87
3399     default:
3400     RRETURN(PCRE_ERROR_INTERNAL);
3401 nigel 77 }
3402     }
3403    
3404     /* Match extended Unicode sequences. We will get here only if the
3405     support is in the binary; otherwise a compile-time error occurs. */
3406    
3407     else if (ctype == OP_EXTUNI)
3408     {
3409     for (fi = min;; fi++)
3410     {
3411 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3412 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3413     if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3414     GETCHARINCTEST(c, eptr);
3415 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3416 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3417     while (eptr < md->end_subject)
3418     {
3419     int len = 1;
3420     if (!utf8) c = *eptr; else
3421     {
3422     GETCHARLEN(c, eptr, len);
3423     }
3424 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3425 nigel 77 if (prop_category != ucp_M) break;
3426     eptr += len;
3427     }
3428     }
3429     }
3430    
3431     else
3432     #endif /* SUPPORT_UCP */
3433    
3434     #ifdef SUPPORT_UTF8
3435     /* UTF-8 mode */
3436     if (utf8)
3437     {
3438     for (fi = min;; fi++)
3439     {
3440 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3441 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3442 nigel 91 if (fi >= max || eptr >= md->end_subject ||
3443 ph10 342 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3444 nigel 91 RRETURN(MATCH_NOMATCH);
3445 nigel 77
3446     GETCHARINC(c, eptr);
3447     switch(ctype)
3448     {
3449 ph10 342 case OP_ANY: /* This is the non-NL case */
3450 ph10 345 case OP_ALLANY:
3451 nigel 77 case OP_ANYBYTE:
3452     break;
3453    
3454 nigel 93 case OP_ANYNL:
3455     switch(c)
3456     {
3457     default: RRETURN(MATCH_NOMATCH);
3458     case 0x000d:
3459     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3460     break;
3461     case 0x000a:
3462 ph10 231 break;
3463    
3464 nigel 93 case 0x000b:
3465     case 0x000c:
3466     case 0x0085:
3467     case 0x2028:
3468     case 0x2029:
3469 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3470 nigel 93 break;
3471     }
3472     break;
3473    
3474 ph10 178 case OP_NOT_HSPACE:
3475     switch(c)
3476     {
3477     default: break;
3478     case 0x09: /* HT */
3479     case 0x20: /* SPACE */
3480     case 0xa0: /* NBSP */
3481     case 0x1680: /* OGHAM SPACE MARK */
3482     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3483     case 0x2000: /* EN QUAD */
3484     case 0x2001: /* EM QUAD */
3485     case 0x2002: /* EN SPACE */
3486     case 0x2003: /* EM SPACE */
3487     case 0x2004: /* THREE-PER-EM SPACE */
3488     case 0x2005: /* FOUR-PER-EM SPACE */
3489     case 0x2006: /* SIX-PER-EM SPACE */
3490     case 0x2007: /* FIGURE SPACE */
3491     case 0x2008: /* PUNCTUATION SPACE */
3492     case 0x2009: /* THIN SPACE */
3493     case 0x200A: /* HAIR SPACE */
3494     case 0x202f: /* NARROW NO-BREAK SPACE */
3495     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3496     case 0x3000: /* IDEOGRAPHIC SPACE */
3497     RRETURN(MATCH_NOMATCH);
3498     }
3499     break;
3500    
3501     case OP_HSPACE:
3502     switch(c)
3503     {
3504     default: RRETURN(MATCH_NOMATCH);
3505     case 0x09: /* HT */
3506     case 0x20: /* SPACE */
3507     case 0xa0: /* NBSP */
3508     case 0x1680: /* OGHAM SPACE MARK */
3509     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3510     case 0x2000: /* EN QUAD */
3511     case 0x2001: /* EM QUAD */
3512     case 0x2002: /* EN SPACE */
3513     case 0x2003: /* EM SPACE */
3514     case 0x2004: /* THREE-PER-EM SPACE */
3515     case 0x2005: /* FOUR-PER-EM SPACE */
3516     case 0x2006: /* SIX-PER-EM SPACE */
3517     case 0x2007: /* FIGURE SPACE */
3518     case 0x2008: /* PUNCTUATION SPACE */
3519     case 0x2009: /* THIN SPACE */
3520     case 0x200A: /* HAIR SPACE */
3521     case 0x202f: /* NARROW NO-BREAK SPACE */
3522     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3523     case 0x3000: /* IDEOGRAPHIC SPACE */
3524     break;
3525     }
3526     break;
3527    
3528     case OP_NOT_VSPACE:
3529     switch(c)
3530     {
3531     default: break;
3532     case 0x0a: /* LF */
3533     case 0x0b: /* VT */
3534     case 0x0c: /* FF */
3535     case 0x0d: /* CR */
3536     case 0x85: /* NEL */
3537     case 0x2028: /* LINE SEPARATOR */
3538     case 0x2029: /* PARAGRAPH SEPARATOR */
3539     RRETURN(MATCH_NOMATCH);
3540     }
3541     break;
3542    
3543     case OP_VSPACE:
3544     switch(c)
3545     {
3546     default: RRETURN(MATCH_NOMATCH);
3547     case 0x0a: /* LF */
3548     case 0x0b: /* VT */
3549     case 0x0c: /* FF */
3550     case 0x0d: /* CR */
3551     case 0x85: /* NEL */
3552     case 0x2028: /* LINE SEPARATOR */
3553     case 0x2029: /* PARAGRAPH SEPARATOR */
3554     break;
3555     }
3556     break;
3557    
3558 nigel 77 case OP_NOT_DIGIT:
3559     if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3560     RRETURN(MATCH_NOMATCH);
3561     break;
3562    
3563     case OP_DIGIT:
3564     if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3565     RRETURN(MATCH_NOMATCH);
3566     break;
3567    
3568     case OP_NOT_WHITESPACE:
3569     if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3570     RRETURN(MATCH_NOMATCH);
3571     break;
3572    
3573     case OP_WHITESPACE:
3574     if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3575     RRETURN(MATCH_NOMATCH);
3576     break;
3577    
3578     case OP_NOT_WORDCHAR:
3579     if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3580     RRETURN(MATCH_NOMATCH);
3581     break;
3582    
3583     case OP_WORDCHAR:
3584     if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3585     RRETURN(MATCH_NOMATCH);
3586     break;
3587    
3588     default:
3589     RRETURN(PCRE_ERROR_INTERNAL);
3590     }
3591     }
3592     }
3593     else
3594     #endif
3595     /* Not UTF-8 mode */
3596     {
3597     for (fi = min;; fi++)
3598     {
3599 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3600 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3601 nigel 91 if (fi >= max || eptr >= md->end_subject ||
3602 ph10 342 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3603 nigel 91 RRETURN(MATCH_NOMATCH);
3604    
3605 nigel 77 c = *eptr++;
3606     switch(ctype)
3607     {
3608 ph10 342 case OP_ANY: /* This is the non-NL case */
3609 ph10 345 case OP_ALLANY:
3610 nigel 77 case OP_ANYBYTE:
3611     break;
3612    
3613 nigel 93 case OP_ANYNL:
3614     switch(c)
3615     {
3616     default: RRETURN(MATCH_NOMATCH);
3617     case 0x000d:
3618     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3619     break;
3620 ph10 231
3621 nigel 93 case 0x000a:
3622 ph10 231 break;
3623    
3624 nigel 93 case 0x000b:
3625     case 0x000c:
3626     case 0x0085:
3627 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3628 nigel 93 break;
3629     }
3630     break;
3631    
3632 ph10 178 case OP_NOT_HSPACE:
3633     switch(c)
3634     {
3635     default: break;
3636     case 0x09: /* HT */
3637     case 0x20: /* SPACE */
3638     case 0xa0: /* NBSP */
3639     RRETURN(MATCH_NOMATCH);
3640     }
3641     break;
3642    
3643     case OP_HSPACE:
3644     switch(c)
3645     {
3646     default: RRETURN(MATCH_NOMATCH);
3647     case 0x09: /* HT */
3648     case 0x20: /* SPACE */
3649     case 0xa0: /* NBSP */
3650     break;
3651     }
3652     break;
3653    
3654     case OP_NOT_VSPACE:
3655     switch(c)
3656     {
3657     default: break;
3658     case 0x0a: /* LF */
3659     case 0x0b: /* VT */
3660     case 0x0c: /* FF */
3661     case 0x0d: /* CR */
3662     case 0x85: /* NEL */
3663     RRETURN(MATCH_NOMATCH);
3664     }
3665     break;
3666    
3667     case OP_VSPACE:
3668     switch(c)
3669     {
3670     default: RRETURN(MATCH_NOMATCH);
3671     case 0x0a: /* LF */
3672     case 0x0b: /* VT */
3673     case 0x0c: /* FF */
3674     case 0x0d: /* CR */
3675     case 0x85: /* NEL */
3676     break;
3677     }
3678     break;
3679    
3680 nigel 77 case OP_NOT_DIGIT:
3681     if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3682     break;
3683    
3684     case OP_DIGIT:
3685     if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3686     break;
3687    
3688     case OP_NOT_WHITESPACE:
3689     if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3690     break;
3691    
3692     case OP_WHITESPACE:
3693     if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3694     break;
3695    
3696     case OP_NOT_WORDCHAR:
3697     if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3698     break;
3699    
3700     case OP_WORDCHAR:
3701     if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3702     break;
3703    
3704     default:
3705     RRETURN(PCRE_ERROR_INTERNAL);
3706     }
3707     }
3708     }
3709     /* Control never gets here */
3710     }
3711    
3712 nigel 93 /* If maximizing, it is worth using inline code for speed, doing the type
3713 nigel 77 test once at the start (i.e. keep it out of the loop). Again, keep the
3714     UTF-8 and UCP stuff separate. */
3715    
3716     else
3717     {
3718     pp = eptr; /* Remember where we started */
3719    
3720     #ifdef SUPPORT_UCP
3721 nigel 87 if (prop_type >= 0)
3722 nigel 77 {
3723 nigel 87 switch(prop_type)
3724 nigel 77 {
3725 nigel 87 case PT_ANY:
3726     for (i = min; i < max; i++)
3727     {
3728     int len = 1;
3729     if (eptr >= md->end_subject) break;
3730     GETCHARLEN(c, eptr, len);
3731     if (prop_fail_result) break;
3732     eptr+= len;
3733     }
3734     break;
3735    
3736     case PT_LAMP:
3737     for (i = min; i < max; i++)
3738     {
3739     int len = 1;
3740     if (eptr >= md->end_subject) break;
3741     GETCHARLEN(c, eptr, len);
3742     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3743     if ((prop_chartype == ucp_Lu ||
3744     prop_chartype == ucp_Ll ||
3745     prop_chartype == ucp_Lt) == prop_fail_result)
3746     break;
3747     eptr+= len;
3748     }
3749     break;
3750    
3751     case PT_GC:
3752     for (i = min; i < max; i++)
3753     {
3754     int len = 1;
3755     if (eptr >= md->end_subject) break;
3756     GETCHARLEN(c, eptr, len);
3757     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3758     if ((prop_category == prop_value) == prop_fail_result)
3759     break;
3760     eptr+= len;
3761     }
3762     break;
3763    
3764     case PT_PC:
3765     for (i = min; i < max; i++)
3766     {
3767     int len = 1;
3768     if (eptr >= md->end_subject) break;
3769     GETCHARLEN(c, eptr, len);
3770     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3771     if ((prop_chartype == prop_value) == prop_fail_result)
3772     break;
3773     eptr+= len;
3774     }
3775     break;
3776    
3777     case PT_SC:
3778     for (i = min; i < max; i++)
3779     {
3780     int len = 1;
3781     if (eptr >= md->end_subject) break;
3782     GETCHARLEN(c, eptr, len);
3783     prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3784     if ((prop_script == prop_value) == prop_fail_result)
3785     break;
3786     eptr+= len;
3787     }
3788     break;
3789 nigel 77 }
3790    
3791     /* eptr is now past the end of the maximum run */
3792    
3793 nigel 93 if (possessive) continue;
3794 nigel 77 for(;;)
3795     {
3796 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3797 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3798     if (eptr-- == pp) break; /* Stop if tried at original pos */
3799 ph10 207 if (utf8) BACKCHAR(eptr);
3800 nigel 77 }
3801     }
3802    
3803     /* Match extended Unicode sequences. We will get here only if the
3804     support is in the binary; otherwise a compile-time error occurs. */
3805    
3806     else if (ctype == OP_EXTUNI)
3807     {
3808     for (i = min; i < max; i++)
3809     {
3810     if (eptr >= md->end_subject) break;
3811     GETCHARINCTEST(c, eptr);
3812 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3813 nigel 77 if (prop_category == ucp_M) break;
3814     while (eptr < md->end_subject)
3815     {
3816     int len = 1;
3817     if (!utf8) c = *eptr; else
3818     {
3819     GETCHARLEN(c, eptr, len);
3820     }
3821 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3822 nigel 77 if (prop_category != ucp_M) break;
3823     eptr += len;
3824     }
3825     }
3826    
3827     /* eptr is now past the end of the maximum run */
3828    
3829 nigel 93 if (possessive) continue;
3830 nigel 77 for(;;)
3831     {
3832 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3833 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3834     if (eptr-- == pp) break; /* Stop if tried at original pos */
3835     for (;;) /* Move back over one extended */
3836     {
3837     int len = 1;
3838     if (!utf8) c = *eptr; else
3839     {
3840 ph10 207 BACKCHAR(eptr);
3841 nigel 77 GETCHARLEN(c, eptr, len);
3842     }
3843 nigel 87 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3844 nigel 77 if (prop_category != ucp_M) break;
3845     eptr--;
3846     }
3847     }
3848     }
3849    
3850     else
3851     #endif /* SUPPORT_UCP */
3852    
3853     #ifdef SUPPORT_UTF8
3854     /* UTF-8 mode */
3855    
3856     if (utf8)
3857     {
3858     switch(ctype)
3859     {
3860     case OP_ANY:
3861     if (max < INT_MAX)
3862     {
3863 ph10 342 for (i = min; i < max; i++)
3864 nigel 77 {
3865 ph10 342 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3866     eptr++;
3867     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3868 nigel 77 }
3869     }
3870    
3871     /* Handle unlimited UTF-8 repeat */
3872    
3873     else
3874     {
3875 ph10 342 for (i = min; i < max; i++)
3876 nigel 77 {
3877 ph10 342 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3878     eptr++;
3879     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3880 nigel 77 }
3881     }
3882     break;
3883    
3884 ph10 341 case OP_ALLANY:
3885     if (max < INT_MAX)
3886     {
3887     for (i = min; i < max; i++)
3888     {
3889     if (eptr >= md->end_subject) break;
3890     eptr++;
3891     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3892     }
3893     }
3894     else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
3895     break;
3896    
3897 nigel 77 /* The byte case is the same as non-UTF8 */
3898    
3899     case OP_ANYBYTE:
3900     c = max - min;
3901 nigel 93 if (c > (unsigned int)(md->end_subject - eptr))
3902     c = md->end_subject - eptr;
3903 nigel 77 eptr += c;
3904     break;
3905    
3906 nigel 93 case OP_ANYNL:
3907     for (i = min; i < max; i++)
3908     {
3909     int len = 1;
3910     if (eptr >= md->end_subject) break;
3911     GETCHARLEN(c, eptr, len);
3912     if (c == 0x000d)
3913     {
3914     if (++eptr >= md->end_subject) break;
3915     if (*eptr == 0x000a) eptr++;
3916     }
3917     else
3918     {
3919 ph10 231 if (c != 0x000a &&
3920     (md->bsr_anycrlf ||
3921     (c != 0x000b && c != 0x000c &&
3922     c != 0x0085 && c != 0x2028 && c != 0x2029)))
3923 nigel 93 break;
3924     eptr += len;
3925     }
3926     }
3927     break;
3928    
3929 ph10 178 case OP_NOT_HSPACE:
3930 ph10 182 case OP_HSPACE:
3931 ph10 178 for (i = min; i < max; i++)
3932     {
3933 ph10 182 BOOL gotspace;
3934 ph10 178 int len = 1;
3935     if (eptr >= md->end_subject) break;
3936     GETCHARLEN(c, eptr, len);
3937     switch(c)
3938 ph10 182 {
3939     default: gotspace = FALSE; break;
3940 ph10 178 case 0x09: /* HT */
3941     case 0x20: /* SPACE */
3942     case 0xa0: /* NBSP */
3943     case 0x1680: /* OGHAM SPACE MARK */
3944     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3945     case 0x2000: /* EN QUAD */
3946     case 0x2001: /* EM QUAD */
3947     case 0x2002: /* EN SPACE */
3948     case 0x2003: /* EM SPACE */
3949     case 0x2004: /* THREE-PER-EM SPACE */
3950     case 0x2005: /* FOUR-PER-EM SPACE */
3951     case 0x2006: /* SIX-PER-EM SPACE */
3952     case 0x2007: /* FIGURE SPACE */
3953     case 0x2008: /* PUNCTUATION SPACE */
3954     case 0x2009: /* THIN SPACE */
3955     case 0x200A: /* HAIR SPACE */
3956     case 0x202f: /* NARROW NO-BREAK SPACE */
3957     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3958     case 0x3000: /* IDEOGRAPHIC SPACE */
3959     gotspace = TRUE;
3960 ph10 182 break;
3961 ph10 178 }
3962     if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3963     eptr += len;
3964     }
3965     break;
3966    
3967     case OP_NOT_VSPACE:
3968 ph10 182 case OP_VSPACE:
3969 ph10 178 for (i = min; i < max; i++)
3970     {
3971 ph10 182 BOOL gotspace;
3972 ph10 178 int len = 1;
3973     if (eptr >= md->end_subject) break;
3974     GETCHARLEN(c, eptr, len);
3975     switch(c)
3976     {
3977 ph10 182 default: gotspace = FALSE; break;
3978 ph10 178 case 0x0a: /* LF */
3979     case 0x0b: /* VT */
3980     case 0x0c: /* FF */
3981     case 0x0d: /* CR */
3982     case 0x85: /* NEL */
3983     case 0x2028: /* LINE SEPARATOR */
3984     case 0x2029: /* PARAGRAPH SEPARATOR */
3985     gotspace = TRUE;
3986     break;
3987     }
3988 ph10 182 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3989 ph10 178 eptr += len;
3990     }
3991     break;
3992    
3993 nigel 77 case OP_NOT_DIGIT:
3994     for (i = min; i < max; i++)
3995     {
3996     int len = 1;
3997     if (eptr >= md->end_subject) break;
3998     GETCHARLEN(c, eptr, len);
3999     if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
4000     eptr+= len;
4001     }
4002     break;
4003    
4004     case OP_DIGIT:
4005     for (i = min; i < max; i++)
4006     {
4007     int len = 1;
4008     if (eptr >= md->end_subject) break;
4009     GETCHARLEN(c, eptr, len);
4010     if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
4011     eptr+= len;
4012     }
4013     break;
4014    
4015     case OP_NOT_WHITESPACE:
4016     for (i = min; i < max; i++)
4017     {
4018     int len = 1;
4019     if (eptr >= md->end_subject) break;
4020     GETCHARLEN(c, eptr, len);
4021     if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
4022     eptr+= len;
4023     }
4024     break;
4025    
4026     case OP_WHITESPACE:
4027     for (i = min; i < max; i++)
4028     {
4029     int len = 1;
4030     if (eptr >= md->end_subject) break;
4031     GETCHARLEN(c, eptr, len);
4032     if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
4033     eptr+= len;
4034     }
4035     break;
4036    
4037     case OP_NOT_WORDCHAR:
4038     for (i = min; i < max; i++)
4039     {
4040     int len = 1;
4041     if (eptr >= md->end_subject) break;
4042     GETCHARLEN(c, eptr, len);
4043     if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
4044     eptr+= len;
4045     }
4046     break;
4047    
4048     case OP_WORDCHAR:
4049     for (i = min; i < max; i++)
4050     {
4051     int len = 1;
4052     if (eptr >= md->end_subject) break;
4053     GETCHARLEN(c, eptr, len);
4054     if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
4055     eptr+= len;
4056     }
4057     break;
4058    
4059     default:
4060     RRETURN(PCRE_ERROR_INTERNAL);
4061     }
4062    
4063     /* eptr is now past the end of the maximum run */
4064    
4065 nigel 93 if (possessive) continue;
4066 nigel 77 for(;;)
4067     {
4068 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4069 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4070     if (eptr-- == pp) break; /* Stop if tried at original pos */
4071     BACKCHAR(eptr);
4072     }
4073     }
4074     else
4075 ph10 207 #endif /* SUPPORT_UTF8 */
4076 nigel 77
4077     /* Not UTF-8 mode */
4078     {
4079     switch(ctype)
4080     {
4081     case OP_ANY:
4082 ph10 342 for (i = min; i < max; i++)
4083 nigel 77 {
4084 ph10 342 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4085     eptr++;
4086 nigel 77 }
4087 ph10 342 break;
4088 nigel 77
4089 ph10 341 case OP_ALLANY:
4090 nigel 77 case OP_ANYBYTE:
4091     c = max - min;
4092 nigel 93 if (c > (unsigned int)(md->end_subject - eptr))
4093     c = md->end_subject - eptr;
4094 nigel 77 eptr += c;
4095     break;
4096    
4097 nigel 93 case OP_ANYNL:
4098     for (i = min; i < max; i++)
4099     {
4100     if (eptr >= md->end_subject) break;
4101     c = *eptr;
4102     if (c == 0x000d)
4103     {
4104     if (++eptr >= md->end_subject) break;
4105     if (*eptr == 0x000a) eptr++;
4106     }
4107     else
4108     {
4109 ph10 231 if (c != 0x000a &&
4110     (md->bsr_anycrlf ||
4111     (c != 0x000b && c != 0x000c && c != 0x0085)))
4112 nigel 93 break;
4113     eptr++;
4114     }
4115     }
4116     break;
4117    
4118 ph10 178 case OP_NOT_HSPACE:
4119     for (i = min; i < max; i++)
4120     {
4121     if (eptr >= md->end_subject) break;
4122     c = *eptr;
4123     if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4124 ph10 182 eptr++;
4125 ph10 178 }
4126     break;
4127    
4128