/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 463 - (hide annotations) (download)
Sun Oct 18 10:02:46 2009 UTC (4 years, 6 months ago) by ph10
File MIME type: text/plain
File size: 170608 byte(s)
Further tidies to partial matching.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 381 Copyright (c) 1997-2009 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74     #define MATCH_COMMIT (-999)
75     #define MATCH_PRUNE (-998)
76     #define MATCH_SKIP (-997)
77     #define MATCH_THEN (-996)
78    
79 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
80     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81     because the offset vector is always a multiple of 3 long. */
82    
83     #define REC_STACK_SAVE_MAX 30
84    
85     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
86    
87     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
88     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
89    
90    
91    
92     #ifdef DEBUG
93     /*************************************************
94     * Debugging function to print chars *
95     *************************************************/
96    
97     /* Print a sequence of chars in printable format, stopping at the end of the
98     subject if requested.
99    
100     Arguments:
101     p points to characters
102     length number to print
103     is_subject TRUE if printing from within md->start_subject
104     md pointer to matching data block, if is_subject is TRUE
105    
106     Returns: nothing
107     */
108    
109     static void
110     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111     {
112 nigel 93 unsigned int c;
113 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114     while (length-- > 0)
115     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
116     }
117     #endif
118    
119    
120    
121     /*************************************************
122     * Match a back-reference *
123     *************************************************/
124    
125     /* If a back reference hasn't been set, the length that is passed is greater
126     than the number of characters left in the string, so the match fails.
127    
128     Arguments:
129     offset index into the offset vector
130     eptr points into the subject
131     length length to be matched
132     md points to match data block
133     ims the ims flags
134    
135     Returns: TRUE if matched
136     */
137    
138     static BOOL
139 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
140 nigel 77 unsigned long int ims)
141     {
142 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
143 nigel 77
144     #ifdef DEBUG
145     if (eptr >= md->end_subject)
146     printf("matching subject <null>");
147     else
148     {
149     printf("matching subject ");
150     pchars(eptr, length, TRUE, md);
151     }
152     printf(" against backref ");
153     pchars(p, length, FALSE, md);
154     printf("\n");
155     #endif
156    
157     /* Always fail if not enough characters left */
158    
159     if (length > md->end_subject - eptr) return FALSE;
160    
161 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162     properly if Unicode properties are supported. Otherwise, we can check only
163     ASCII characters. */
164 nigel 77
165     if ((ims & PCRE_CASELESS) != 0)
166     {
167 ph10 354 #ifdef SUPPORT_UTF8
168     #ifdef SUPPORT_UCP
169     if (md->utf8)
170     {
171 ph10 358 USPTR endptr = eptr + length;
172 ph10 354 while (eptr < endptr)
173     {
174 ph10 358 int c, d;
175 ph10 354 GETCHARINC(c, eptr);
176     GETCHARINC(d, p);
177     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178 ph10 358 }
179     }
180 ph10 354 else
181     #endif
182     #endif
183    
184     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185     is no UCP support. */
186 ph10 358
187 nigel 77 while (length-- > 0)
188 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189 nigel 77 }
190 ph10 358
191 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
192     are in UTF-8 mode. */
193 ph10 358
194 nigel 77 else
195     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
197     return TRUE;
198     }
199    
200    
201    
202     /***************************************************************************
203     ****************************************************************************
204     RECURSION IN THE match() FUNCTION
205    
206 nigel 87 The match() function is highly recursive, though not every recursive call
207     increases the recursive depth. Nevertheless, some regular expressions can cause
208     it to recurse to a great depth. I was writing for Unix, so I just let it call
209     itself recursively. This uses the stack for saving everything that has to be
210     saved for a recursive call. On Unix, the stack can be large, and this works
211     fine.
212 nigel 77
213 nigel 87 It turns out that on some non-Unix-like systems there are problems with
214     programs that use a lot of stack. (This despite the fact that every last chip
215     has oodles of memory these days, and techniques for extending the stack have
216     been known for decades.) So....
217 nigel 77
218     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219     calls by keeping local variables that need to be preserved in blocks of memory
220 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
221 nigel 77 achieve this so that the actual code doesn't look very different to what it
222     always used to.
223 ph10 164
224 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
225 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
226     Switzer, the use of longjmp() has been abolished, at the cost of having to
227     provide a unique number for each call to RMATCH. There is no way of generating
228     a sequence of numbers at compile time in C. I have given them names, to make
229     them stand out more clearly.
230    
231     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
234     don't have indeterminate values; this has meant that the frame size can be
235 ph10 164 reduced because the result can be "passed back" by straight setting of the
236     variable instead of being passed in the frame.
237 nigel 77 ****************************************************************************
238     ***************************************************************************/
239    
240 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241     below must be updated in sync. */
242 nigel 77
243 ph10 164 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
244     RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
245     RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
246     RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
247 ph10 210 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
248 ph10 212 RM51, RM52, RM53, RM54 };
249 ph10 164
250 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
251 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
252 ph10 164 actually used in this definition. */
253 nigel 77
254     #ifndef NO_RECURSE
255     #define REGISTER register
256 ph10 164
257 nigel 87 #ifdef DEBUG
258 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259 nigel 87 { \
260     printf("match() called in line %d\n", __LINE__); \
261 ph10 168 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
262 nigel 87 printf("to line %d\n", __LINE__); \
263     }
264     #define RRETURN(ra) \
265     { \
266     printf("match() returned %d from line %d ", ra, __LINE__); \
267     return ra; \
268     }
269     #else
270 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271 ph10 168 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
272 nigel 77 #define RRETURN(ra) return ra
273 nigel 87 #endif
274    
275 nigel 77 #else
276    
277    
278 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
279     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280     argument of match(), which never changes. */
281 nigel 77
282     #define REGISTER
283    
/* Heap-frame form of RMATCH. Instead of calling match() recursively, a new
heapframe is obtained with pcre_stack_malloc, loaded with the values that would
have been the arguments of the nested call, chained to the current frame, and
control jumps to HEAP_RECURSE. The number rw is stored in the current frame's
Xwhere field and the label L_<rw> marks the point at which this "call"
resumes. The "rd" argument is not used.

If the new frame cannot be allocated, the current incarnation gives up with
PCRE_ERROR_NOMEMORY via RRETURN, rather than storing through a NULL pointer.
This test is done before anything in the current frame is altered. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
303    
304     #define RRETURN(ra)\
305     {\
306     heapframe *newframe = frame;\
307     frame = newframe->Xprevframe;\
308     (pcre_stack_free)(newframe);\
309     if (frame != NULL)\
310     {\
311 ph10 164 rrc = ra;\
312     goto HEAP_RETURN;\
313 nigel 77 }\
314     return ra;\
315     }
316    
317    
318     /* Structure for remembering the local variables in a private frame */
319    
320     typedef struct heapframe {
321     struct heapframe *Xprevframe;
322    
323     /* Function arguments that may change */
324    
325 ph10 409 USPTR Xeptr;
326 nigel 77 const uschar *Xecode;
327 ph10 409 USPTR Xmstart;
328 nigel 77 int Xoffset_top;
329     long int Xims;
330     eptrblock *Xeptrb;
331     int Xflags;
332 nigel 91 unsigned int Xrdepth;
333 nigel 77
334     /* Function local variables */
335    
336 ph10 409 USPTR Xcallpat;
337 ph10 406 #ifdef SUPPORT_UTF8
338 ph10 409 USPTR Xcharptr;
339 ph10 406 #endif
340 ph10 409 USPTR Xdata;
341     USPTR Xnext;
342     USPTR Xpp;
343     USPTR Xprev;
344     USPTR Xsaved_eptr;
345 nigel 77
346     recursion_info Xnew_recursive;
347    
348     BOOL Xcur_is_word;
349     BOOL Xcondition;
350     BOOL Xprev_is_word;
351    
352     unsigned long int Xoriginal_ims;
353    
354     #ifdef SUPPORT_UCP
355     int Xprop_type;
356 nigel 87 int Xprop_value;
357 nigel 77 int Xprop_fail_result;
358     int Xprop_category;
359     int Xprop_chartype;
360 nigel 87 int Xprop_script;
361 ph10 123 int Xoclength;
362     uschar Xocchars[8];
363 nigel 77 #endif
364    
365 ph10 403 int Xcodelink;
366 nigel 77 int Xctype;
367 nigel 93 unsigned int Xfc;
368 nigel 77 int Xfi;
369     int Xlength;
370     int Xmax;
371     int Xmin;
372     int Xnumber;
373     int Xoffset;
374     int Xop;
375     int Xsave_capture_last;
376     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
377     int Xstacksave[REC_STACK_SAVE_MAX];
378    
379     eptrblock Xnewptrb;
380    
381 ph10 164 /* Where to jump back to */
382 nigel 77
383 ph10 164 int Xwhere;
384 ph10 165
385 nigel 77 } heapframe;
386    
387     #endif
388    
389    
390     /***************************************************************************
391     ***************************************************************************/
392    
393    
394    
395     /*************************************************
396     * Match from current position *
397     *************************************************/
398    
399 nigel 93 /* This function is called recursively in many circumstances. Whenever it
400 nigel 77 returns a negative (error) response, the outer incarnation must also return the
401 ph10 426 same response. */
402 nigel 77
403 ph10 426 /* These macros pack up tests that are used for partial matching, and which
404     appear several times in the code. We set the "hit end" flag if the pointer is
405     at the end of the subject and also past the start of the subject (i.e.
406 ph10 427 something has been matched). For hard partial matching, we then return
407     immediately. The second one is used when we already know we are past the end of
408     the subject. */
409 ph10 426
410     #define CHECK_PARTIAL()\
411 ph10 435 if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
412 ph10 427 {\
413     md->hitend = TRUE;\
414     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
415     }
416 ph10 426
417     #define SCHECK_PARTIAL()\
418 ph10 462 if (md->partial != 0 && eptr > mstart)\
419 ph10 427 {\
420     md->hitend = TRUE;\
421     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
422     }
423 ph10 426
424 ph10 427
425 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
426     the md structure (e.g. utf8, end_subject) into individual variables to improve
427 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
428     made performance worse.
429    
430     Arguments:
431 nigel 93 eptr pointer to current character in subject
432     ecode pointer to current position in compiled code
433 ph10 168 mstart pointer to the current match start position (can be modified
434 ph10 172 by encountering \K)
435 nigel 77 offset_top current top pointer
436     md pointer to "static" info for the match
437     ims current /i, /m, and /s options
438     eptrb pointer to chain of blocks containing eptr at start of
439     brackets - for testing for empty matches
440     flags can contain
441     match_condassert - this is an assertion condition
442 nigel 93 match_cbegroup - this is the start of an unlimited repeat
443     group that can match an empty string
444 nigel 87 rdepth the recursion depth
445 nigel 77
446     Returns: MATCH_MATCH if matched ) these values are >= 0
447     MATCH_NOMATCH if failed to match )
448     a negative PCRE_ERROR_xxx value if aborted by an error condition
449 nigel 87 (e.g. stopped by repeated call or recursion limit)
450 nigel 77 */
451    
452     static int
453 ph10 409 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
454 nigel 77 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
455 nigel 91 int flags, unsigned int rdepth)
456 nigel 77 {
457     /* These variables do not need to be preserved over recursion in this function,
458 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
459     "register" because they are used a lot in loops. */
460 nigel 77
461 nigel 91 register int rrc; /* Returns from recursive calls */
462     register int i; /* Used for loops not involving calls to RMATCH() */
463 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
464 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
465 nigel 77
466 nigel 93 BOOL minimize, possessive; /* Quantifier options */
467 ph10 403 int condcode;
468 nigel 93
469 nigel 77 /* When recursion is not being used, all "local" variables that have to be
470     preserved over calls to RMATCH() are part of a "frame" which is obtained from
471     heap storage. Set up the top-level frame here; others are obtained from the
472     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
473    
474     #ifdef NO_RECURSE
475     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
476     frame->Xprevframe = NULL; /* Marks the top level */
477    
478     /* Copy in the original argument variables */
479    
480     frame->Xeptr = eptr;
481     frame->Xecode = ecode;
482 ph10 168 frame->Xmstart = mstart;
483 nigel 77 frame->Xoffset_top = offset_top;
484     frame->Xims = ims;
485     frame->Xeptrb = eptrb;
486     frame->Xflags = flags;
487 nigel 87 frame->Xrdepth = rdepth;
488 nigel 77
489     /* This is where control jumps back to to effect "recursion" */
490    
491     HEAP_RECURSE:
492    
493     /* Macros make the argument variables come from the current frame */
494    
495     #define eptr frame->Xeptr
496     #define ecode frame->Xecode
497 ph10 168 #define mstart frame->Xmstart
498 nigel 77 #define offset_top frame->Xoffset_top
499     #define ims frame->Xims
500     #define eptrb frame->Xeptrb
501     #define flags frame->Xflags
502 nigel 87 #define rdepth frame->Xrdepth
503 nigel 77
504     /* Ditto for the local variables */
505    
506     #ifdef SUPPORT_UTF8
507     #define charptr frame->Xcharptr
508     #endif
509     #define callpat frame->Xcallpat
510 ph10 403 #define codelink frame->Xcodelink
511 nigel 77 #define data frame->Xdata
512     #define next frame->Xnext
513     #define pp frame->Xpp
514     #define prev frame->Xprev
515     #define saved_eptr frame->Xsaved_eptr
516    
517     #define new_recursive frame->Xnew_recursive
518    
519     #define cur_is_word frame->Xcur_is_word
520     #define condition frame->Xcondition
521     #define prev_is_word frame->Xprev_is_word
522    
523     #define original_ims frame->Xoriginal_ims
524    
525     #ifdef SUPPORT_UCP
526     #define prop_type frame->Xprop_type
527 nigel 87 #define prop_value frame->Xprop_value
528 nigel 77 #define prop_fail_result frame->Xprop_fail_result
529     #define prop_category frame->Xprop_category
530     #define prop_chartype frame->Xprop_chartype
531 nigel 87 #define prop_script frame->Xprop_script
532 ph10 115 #define oclength frame->Xoclength
533     #define occhars frame->Xocchars
534 nigel 77 #endif
535    
536     #define ctype frame->Xctype
537     #define fc frame->Xfc
538     #define fi frame->Xfi
539     #define length frame->Xlength
540     #define max frame->Xmax
541     #define min frame->Xmin
542     #define number frame->Xnumber
543     #define offset frame->Xoffset
544     #define op frame->Xop
545     #define save_capture_last frame->Xsave_capture_last
546     #define save_offset1 frame->Xsave_offset1
547     #define save_offset2 frame->Xsave_offset2
548     #define save_offset3 frame->Xsave_offset3
549     #define stacksave frame->Xstacksave
550    
551     #define newptrb frame->Xnewptrb
552    
553     /* When recursion is being used, local variables are allocated on the stack and
554     get preserved during recursion in the normal way. In this environment, fi and
555     i, and fc and c, can be the same variables. */
556    
557 nigel 93 #else /* NO_RECURSE not defined */
558 nigel 77 #define fi i
559     #define fc c
560    
561    
562 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
563     const uschar *charptr; /* in small blocks of the code. My normal */
564     #endif /* style of coding would have declared */
565     const uschar *callpat; /* them within each of those blocks. */
566     const uschar *data; /* However, in order to accommodate the */
567     const uschar *next; /* version of this code that uses an */
568     USPTR pp; /* external "stack" implemented on the */
569     const uschar *prev; /* heap, it is easier to declare them all */
570     USPTR saved_eptr; /* here, so the declarations can be cut */
571     /* out in a block. The only declarations */
572     recursion_info new_recursive; /* within blocks below are for variables */
573     /* that do not have to be preserved over */
574     BOOL cur_is_word; /* a recursive call to RMATCH(). */
575     BOOL condition;
576 nigel 77 BOOL prev_is_word;
577    
578     unsigned long int original_ims;
579    
580     #ifdef SUPPORT_UCP
581     int prop_type;
582 nigel 87 int prop_value;
583 nigel 77 int prop_fail_result;
584     int prop_category;
585     int prop_chartype;
586 nigel 87 int prop_script;
587 ph10 115 int oclength;
588     uschar occhars[8];
589 nigel 77 #endif
590    
591 ph10 399 int codelink;
592 nigel 77 int ctype;
593     int length;
594     int max;
595     int min;
596     int number;
597     int offset;
598     int op;
599     int save_capture_last;
600     int save_offset1, save_offset2, save_offset3;
601     int stacksave[REC_STACK_SAVE_MAX];
602    
603     eptrblock newptrb;
604 nigel 93 #endif /* NO_RECURSE */
605 nigel 77
606     /* These statements are here to stop the compiler complaining about uninitialized
607     variables. */
608    
609     #ifdef SUPPORT_UCP
610 nigel 87 prop_value = 0;
611 nigel 77 prop_fail_result = 0;
612     #endif
613    
614 nigel 93
615 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
616     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
617     used. Thanks to Ian Taylor for noticing this possibility and sending the
618     original patch. */
619    
620     TAIL_RECURSE:
621    
622 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
623     are specified by the macro RMATCH and RRETURN is used to return. When
624     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
625     and a "return", respectively (possibly with some debugging if DEBUG is
626     defined). However, RMATCH isn't like a function call because it's quite a
627     complicated macro. It has to be used in one particular way. This shouldn't,
628     however, impact performance when true recursion is being used. */
629 nigel 77
630 ph10 164 #ifdef SUPPORT_UTF8
631     utf8 = md->utf8; /* Local copy of the flag */
632     #else
633     utf8 = FALSE;
634     #endif
635    
636 nigel 87 /* First check that we haven't called match() too many times, or that we
637     haven't exceeded the recursive call limit. */
638    
639 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
640 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
641 nigel 77
642     original_ims = ims; /* Save for resetting on ')' */
643 nigel 91
644 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
645     string, the match_cbegroup flag is set. When this is the case, add the current
646     subject pointer to the chain of such remembered pointers, to be checked when we
647     hit the closing ket, in order to break infinite loops that match no characters.
648 ph10 197 When match() is called in other circumstances, don't add to the chain. The
649     match_cbegroup flag must NOT be used with tail recursion, because the memory
650     block that is used is on the stack, so a new one may be required for each
651     match(). */
652 nigel 77
653 nigel 93 if ((flags & match_cbegroup) != 0)
654 nigel 77 {
655 ph10 197 newptrb.epb_saved_eptr = eptr;
656     newptrb.epb_prev = eptrb;
657     eptrb = &newptrb;
658 nigel 77 }
659    
660 nigel 93 /* Now start processing the opcodes. */
661 nigel 77
662     for (;;)
663     {
664 nigel 93 minimize = possessive = FALSE;
665 nigel 77 op = *ecode;
666 ph10 443
667 nigel 93 switch(op)
668     {
669 ph10 210 case OP_FAIL:
670 ph10 212 RRETURN(MATCH_NOMATCH);
671 ph10 211
672 ph10 210 case OP_PRUNE:
673     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
674     ims, eptrb, flags, RM51);
675     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
676 ph10 212 RRETURN(MATCH_PRUNE);
677 ph10 211
678 ph10 210 case OP_COMMIT:
679     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
680     ims, eptrb, flags, RM52);
681     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
682 ph10 212 RRETURN(MATCH_COMMIT);
683 ph10 211
684 ph10 210 case OP_SKIP:
685     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
686     ims, eptrb, flags, RM53);
687     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
688 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
689 ph10 212 RRETURN(MATCH_SKIP);
690 ph10 211
691 ph10 210 case OP_THEN:
692     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
693 ph10 212 ims, eptrb, flags, RM54);
694 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
695 ph10 212 RRETURN(MATCH_THEN);
696 ph10 211
697 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
698     the current subject position in the working slot at the top of the vector.
699     We mustn't change the current values of the data slot, because they may be
700     set from a previous iteration of this group, and be referred to by a
701     reference inside the group.
702 nigel 77
703 nigel 93 If the bracket fails to match, we need to restore this value and also the
704     values of the final offsets, in case they were set by a previous iteration
705     of the same bracket.
706 nigel 77
707 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
708     a non-capturing bracket. Don't worry about setting the flag for the error
709     case here; that is handled in the code for KET. */
710 nigel 77
711 nigel 93 case OP_CBRA:
712     case OP_SCBRA:
713     number = GET2(ecode, 1+LINK_SIZE);
714 nigel 77 offset = number << 1;
715    
716     #ifdef DEBUG
717 nigel 93 printf("start bracket %d\n", number);
718     printf("subject=");
719 nigel 77 pchars(eptr, 16, TRUE, md);
720     printf("\n");
721     #endif
722    
723     if (offset < md->offset_max)
724     {
725     save_offset1 = md->offset_vector[offset];
726     save_offset2 = md->offset_vector[offset+1];
727     save_offset3 = md->offset_vector[md->offset_end - number];
728     save_capture_last = md->capture_last;
729    
730     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
731     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
732    
733 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
734 nigel 77 do
735     {
736 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
737     ims, eptrb, flags, RM1);
738 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
739 nigel 77 md->capture_last = save_capture_last;
740     ecode += GET(ecode, 1);
741     }
742     while (*ecode == OP_ALT);
743    
744     DPRINTF(("bracket %d failed\n", number));
745    
746     md->offset_vector[offset] = save_offset1;
747     md->offset_vector[offset+1] = save_offset2;
748     md->offset_vector[md->offset_end - number] = save_offset3;
749    
750     RRETURN(MATCH_NOMATCH);
751     }
752    
753 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
754     as a non-capturing bracket. */
755 nigel 77
756 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
757     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
758    
759 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
760 nigel 77
761 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
762     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
763    
764 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
765     final alternative within the brackets, we would return the result of a
766     recursive call to match() whatever happened. We can reduce stack usage by
767 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
768     is set.*/
769 nigel 77
770 nigel 93 case OP_BRA:
771     case OP_SBRA:
772     DPRINTF(("start non-capturing bracket\n"));
773     flags = (op >= OP_SBRA)? match_cbegroup : 0;
774 nigel 91 for (;;)
775 nigel 77 {
776 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
777 nigel 93 {
778 ph10 197 if (flags == 0) /* Not a possibly empty group */
779     {
780     ecode += _pcre_OP_lengths[*ecode];
781     DPRINTF(("bracket 0 tail recursion\n"));
782     goto TAIL_RECURSE;
783     }
784    
785     /* Possibly empty group; can't use tail recursion. */
786    
787     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
788     eptrb, flags, RM48);
789     RRETURN(rrc);
790 nigel 93 }
791 nigel 91
792     /* For non-final alternatives, continue the loop for a NOMATCH result;
793     otherwise return. */
794    
795 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
796     eptrb, flags, RM2);
797 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
798 nigel 77 ecode += GET(ecode, 1);
799     }
800 nigel 91 /* Control never reaches here. */
801 nigel 77
802     /* Conditional group: compilation checked that there are no more than
803     two branches. If the condition is false, skipping the first branch takes us
804     past the end if there is only one branch, but that's OK because that is
805 nigel 91 exactly what going to the ket would do. As there is only one branch to be
806     obeyed, we can use tail recursion to avoid using another stack frame. */
807 nigel 77
808     case OP_COND:
809 nigel 93 case OP_SCOND:
810 ph10 399 codelink= GET(ecode, 1);
811 ph10 406
812 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
813     inserted between OP_COND and an assertion condition. */
814 ph10 392
815 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
816     {
817     if (pcre_callout != NULL)
818     {
819     pcre_callout_block cb;
820     cb.version = 1; /* Version 1 of the callout block */
821     cb.callout_number = ecode[LINK_SIZE+2];
822     cb.offset_vector = md->offset_vector;
823     cb.subject = (PCRE_SPTR)md->start_subject;
824     cb.subject_length = md->end_subject - md->start_subject;
825     cb.start_match = mstart - md->start_subject;
826     cb.current_position = eptr - md->start_subject;
827     cb.pattern_position = GET(ecode, LINK_SIZE + 3);
828     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
829     cb.capture_top = offset_top/2;
830     cb.capture_last = md->capture_last;
831     cb.callout_data = md->callout_data;
832     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
833     if (rrc < 0) RRETURN(rrc);
834     }
835     ecode += _pcre_OP_lengths[OP_CALLOUT];
836     }
837 ph10 392
838 ph10 399 condcode = ecode[LINK_SIZE+1];
839 ph10 406
840 ph10 381 /* Now see what the actual condition is */
841 ph10 392
842 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
843 nigel 77 {
844 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
845     {
846 ph10 461 condition = FALSE;
847     ecode += GET(ecode, 1);
848     }
849 ph10 459 else
850 ph10 461 {
851 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
852     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
853 ph10 461
854 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
855     false, but the test was set up by name, scan the table to see if the
856     name refers to any other numbers, and test them. The condition is true
857     if any one is set. */
858 ph10 461
859 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
860     {
861     uschar *slotA = md->name_table;
862     for (i = 0; i < md->name_count; i++)
863 ph10 461 {
864     if (GET2(slotA, 0) == recno) break;
865 ph10 459 slotA += md->name_entry_size;
866     }
867 ph10 461
868 ph10 459 /* Found a name for the number - there can be only one; duplicate
869     names for different numbers are allowed, but not vice versa. First
870     scan down for duplicates. */
871 ph10 461
872 ph10 459 if (i < md->name_count)
873 ph10 461 {
874 ph10 459 uschar *slotB = slotA;
875     while (slotB > md->name_table)
876     {
877     slotB -= md->name_entry_size;
878     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
879     {
880     condition = GET2(slotB, 0) == md->recursive->group_num;
881 ph10 461 if (condition) break;
882     }
883 ph10 459 else break;
884 ph10 461 }
885    
886 ph10 459 /* Scan up for duplicates */
887 ph10 461
888 ph10 459 if (!condition)
889 ph10 461 {
890 ph10 459 slotB = slotA;
891     for (i++; i < md->name_count; i++)
892     {
893     slotB += md->name_entry_size;
894     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
895     {
896     condition = GET2(slotB, 0) == md->recursive->group_num;
897     if (condition) break;
898 ph10 461 }
899 ph10 459 else break;
900 ph10 461 }
901     }
902 ph10 459 }
903 ph10 461 }
904    
905 ph10 459 /* Choose branch according to the condition */
906 ph10 461
907 ph10 459 ecode += condition? 3 : GET(ecode, 1);
908     }
909 ph10 461 }
910 nigel 93
911 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
912 nigel 93 {
913 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
914 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
915 ph10 461
916 ph10 459 /* If the numbered capture is unset, but the reference was by name,
917 ph10 461 scan the table to see if the name refers to any other numbers, and test
918     them. The condition is true if any one is set. This is tediously similar
919     to the code above, but not close enough to try to amalgamate. */
920    
921 ph10 459 if (!condition && condcode == OP_NCREF)
922     {
923 ph10 461 int refno = offset >> 1;
924 ph10 459 uschar *slotA = md->name_table;
925 ph10 461
926 ph10 459 for (i = 0; i < md->name_count; i++)
927 ph10 461 {
928     if (GET2(slotA, 0) == refno) break;
929 ph10 459 slotA += md->name_entry_size;
930     }
931 ph10 461
932     /* Found a name for the number - there can be only one; duplicate names
933     for different numbers are allowed, but not vice versa. First scan down
934 ph10 459 for duplicates. */
935 ph10 461
936 ph10 459 if (i < md->name_count)
937 ph10 461 {
938 ph10 459 uschar *slotB = slotA;
939     while (slotB > md->name_table)
940     {
941     slotB -= md->name_entry_size;
942     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
943     {
944     offset = GET2(slotB, 0) << 1;
945 ph10 461 condition = offset < offset_top &&
946 ph10 459 md->offset_vector[offset] >= 0;
947 ph10 461 if (condition) break;
948     }
949 ph10 459 else break;
950 ph10 461 }
951    
952 ph10 459 /* Scan up for duplicates */
953 ph10 461
954 ph10 459 if (!condition)
955 ph10 461 {
956 ph10 459 slotB = slotA;
957     for (i++; i < md->name_count; i++)
958     {
959     slotB += md->name_entry_size;
960     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
961     {
962     offset = GET2(slotB, 0) << 1;
963 ph10 461 condition = offset < offset_top &&
964 ph10 459 md->offset_vector[offset] >= 0;
965 ph10 461 if (condition) break;
966     }
967 ph10 459 else break;
968 ph10 461 }
969     }
970 ph10 459 }
971 ph10 461 }
972    
973 ph10 459 /* Choose branch according to the condition */
974    
975 nigel 93 ecode += condition? 3 : GET(ecode, 1);
976 nigel 77 }
977    
978 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
979 nigel 93 {
980     condition = FALSE;
981     ecode += GET(ecode, 1);
982     }
983    
984 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
985 nigel 93 the final argument match_condassert causes it to stop at the end of an
986     assertion. */
987 nigel 77
988     else
989     {
990 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
991     match_condassert, RM3);
992 nigel 77 if (rrc == MATCH_MATCH)
993     {
994 nigel 93 condition = TRUE;
995     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
996 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
997     }
998 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
999 nigel 77 {
1000     RRETURN(rrc); /* Need braces because of following else */
1001     }
1002 nigel 93 else
1003     {
1004     condition = FALSE;
1005 ph10 399 ecode += codelink;
1006 nigel 93 }
1007     }
1008 nigel 91
1009 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1010 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1011     match_cbegroup is required for an unlimited repeat of a possibly empty
1012     group. If the second alternative doesn't exist, we can just plough on. */
1013 nigel 91
1014 nigel 93 if (condition || *ecode == OP_ALT)
1015     {
1016 nigel 91 ecode += 1 + LINK_SIZE;
1017 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1018     {
1019     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1020     RRETURN(rrc);
1021     }
1022     else /* Group must match something */
1023     {
1024     flags = 0;
1025     goto TAIL_RECURSE;
1026     }
1027 nigel 77 }
1028 ph10 395 else /* Condition false & no alternative */
1029 nigel 93 {
1030     ecode += 1 + LINK_SIZE;
1031     }
1032     break;
1033 nigel 77
1034 ph10 461
1035 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1036     to close any currently open capturing brackets. */
1037 ph10 461
1038 ph10 447 case OP_CLOSE:
1039 ph10 461 number = GET2(ecode, 1);
1040 ph10 447 offset = number << 1;
1041 ph10 461
1042 ph10 447 #ifdef DEBUG
1043     printf("end bracket %d at *ACCEPT", number);
1044     printf("\n");
1045     #endif
1046 nigel 77
1047 ph10 447 md->capture_last = number;
1048     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1049     {
1050     md->offset_vector[offset] =
1051     md->offset_vector[md->offset_end - number];
1052     md->offset_vector[offset+1] = eptr - md->start_subject;
1053     if (offset_top <= offset) offset_top = offset + 2;
1054     }
1055     ecode += 3;
1056 ph10 461 break;
1057 ph10 447
1058    
1059 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1060     recursion, we should restore the offsets appropriately and continue from
1061     after the call. */
1062 nigel 77
1063 ph10 210 case OP_ACCEPT:
1064 nigel 77 case OP_END:
1065     if (md->recursive != NULL && md->recursive->group_num == 0)
1066     {
1067     recursion_info *rec = md->recursive;
1068 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1069 nigel 77 md->recursive = rec->prevrec;
1070     memmove(md->offset_vector, rec->offset_save,
1071     rec->saved_max * sizeof(int));
1072 ph10 461 offset_top = rec->save_offset_top;
1073 ph10 168 mstart = rec->save_start;
1074 nigel 77 ims = original_ims;
1075     ecode = rec->after_call;
1076     break;
1077     }
1078    
1079 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1080     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1081     the subject. In both cases, backtracking will then try other alternatives,
1082     if any. */
1083 ph10 443
1084 ph10 442 if (eptr == mstart &&
1085     (md->notempty ||
1086 ph10 443 (md->notempty_atstart &&
1087 ph10 442 mstart == md->start_subject + md->start_offset)))
1088 ph10 443 RRETURN(MATCH_NOMATCH);
1089    
1090 ph10 442 /* Otherwise, we have a match. */
1091 nigel 77
1092 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1093     md->end_offset_top = offset_top; /* and how many extracts were taken */
1094 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1095 nigel 77 RRETURN(MATCH_MATCH);
1096    
1097     /* Change option settings */
1098    
1099     case OP_OPT:
1100     ims = ecode[1];
1101     ecode += 2;
1102     DPRINTF(("ims set to %02lx\n", ims));
1103     break;
1104    
1105     /* Assertion brackets. Check the alternative branches in turn - the
1106     matching won't pass the KET for an assertion. If any one branch matches,
1107     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1108     start of each branch to move the current point backwards, so the code at
1109     this level is identical to the lookahead case. */
1110    
1111     case OP_ASSERT:
1112     case OP_ASSERTBACK:
1113     do
1114     {
1115 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1116     RM4);
1117 nigel 77 if (rrc == MATCH_MATCH) break;
1118 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1119 nigel 77 ecode += GET(ecode, 1);
1120     }
1121     while (*ecode == OP_ALT);
1122     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1123    
1124     /* If checking an assertion for a condition, return MATCH_MATCH. */
1125    
1126     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1127    
1128     /* Continue from after the assertion, updating the offsets high water
1129     mark, since extracts may have been taken during the assertion. */
1130    
1131     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1132     ecode += 1 + LINK_SIZE;
1133     offset_top = md->end_offset_top;
1134     continue;
1135    
1136     /* Negative assertion: all branches must fail to match */
1137    
1138     case OP_ASSERT_NOT:
1139     case OP_ASSERTBACK_NOT:
1140     do
1141     {
1142 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1143     RM5);
1144 nigel 77 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
1145 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1146 nigel 77 ecode += GET(ecode,1);
1147     }
1148     while (*ecode == OP_ALT);
1149    
1150     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1151    
1152     ecode += 1 + LINK_SIZE;
1153     continue;
1154    
1155     /* Move the subject pointer back. This occurs only at the start of
1156     each branch of a lookbehind assertion. If we are too close to the start to
1157     move back, this match function fails. When working with UTF-8 we move
1158     back a number of characters, not bytes. */
1159    
1160     case OP_REVERSE:
1161     #ifdef SUPPORT_UTF8
1162     if (utf8)
1163     {
1164 nigel 93 i = GET(ecode, 1);
1165     while (i-- > 0)
1166 nigel 77 {
1167     eptr--;
1168     if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1169 ph10 207 BACKCHAR(eptr);
1170 nigel 77 }
1171     }
1172     else
1173     #endif
1174    
1175     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1176    
1177     {
1178 nigel 93 eptr -= GET(ecode, 1);
1179 nigel 77 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1180     }
1181    
1182 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1183 nigel 77
1184 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1185 nigel 77 ecode += 1 + LINK_SIZE;
1186     break;
1187    
1188     /* The callout item calls an external function, if one is provided, passing
1189     details of the match so far. This is mainly for debugging, though the
1190     function is able to force a failure. */
1191    
1192     case OP_CALLOUT:
1193     if (pcre_callout != NULL)
1194     {
1195     pcre_callout_block cb;
1196     cb.version = 1; /* Version 1 of the callout block */
1197     cb.callout_number = ecode[1];
1198     cb.offset_vector = md->offset_vector;
1199 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1200 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
1201 ph10 168 cb.start_match = mstart - md->start_subject;
1202 nigel 77 cb.current_position = eptr - md->start_subject;
1203     cb.pattern_position = GET(ecode, 2);
1204     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1205     cb.capture_top = offset_top/2;
1206     cb.capture_last = md->capture_last;
1207     cb.callout_data = md->callout_data;
1208     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1209     if (rrc < 0) RRETURN(rrc);
1210     }
1211     ecode += 2 + 2*LINK_SIZE;
1212     break;
1213    
1214     /* Recursion either matches the current regex, or some subexpression. The
1215     offset data is the offset to the starting bracket from the start of the
1216     whole pattern. (This is so that it works from duplicated subpatterns.)
1217    
1218     If there are any capturing brackets started but not finished, we have to
1219     save their starting points and reinstate them after the recursion. However,
1220     we don't know how many such there are (offset_top records the completed
1221     total) so we just have to save all the potential data. There may be up to
1222     65535 such values, which is too large to put on the stack, but using malloc
1223     for small numbers seems expensive. As a compromise, the stack is used when
1224     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1225     is used. If the malloc fails, the offsets cannot be saved, so match()
1226     gives up at this point and returns PCRE_ERROR_NOMEMORY, which is passed
1227     back up as an error in the same way as any other negative return code.
1228    
1229     There are also other values that have to be saved. We use a chained
1230     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1231     for the original version of this logic. */
1232    
1233     case OP_RECURSE:
1234     {
1235     callpat = md->start_code + GET(ecode, 1);
1236 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1237     GET2(callpat, 1 + LINK_SIZE);
1238 nigel 77
1239     /* Add to "recursing stack" */
1240    
1241     new_recursive.prevrec = md->recursive;
1242     md->recursive = &new_recursive;
1243    
1244     /* Find where to continue from afterwards */
1245    
1246     ecode += 1 + LINK_SIZE;
1247     new_recursive.after_call = ecode;
1248    
1249     /* Now save the offset data. */
1250    
1251     new_recursive.saved_max = md->offset_end;
1252     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1253     new_recursive.offset_save = stacksave;
1254     else
1255     {
1256     new_recursive.offset_save =
1257     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1258     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1259     }
1260    
1261     memcpy(new_recursive.offset_save, md->offset_vector,
1262     new_recursive.saved_max * sizeof(int));
1263 ph10 168 new_recursive.save_start = mstart;
1264 ph10 461 new_recursive.save_offset_top = offset_top;
1265 ph10 168 mstart = eptr;
1266 nigel 77
1267     /* OK, now we can do the recursion. For each top-level alternative we
1268     restore the offset and recursion data. */
1269    
1270     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1271 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1272 nigel 77 do
1273     {
1274 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1275     md, ims, eptrb, flags, RM6);
1276 nigel 77 if (rrc == MATCH_MATCH)
1277     {
1278 nigel 87 DPRINTF(("Recursion matched\n"));
1279 nigel 77 md->recursive = new_recursive.prevrec;
1280     if (new_recursive.offset_save != stacksave)
1281     (pcre_free)(new_recursive.offset_save);
1282     RRETURN(MATCH_MATCH);
1283     }
1284 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1285 nigel 87 {
1286     DPRINTF(("Recursion gave error %d\n", rrc));
1287 ph10 400 if (new_recursive.offset_save != stacksave)
1288     (pcre_free)(new_recursive.offset_save);
1289 nigel 87 RRETURN(rrc);
1290     }
1291 nigel 77
1292     md->recursive = &new_recursive;
1293     memcpy(md->offset_vector, new_recursive.offset_save,
1294     new_recursive.saved_max * sizeof(int));
1295     callpat += GET(callpat, 1);
1296     }
1297     while (*callpat == OP_ALT);
1298    
1299     DPRINTF(("Recursion didn't match\n"));
1300     md->recursive = new_recursive.prevrec;
1301     if (new_recursive.offset_save != stacksave)
1302     (pcre_free)(new_recursive.offset_save);
1303     RRETURN(MATCH_NOMATCH);
1304     }
1305     /* Control never reaches here */
1306    
1307     /* "Once" brackets are like assertion brackets except that after a match,
1308     the point in the subject string is not moved back. Thus there can never be
1309     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1310     Check the alternative branches in turn - the matching won't pass the KET
1311     for this kind of subpattern. If any one branch matches, we carry on as at
1312     the end of a normal bracket, leaving the subject pointer. */
1313    
1314     case OP_ONCE:
1315 nigel 91 prev = ecode;
1316     saved_eptr = eptr;
1317    
1318     do
1319 nigel 77 {
1320 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1321 nigel 91 if (rrc == MATCH_MATCH) break;
1322 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1323 nigel 91 ecode += GET(ecode,1);
1324     }
1325     while (*ecode == OP_ALT);
1326 nigel 77
1327 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1328 nigel 77
1329 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1330 nigel 77
1331 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1332     mark, since extracts may have been taken. */
1333 nigel 77
1334 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1335 nigel 77
1336 nigel 91 offset_top = md->end_offset_top;
1337     eptr = md->end_match_ptr;
1338 nigel 77
1339 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1340     happens for a repeating ket if no characters were matched in the group.
1341     This is the forcible breaking of infinite loops as implemented in Perl
1342     5.005. If there is an options reset, it will get obeyed in the normal
1343     course of events. */
1344 nigel 77
1345 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1346     {
1347     ecode += 1+LINK_SIZE;
1348     break;
1349     }
1350 nigel 77
1351 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1352     preceding bracket, in the appropriate order. The second "call" of match()
1353     uses tail recursion, to avoid using another stack frame. We need to reset
1354     any options that changed within the bracket before re-running it, so
1355     check the next opcode. */
1356 nigel 77
1357 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1358     {
1359     ims = (ims & ~PCRE_IMS) | ecode[4];
1360     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1361     }
1362 nigel 77
1363 nigel 91 if (*ecode == OP_KETRMIN)
1364     {
1365 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1366 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1367     ecode = prev;
1368 ph10 197 flags = 0;
1369 nigel 91 goto TAIL_RECURSE;
1370 nigel 77 }
1371 nigel 91 else /* OP_KETRMAX */
1372     {
1373 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1374 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1375     ecode += 1 + LINK_SIZE;
1376 ph10 197 flags = 0;
1377 nigel 91 goto TAIL_RECURSE;
1378     }
1379     /* Control never gets here */
1380 nigel 77
1381     /* An alternation is the end of a branch; scan along to find the end of the
1382     bracketed group and go to there. */
1383    
1384     case OP_ALT:
1385     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1386     break;
1387    
1388 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1389     indicating that it may occur zero times. It may repeat infinitely, or not
1390     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1391     with fixed upper repeat limits are compiled as a number of copies, with the
1392     optional ones preceded by BRAZERO or BRAMINZERO. */
1393 nigel 77
1394     case OP_BRAZERO:
1395     {
1396     next = ecode+1;
1397 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1398 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1399     do next += GET(next,1); while (*next == OP_ALT);
1400 nigel 93 ecode = next + 1 + LINK_SIZE;
1401 nigel 77 }
1402     break;
1403    
1404     case OP_BRAMINZERO:
1405     {
1406     next = ecode+1;
1407 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1408 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1409 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1410     ecode++;
1411     }
1412     break;
1413    
1414 ph10 335 case OP_SKIPZERO:
1415     {
1416     next = ecode+1;
1417     do next += GET(next,1); while (*next == OP_ALT);
1418     ecode = next + 1 + LINK_SIZE;
1419     }
1420     break;
1421    
1422 nigel 93 /* End of a group, repeated or non-repeating. */
1423 nigel 77
1424     case OP_KET:
1425     case OP_KETRMIN:
1426     case OP_KETRMAX:
1427 nigel 91 prev = ecode - GET(ecode, 1);
1428 nigel 77
1429 nigel 93 /* If this was a group that remembered the subject start, in order to break
1430     infinite repeats of empty string matches, retrieve the subject start from
1431     the chain. Otherwise, set it NULL. */
1432 nigel 77
1433 nigel 93 if (*prev >= OP_SBRA)
1434     {
1435     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1436     eptrb = eptrb->epb_prev; /* Backup to previous group */
1437     }
1438     else saved_eptr = NULL;
1439 nigel 77
1440 nigel 93 /* If we are at the end of an assertion group, stop matching and return
1441     MATCH_MATCH, but record the current high water mark for use by positive
1442     assertions. Do this also for the "once" (atomic) groups. */
1443    
1444 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1445     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1446     *prev == OP_ONCE)
1447     {
1448     md->end_match_ptr = eptr; /* For ONCE */
1449     md->end_offset_top = offset_top;
1450     RRETURN(MATCH_MATCH);
1451     }
1452 nigel 77
1453 nigel 93 /* For capturing groups we have to check the group number back at the start
1454     and if necessary complete handling an extraction by setting the offsets and
1455     bumping the high water mark. Note that whole-pattern recursion is coded as
1456     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1457     when the OP_END is reached. Other recursion is handled here. */
1458 nigel 77
1459 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1460 nigel 91 {
1461 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1462 nigel 91 offset = number << 1;
1463 ph10 461
1464 nigel 77 #ifdef DEBUG
1465 nigel 91 printf("end bracket %d", number);
1466     printf("\n");
1467 nigel 77 #endif
1468    
1469 nigel 93 md->capture_last = number;
1470     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1471 nigel 91 {
1472 nigel 93 md->offset_vector[offset] =
1473     md->offset_vector[md->offset_end - number];
1474     md->offset_vector[offset+1] = eptr - md->start_subject;
1475     if (offset_top <= offset) offset_top = offset + 2;
1476     }
1477 nigel 77
1478 nigel 93 /* Handle a recursively called group. Restore the offsets
1479     appropriately and continue from after the call. */
1480 nigel 77
1481 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1482     {
1483     recursion_info *rec = md->recursive;
1484     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1485     md->recursive = rec->prevrec;
1486 ph10 168 mstart = rec->save_start;
1487 nigel 93 memcpy(md->offset_vector, rec->offset_save,
1488     rec->saved_max * sizeof(int));
1489 ph10 461 offset_top = rec->save_offset_top;
1490 nigel 93 ecode = rec->after_call;
1491     ims = original_ims;
1492     break;
1493 nigel 77 }
1494 nigel 91 }
1495 nigel 77
1496 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1497     flags, in case they got changed during the group. */
1498 nigel 77
1499 nigel 91 ims = original_ims;
1500     DPRINTF(("ims reset to %02lx\n", ims));
1501 nigel 77
1502 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1503     happens for a repeating ket if no characters were matched in the group.
1504     This is the forcible breaking of infinite loops as implemented in Perl
1505     5.005. If there is an options reset, it will get obeyed in the normal
1506     course of events. */
1507 nigel 77
1508 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1509     {
1510     ecode += 1 + LINK_SIZE;
1511     break;
1512     }
1513 nigel 77
1514 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1515     preceding bracket, in the appropriate order. In the second case, we can use
1516 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1517     unlimited repeat of a group that can match an empty string. */
1518 nigel 77
1519 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1520    
1521 nigel 91 if (*ecode == OP_KETRMIN)
1522     {
1523 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1524 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1525 ph10 197 if (flags != 0) /* Could match an empty string */
1526     {
1527     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1528     RRETURN(rrc);
1529     }
1530 nigel 91 ecode = prev;
1531     goto TAIL_RECURSE;
1532 nigel 77 }
1533 nigel 91 else /* OP_KETRMAX */
1534     {
1535 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1536 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1537     ecode += 1 + LINK_SIZE;
1538 ph10 197 flags = 0;
1539 nigel 91 goto TAIL_RECURSE;
1540     }
1541     /* Control never gets here */
1542 nigel 77
1543     /* Start of subject unless notbol, or after internal newline if multiline */
1544    
1545     case OP_CIRC:
1546     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1547     if ((ims & PCRE_MULTILINE) != 0)
1548     {
1549 nigel 91 if (eptr != md->start_subject &&
1550 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1551 nigel 77 RRETURN(MATCH_NOMATCH);
1552     ecode++;
1553     break;
1554     }
1555     /* ... else fall through */
1556    
1557     /* Start of subject assertion */
1558    
1559     case OP_SOD:
1560     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1561     ecode++;
1562     break;
1563    
1564     /* Start of match assertion */
1565    
1566     case OP_SOM:
1567     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1568     ecode++;
1569     break;
1570 ph10 172
1571 ph10 168 /* Reset the start of match point */
1572 ph10 172
1573 ph10 168 case OP_SET_SOM:
1574     mstart = eptr;
1575 ph10 172 ecode++;
1576     break;
1577 nigel 77
1578     /* Assert before internal newline if multiline, or before a terminating
1579     newline unless endonly is set, else end of subject unless noteol is set. */
1580    
1581     case OP_DOLL:
1582     if ((ims & PCRE_MULTILINE) != 0)
1583     {
1584     if (eptr < md->end_subject)
1585 nigel 91 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1586 nigel 77 else
1587     { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1588     ecode++;
1589     break;
1590     }
1591     else
1592     {
1593     if (md->noteol) RRETURN(MATCH_NOMATCH);
1594     if (!md->endonly)
1595     {
1596 nigel 91 if (eptr != md->end_subject &&
1597 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1598 nigel 77 RRETURN(MATCH_NOMATCH);
1599     ecode++;
1600     break;
1601     }
1602     }
1603 nigel 91 /* ... else fall through for endonly */
1604 nigel 77
1605     /* End of subject assertion (\z) */
1606    
1607     case OP_EOD:
1608     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1609     ecode++;
1610     break;
1611    
1612     /* End of subject or ending \n assertion (\Z) */
1613    
1614     case OP_EODN:
1615 nigel 91 if (eptr != md->end_subject &&
1616 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1617 nigel 91 RRETURN(MATCH_NOMATCH);
1618 nigel 77 ecode++;
1619     break;
1620    
1621     /* Word boundary assertions */
1622    
1623     case OP_NOT_WORD_BOUNDARY:
1624     case OP_WORD_BOUNDARY:
1625     {
1626    
1627     /* Find out if the previous and current characters are "word" characters.
1628     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1629 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1630 ph10 435 partial matching. */
1631 nigel 77
1632     #ifdef SUPPORT_UTF8
1633     if (utf8)
1634     {
1635     if (eptr == md->start_subject) prev_is_word = FALSE; else
1636     {
1637 ph10 409 USPTR lastptr = eptr - 1;
1638 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1639 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1640 nigel 77 GETCHAR(c, lastptr);
1641     prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1642     }
1643 ph10 443 if (eptr >= md->end_subject)
1644 nigel 77 {
1645 ph10 443 SCHECK_PARTIAL();
1646     cur_is_word = FALSE;
1647 ph10 428 }
1648     else
1649     {
1650 nigel 77 GETCHAR(c, eptr);
1651     cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1652     }
1653     }
1654     else
1655     #endif
1656    
1657 ph10 428 /* Not in UTF-8 mode */
1658 nigel 77
1659     {
1660 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1661     {
1662 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1663 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1664     }
1665 ph10 443 if (eptr >= md->end_subject)
1666 ph10 428 {
1667 ph10 443 SCHECK_PARTIAL();
1668     cur_is_word = FALSE;
1669 ph10 428 }
1670     else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1671 nigel 77 }
1672    
1673     /* Now see if the situation is what we want */
1674    
1675     if ((*ecode++ == OP_WORD_BOUNDARY)?
1676     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1677     RRETURN(MATCH_NOMATCH);
1678     }
1679     break;
1680    
1681     /* Match a single character type; inline for speed */
1682    
1683     case OP_ANY:
1684 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1685 ph10 345 /* Fall through */
1686    
1687 ph10 341 case OP_ALLANY:
1688 ph10 443 if (eptr++ >= md->end_subject)
1689 ph10 428 {
1690 ph10 443 SCHECK_PARTIAL();
1691 ph10 428 RRETURN(MATCH_NOMATCH);
1692 ph10 443 }
1693 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1694 nigel 77 ecode++;
1695     break;
1696    
1697     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1698     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1699    
1700     case OP_ANYBYTE:
1701 ph10 443 if (eptr++ >= md->end_subject)
1702 ph10 428 {
1703 ph10 443 SCHECK_PARTIAL();
1704 ph10 428 RRETURN(MATCH_NOMATCH);
1705 ph10 443 }
1706 nigel 77 ecode++;
1707     break;
1708    
1709     case OP_NOT_DIGIT:
1710 ph10 443 if (eptr >= md->end_subject)
1711 ph10 428 {
1712 ph10 443 SCHECK_PARTIAL();
1713 ph10 428 RRETURN(MATCH_NOMATCH);
1714 ph10 443 }
1715 nigel 77 GETCHARINCTEST(c, eptr);
1716     if (
1717     #ifdef SUPPORT_UTF8
1718     c < 256 &&
1719     #endif
1720     (md->ctypes[c] & ctype_digit) != 0
1721     )
1722     RRETURN(MATCH_NOMATCH);
1723     ecode++;
1724     break;
1725    
1726     case OP_DIGIT:
1727 ph10 443 if (eptr >= md->end_subject)
1728 ph10 428 {
1729 ph10 443 SCHECK_PARTIAL();
1730 ph10 428 RRETURN(MATCH_NOMATCH);
1731 ph10 443 }
1732 nigel 77 GETCHARINCTEST(c, eptr);
1733     if (
1734     #ifdef SUPPORT_UTF8
1735     c >= 256 ||
1736     #endif
1737     (md->ctypes[c] & ctype_digit) == 0
1738     )
1739     RRETURN(MATCH_NOMATCH);
1740     ecode++;
1741     break;
1742    
1743     case OP_NOT_WHITESPACE:
1744 ph10 443 if (eptr >= md->end_subject)
1745 ph10 428 {
1746 ph10 443 SCHECK_PARTIAL();
1747 ph10 428 RRETURN(MATCH_NOMATCH);
1748 ph10 443 }
1749 nigel 77 GETCHARINCTEST(c, eptr);
1750     if (
1751     #ifdef SUPPORT_UTF8
1752     c < 256 &&
1753     #endif
1754     (md->ctypes[c] & ctype_space) != 0
1755     )
1756     RRETURN(MATCH_NOMATCH);
1757     ecode++;
1758     break;
1759    
1760     case OP_WHITESPACE:
1761 ph10 443 if (eptr >= md->end_subject)
1762 ph10 428 {
1763 ph10 443 SCHECK_PARTIAL();
1764 ph10 428 RRETURN(MATCH_NOMATCH);
1765 ph10 443 }
1766 nigel 77 GETCHARINCTEST(c, eptr);
1767     if (
1768     #ifdef SUPPORT_UTF8
1769     c >= 256 ||
1770     #endif
1771     (md->ctypes[c] & ctype_space) == 0
1772     )
1773     RRETURN(MATCH_NOMATCH);
1774     ecode++;
1775     break;
1776    
1777     case OP_NOT_WORDCHAR:
1778 ph10 443 if (eptr >= md->end_subject)
1779 ph10 428 {
1780 ph10 443 SCHECK_PARTIAL();
1781 ph10 428 RRETURN(MATCH_NOMATCH);
1782 ph10 443 }
1783 nigel 77 GETCHARINCTEST(c, eptr);
1784     if (
1785     #ifdef SUPPORT_UTF8
1786     c < 256 &&
1787     #endif
1788     (md->ctypes[c] & ctype_word) != 0
1789     )
1790     RRETURN(MATCH_NOMATCH);
1791     ecode++;
1792     break;
1793    
1794     case OP_WORDCHAR:
1795 ph10 443 if (eptr >= md->end_subject)
1796 ph10 428 {
1797 ph10 443 SCHECK_PARTIAL();
1798 ph10 428 RRETURN(MATCH_NOMATCH);
1799 ph10 443 }
1800 nigel 77 GETCHARINCTEST(c, eptr);
1801     if (
1802     #ifdef SUPPORT_UTF8
1803     c >= 256 ||
1804     #endif
1805     (md->ctypes[c] & ctype_word) == 0
1806     )
1807     RRETURN(MATCH_NOMATCH);
1808     ecode++;
1809     break;
1810    
1811 nigel 93 case OP_ANYNL:
1812 ph10 443 if (eptr >= md->end_subject)
1813 ph10 428 {
1814 ph10 443 SCHECK_PARTIAL();
1815 ph10 428 RRETURN(MATCH_NOMATCH);
1816 ph10 443 }
1817 nigel 93 GETCHARINCTEST(c, eptr);
1818     switch(c)
1819     {
1820     default: RRETURN(MATCH_NOMATCH);
1821     case 0x000d:
1822     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1823     break;
1824 ph10 231
1825 nigel 93 case 0x000a:
1826 ph10 231 break;
1827    
1828 nigel 93 case 0x000b:
1829     case 0x000c:
1830     case 0x0085:
1831     case 0x2028:
1832     case 0x2029:
1833 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1834 nigel 93 break;
1835     }
1836     ecode++;
1837     break;
1838    
1839 ph10 178 case OP_NOT_HSPACE:
1840 ph10 443 if (eptr >= md->end_subject)
1841 ph10 428 {
1842 ph10 443 SCHECK_PARTIAL();
1843 ph10 428 RRETURN(MATCH_NOMATCH);
1844 ph10 443 }
1845 ph10 178 GETCHARINCTEST(c, eptr);
1846     switch(c)
1847     {
1848     default: break;
1849     case 0x09: /* HT */
1850     case 0x20: /* SPACE */
1851     case 0xa0: /* NBSP */
1852     case 0x1680: /* OGHAM SPACE MARK */
1853     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1854     case 0x2000: /* EN QUAD */
1855     case 0x2001: /* EM QUAD */
1856     case 0x2002: /* EN SPACE */
1857     case 0x2003: /* EM SPACE */
1858     case 0x2004: /* THREE-PER-EM SPACE */
1859     case 0x2005: /* FOUR-PER-EM SPACE */
1860     case 0x2006: /* SIX-PER-EM SPACE */
1861     case 0x2007: /* FIGURE SPACE */
1862     case 0x2008: /* PUNCTUATION SPACE */
1863     case 0x2009: /* THIN SPACE */
1864     case 0x200A: /* HAIR SPACE */
1865     case 0x202f: /* NARROW NO-BREAK SPACE */
1866     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1867     case 0x3000: /* IDEOGRAPHIC SPACE */
1868     RRETURN(MATCH_NOMATCH);
1869     }
1870     ecode++;
1871     break;
1872    
1873     case OP_HSPACE:
1874 ph10 443 if (eptr >= md->end_subject)
1875 ph10 428 {
1876 ph10 443 SCHECK_PARTIAL();
1877 ph10 428 RRETURN(MATCH_NOMATCH);
1878 ph10 443 }
1879 ph10 178 GETCHARINCTEST(c, eptr);
1880     switch(c)
1881     {
1882     default: RRETURN(MATCH_NOMATCH);
1883     case 0x09: /* HT */
1884     case 0x20: /* SPACE */
1885     case 0xa0: /* NBSP */
1886     case 0x1680: /* OGHAM SPACE MARK */
1887     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1888     case 0x2000: /* EN QUAD */
1889     case 0x2001: /* EM QUAD */
1890     case 0x2002: /* EN SPACE */
1891     case 0x2003: /* EM SPACE */
1892     case 0x2004: /* THREE-PER-EM SPACE */
1893     case 0x2005: /* FOUR-PER-EM SPACE */
1894     case 0x2006: /* SIX-PER-EM SPACE */
1895     case 0x2007: /* FIGURE SPACE */
1896     case 0x2008: /* PUNCTUATION SPACE */
1897     case 0x2009: /* THIN SPACE */
1898     case 0x200A: /* HAIR SPACE */
1899     case 0x202f: /* NARROW NO-BREAK SPACE */
1900     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1901     case 0x3000: /* IDEOGRAPHIC SPACE */
1902     break;
1903     }
1904     ecode++;
1905     break;
1906    
1907     case OP_NOT_VSPACE:
1908 ph10 443 if (eptr >= md->end_subject)
1909 ph10 428 {
1910 ph10 443 SCHECK_PARTIAL();
1911 ph10 428 RRETURN(MATCH_NOMATCH);
1912 ph10 443 }
1913 ph10 178 GETCHARINCTEST(c, eptr);
1914     switch(c)
1915     {
1916     default: break;
1917     case 0x0a: /* LF */
1918     case 0x0b: /* VT */
1919     case 0x0c: /* FF */
1920     case 0x0d: /* CR */
1921     case 0x85: /* NEL */
1922     case 0x2028: /* LINE SEPARATOR */
1923     case 0x2029: /* PARAGRAPH SEPARATOR */
1924     RRETURN(MATCH_NOMATCH);
1925     }
1926     ecode++;
1927     break;
1928    
1929     case OP_VSPACE:
1930 ph10 443 if (eptr >= md->end_subject)
1931 ph10 428 {
1932 ph10 443 SCHECK_PARTIAL();
1933 ph10 428 RRETURN(MATCH_NOMATCH);
1934 ph10 443 }
1935 ph10 178 GETCHARINCTEST(c, eptr);
1936     switch(c)
1937     {
1938     default: RRETURN(MATCH_NOMATCH);
1939     case 0x0a: /* LF */
1940     case 0x0b: /* VT */
1941     case 0x0c: /* FF */
1942     case 0x0d: /* CR */
1943     case 0x85: /* NEL */
1944     case 0x2028: /* LINE SEPARATOR */
1945     case 0x2029: /* PARAGRAPH SEPARATOR */
1946     break;
1947     }
1948     ecode++;
1949     break;
1950    
1951 nigel 77 #ifdef SUPPORT_UCP
1952     /* Check the next character by Unicode property. We will get here only
1953     if the support is in the binary; otherwise a compile-time error occurs. */
1954    
1955     case OP_PROP:
1956     case OP_NOTPROP:
1957 ph10 443 if (eptr >= md->end_subject)
1958 ph10 428 {
1959 ph10 443 SCHECK_PARTIAL();
1960 ph10 428 RRETURN(MATCH_NOMATCH);
1961 ph10 443 }
1962 nigel 77 GETCHARINCTEST(c, eptr);
1963     {
1964 ph10 384 const ucd_record *prop = GET_UCD(c);
1965 nigel 77
1966 nigel 87 switch(ecode[1])
1967     {
1968     case PT_ANY:
1969     if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1970     break;
1971 nigel 77
1972 nigel 87 case PT_LAMP:
1973 ph10 349 if ((prop->chartype == ucp_Lu ||
1974     prop->chartype == ucp_Ll ||
1975     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1976 nigel 77 RRETURN(MATCH_NOMATCH);
1977 nigel 87 break;
1978    
1979     case PT_GC:
1980 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1981 nigel 77 RRETURN(MATCH_NOMATCH);
1982 nigel 87 break;
1983    
1984     case PT_PC:
1985 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1986 nigel 87 RRETURN(MATCH_NOMATCH);
1987     break;
1988    
1989     case PT_SC:
1990 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
1991 nigel 87 RRETURN(MATCH_NOMATCH);
1992     break;
1993    
1994     default:
1995     RRETURN(PCRE_ERROR_INTERNAL);
1996 nigel 77 }
1997 nigel 87
1998     ecode += 3;
1999 nigel 77 }
2000     break;
2001    
2002     /* Match an extended Unicode sequence. We will get here only if the support
2003     is in the binary; otherwise a compile-time error occurs. */
2004    
2005     case OP_EXTUNI:
2006 ph10 443 if (eptr >= md->end_subject)
2007 ph10 428 {
2008 ph10 443 SCHECK_PARTIAL();
2009 ph10 428 RRETURN(MATCH_NOMATCH);
2010 ph10 443 }
2011 nigel 77 GETCHARINCTEST(c, eptr);
2012     {
2013 ph10 349 int category = UCD_CATEGORY(c);
2014 nigel 77 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
2015     while (eptr < md->end_subject)
2016     {
2017     int len = 1;
2018     if (!utf8) c = *eptr; else
2019     {
2020     GETCHARLEN(c, eptr, len);
2021     }
2022 ph10 349 category = UCD_CATEGORY(c);
2023 nigel 77 if (category != ucp_M) break;
2024     eptr += len;
2025     }
2026     }
2027     ecode++;
2028     break;
2029     #endif
2030    
2031    
2032     /* Match a back reference, possibly repeatedly. Look past the end of the
2033     item to see if there is repeat information following. The code is similar
2034     to that for character classes, but repeated for efficiency. Then obey
2035     similar code to character type repeats - written out again for speed.
2036     However, if the referenced string is the empty string, always treat
2037     it as matched, any number of times (otherwise there could be infinite
2038     loops). */
2039    
2040     case OP_REF:
2041     {
2042     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2043 ph10 345 ecode += 3;
2044    
2045 ph10 336 /* If the reference is unset, there are two possibilities:
2046 ph10 345
2047 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
2048     than the amount of subject left; this ensures that every attempt at a
2049     match fails. We can't just fail here, because of the possibility of
2050     quantifiers with zero minima.
2051 ph10 345
2052     (b) If the JavaScript compatibility flag is set, set the length to zero
2053     so that the back reference matches an empty string.
2054    
2055     Otherwise, set the length to the length of what was matched by the
2056 ph10 336 referenced subpattern. */
2057 ph10 345
2058 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
2059 ph10 345 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2060 ph10 336 else
2061     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2062 nigel 77
2063     /* Set up for repetition, or handle the non-repeated case */
2064    
2065     switch (*ecode)
2066     {
2067     case OP_CRSTAR:
2068     case OP_CRMINSTAR:
2069     case OP_CRPLUS:
2070     case OP_CRMINPLUS:
2071     case OP_CRQUERY:
2072     case OP_CRMINQUERY:
2073     c = *ecode++ - OP_CRSTAR;
2074     minimize = (c & 1) != 0;
2075     min = rep_min[c]; /* Pick up values from tables; */
2076     max = rep_max[c]; /* zero for max => infinity */
2077     if (max == 0) max = INT_MAX;
2078     break;
2079    
2080     case OP_CRRANGE:
2081     case OP_CRMINRANGE:
2082     minimize = (*ecode == OP_CRMINRANGE);
2083     min = GET2(ecode, 1);
2084     max = GET2(ecode, 3);
2085     if (max == 0) max = INT_MAX;
2086     ecode += 5;
2087     break;
2088    
2089     default: /* No repeat follows */
2090 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
2091 ph10 428 {
2092 ph10 443 CHECK_PARTIAL();
2093 ph10 428 RRETURN(MATCH_NOMATCH);
2094 ph10 443 }
2095 nigel 77 eptr += length;
2096     continue; /* With the main loop */
2097     }
2098    
2099     /* If the length of the reference is zero, just continue with the
2100     main loop. */
2101 ph10 443
2102 nigel 77 if (length == 0) continue;
2103    
2104     /* First, ensure the minimum number of matches are present. We get back
2105     the length of the reference string explicitly rather than passing the
2106     address of eptr, so that eptr can be a register variable. */
2107    
2108     for (i = 1; i <= min; i++)
2109     {
2110 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
2111 ph10 426 {
2112 ph10 427 CHECK_PARTIAL();
2113 ph10 426 RRETURN(MATCH_NOMATCH);
2114 ph10 427 }
2115 nigel 77 eptr += length;
2116     }
2117    
2118     /* If min = max, continue at the same level without recursion.
2119     They are not both allowed to be zero. */
2120    
2121     if (min == max) continue;
2122    
2123     /* If minimizing, keep trying and advancing the pointer */
2124    
2125     if (minimize)
2126     {
2127     for (fi = min;; fi++)
2128     {
2129 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2130 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2131 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2132     if (!match_ref(offset, eptr, length, md, ims))
2133 ph10 426 {
2134 ph10 427 CHECK_PARTIAL();
2135 nigel 77 RRETURN(MATCH_NOMATCH);
2136 ph10 427 }
2137 nigel 77 eptr += length;
2138     }
2139     /* Control never gets here */
2140     }
2141    
2142     /* If maximizing, find the longest string and work backwards */
2143    
2144     else
2145     {
2146     pp = eptr;
2147     for (i = min; i < max; i++)
2148     {
2149 ph10 463 if (!match_ref(offset, eptr, length, md, ims))
2150 ph10 462 {
2151 ph10 463 CHECK_PARTIAL();
2152 ph10 462 break;
2153 ph10 463 }
2154 nigel 77 eptr += length;
2155     }
2156     while (eptr >= pp)
2157     {
2158 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2159 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2160     eptr -= length;
2161     }
2162     RRETURN(MATCH_NOMATCH);
2163     }
2164     }
2165     /* Control never gets here */
2166    
2167     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2168     used when all the characters in the class have values in the range 0-255,
2169     and either the matching is caseful, or the characters are in the range
2170     0-127 when UTF-8 processing is enabled. The only difference between
2171     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2172     encountered.
2173    
2174     First, look past the end of the item to see if there is repeat information
2175     following. Then obey similar code to character type repeats - written out
2176     again for speed. */
2177    
2178     case OP_NCLASS:
2179     case OP_CLASS:
2180     {
2181     data = ecode + 1; /* Save for matching */
2182     ecode += 33; /* Advance past the item */
2183    
2184     switch (*ecode)
2185     {
2186     case OP_CRSTAR:
2187     case OP_CRMINSTAR:
2188     case OP_CRPLUS:
2189     case OP_CRMINPLUS:
2190     case OP_CRQUERY:
2191     case OP_CRMINQUERY:
2192     c = *ecode++ - OP_CRSTAR;
2193     minimize = (c & 1) != 0;
2194     min = rep_min[c]; /* Pick up values from tables; */
2195     max = rep_max[c]; /* zero for max => infinity */
2196     if (max == 0) max = INT_MAX;
2197     break;
2198    
2199     case OP_CRRANGE:
2200     case OP_CRMINRANGE:
2201     minimize = (*ecode == OP_CRMINRANGE);
2202     min = GET2(ecode, 1);
2203     max = GET2(ecode, 3);
2204     if (max == 0) max = INT_MAX;
2205     ecode += 5;
2206     break;
2207    
2208     default: /* No repeat follows */
2209     min = max = 1;
2210     break;
2211     }
2212    
2213     /* First, ensure the minimum number of matches are present. */
2214    
2215     #ifdef SUPPORT_UTF8
2216     /* UTF-8 mode */
2217     if (utf8)
2218     {
2219     for (i = 1; i <= min; i++)
2220     {
2221 ph10 427 if (eptr >= md->end_subject)
2222 ph10 426 {
2223 ph10 428 SCHECK_PARTIAL();
2224 ph10 426 RRETURN(MATCH_NOMATCH);
2225 ph10 427 }
2226 nigel 77 GETCHARINC(c, eptr);
2227     if (c > 255)
2228     {
2229     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2230     }
2231     else
2232     {
2233     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2234     }
2235     }
2236     }
2237     else
2238     #endif
2239     /* Not UTF-8 mode */
2240     {
2241     for (i = 1; i <= min; i++)
2242     {
2243 ph10 427 if (eptr >= md->end_subject)
2244 ph10 426 {
2245 ph10 428 SCHECK_PARTIAL();
2246 ph10 426 RRETURN(MATCH_NOMATCH);
2247 ph10 427 }
2248 nigel 77 c = *eptr++;
2249     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2250     }
2251     }
2252    
2253     /* If max == min we can continue with the main loop without the
2254     need to recurse. */
2255    
2256     if (min == max) continue;
2257    
2258     /* If minimizing, keep testing the rest of the expression and advancing
2259     the pointer while it matches the class. */
2260    
2261     if (minimize)
2262     {
2263     #ifdef SUPPORT_UTF8
2264     /* UTF-8 mode */
2265     if (utf8)
2266     {
2267     for (fi = min;; fi++)
2268     {
2269 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2270 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2271 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2272 ph10 427 if (eptr >= md->end_subject)
2273 ph10 426 {
2274 ph10 427 SCHECK_PARTIAL();
2275 ph10 426 RRETURN(MATCH_NOMATCH);
2276 ph10 427 }
2277 nigel 77 GETCHARINC(c, eptr);
2278     if (c > 255)
2279     {
2280     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2281     }
2282     else
2283     {
2284     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2285     }
2286     }
2287     }
2288     else
2289     #endif
2290     /* Not UTF-8 mode */
2291     {
2292     for (fi = min;; fi++)
2293     {
2294 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2295 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2296 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2297 ph10 427 if (eptr >= md->end_subject)
2298 ph10 426 {
2299 ph10 427 SCHECK_PARTIAL();
2300 ph10 426 RRETURN(MATCH_NOMATCH);
2301 ph10 427 }
2302 nigel 77 c = *eptr++;
2303     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2304     }
2305     }
2306     /* Control never gets here */
2307     }
2308    
2309     /* If maximizing, find the longest possible run, then work backwards. */
2310    
2311     else
2312     {
2313     pp = eptr;
2314    
2315     #ifdef SUPPORT_UTF8
2316     /* UTF-8 mode */
2317     if (utf8)
2318     {
2319     for (i = min; i < max; i++)
2320     {
2321     int len = 1;
2322 ph10 463 if (eptr >= md->end_subject)
2323 ph10 462 {
2324 ph10 463 SCHECK_PARTIAL();
2325 ph10 462 break;
2326 ph10 463 }
2327 nigel 77 GETCHARLEN(c, eptr, len);
2328     if (c > 255)
2329     {
2330     if (op == OP_CLASS) break;
2331     }
2332     else
2333     {
2334     if ((data[c/8] & (1 << (c&7))) == 0) break;
2335     }
2336     eptr += len;
2337     }
2338     for (;;)
2339     {
2340 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2341 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2342     if (eptr-- == pp) break; /* Stop if tried at original pos */
2343     BACKCHAR(eptr);
2344     }
2345     }
2346     else
2347     #endif
2348     /* Not UTF-8 mode */
2349     {
2350     for (i = min; i < max; i++)
2351     {
2352 ph10 463 if (eptr >= md->end_subject)
2353 ph10 462 {
2354 ph10 463 SCHECK_PARTIAL();
2355 ph10 462 break;
2356 ph10 463 }
2357 nigel 77 c = *eptr;
2358     if ((data[c/8] & (1 << (c&7))) == 0) break;
2359     eptr++;
2360     }
2361     while (eptr >= pp)
2362     {
2363 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2364 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2365 nigel 77 eptr--;
2366     }
2367     }
2368    
2369     RRETURN(MATCH_NOMATCH);
2370     }
2371     }
2372     /* Control never gets here */
2373    
2374    
2375     /* Match an extended character class. This opcode is encountered only
2376 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2377     mode, because Unicode properties are supported in non-UTF-8 mode. */
2378 nigel 77
2379     #ifdef SUPPORT_UTF8
2380     case OP_XCLASS:
2381     {
2382     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2383     ecode += GET(ecode, 1); /* Advance past the item */
2384    
2385     switch (*ecode)
2386     {
2387     case OP_CRSTAR:
2388     case OP_CRMINSTAR:
2389     case OP_CRPLUS:
2390     case OP_CRMINPLUS:
2391     case OP_CRQUERY:
2392     case OP_CRMINQUERY:
2393     c = *ecode++ - OP_CRSTAR;
2394     minimize = (c & 1) != 0;
2395     min = rep_min[c]; /* Pick up values from tables; */
2396     max = rep_max[c]; /* zero for max => infinity */
2397     if (max == 0) max = INT_MAX;
2398     break;
2399    
2400     case OP_CRRANGE:
2401     case OP_CRMINRANGE:
2402     minimize = (*ecode == OP_CRMINRANGE);
2403     min = GET2(ecode, 1);
2404     max = GET2(ecode, 3);
2405     if (max == 0) max = INT_MAX;
2406     ecode += 5;
2407     break;
2408    
2409     default: /* No repeat follows */
2410     min = max = 1;
2411     break;
2412     }
2413    
2414     /* First, ensure the minimum number of matches are present. */
2415    
2416     for (i = 1; i <= min; i++)
2417     {
2418 ph10 427 if (eptr >= md->end_subject)
2419 ph10 426 {
2420     SCHECK_PARTIAL();
2421     RRETURN(MATCH_NOMATCH);
2422 ph10 427 }
2423 ph10 384 GETCHARINCTEST(c, eptr);
2424 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2425     }
2426    
2427     /* If max == min we can continue with the main loop without the
2428     need to recurse. */
2429    
2430     if (min == max) continue;
2431    
2432     /* If minimizing, keep testing the rest of the expression and advancing
2433     the pointer while it matches the class. */
2434    
2435     if (minimize)
2436     {
2437     for (fi = min;; fi++)
2438     {
2439 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2440 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2441 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2442 ph10 427 if (eptr >= md->end_subject)
2443 ph10 426 {
2444 ph10 427 SCHECK_PARTIAL();
2445 ph10 426 RRETURN(MATCH_NOMATCH);
2446 ph10 427 }
2447 ph10 384 GETCHARINCTEST(c, eptr);
2448 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2449     }
2450     /* Control never gets here */
2451     }
2452    
2453     /* If maximizing, find the longest possible run, then work backwards. */
2454    
2455     else
2456     {
2457     pp = eptr;
2458     for (i = min; i < max; i++)
2459     {
2460     int len = 1;
2461 ph10 463 if (eptr >= md->end_subject)
2462 ph10 462 {
2463 ph10 463 SCHECK_PARTIAL();
2464 ph10 462 break;
2465 ph10 463 }
2466 ph10 384 GETCHARLENTEST(c, eptr, len);
2467 nigel 77 if (!_pcre_xclass(c, data)) break;
2468     eptr += len;
2469     }
2470     for(;;)
2471     {
2472 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2473 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2474     if (eptr-- == pp) break; /* Stop if tried at original pos */
2475 ph10 214 if (utf8) BACKCHAR(eptr);
2476 nigel 77 }
2477     RRETURN(MATCH_NOMATCH);
2478     }
2479    
2480     /* Control never gets here */
2481     }
2482     #endif /* End of XCLASS */
2483    
2484     /* Match a single character, casefully */
2485    
2486     case OP_CHAR:
2487     #ifdef SUPPORT_UTF8
2488     if (utf8)
2489     {
2490     length = 1;
2491     ecode++;
2492     GETCHARLEN(fc, ecode, length);
2493 ph10 443 if (length > md->end_subject - eptr)
2494 ph10 428 {
2495     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2496     RRETURN(MATCH_NOMATCH);
2497 ph10 443 }
2498 nigel 77 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2499     }
2500     else
2501     #endif
2502    
2503     /* Non-UTF-8 mode */
2504     {
2505 ph10 443 if (md->end_subject - eptr < 1)
2506 ph10 428 {
2507     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2508     RRETURN(MATCH_NOMATCH);
2509 ph10 443 }
2510 nigel 77 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2511     ecode += 2;
2512     }
2513     break;
2514    
2515     /* Match a single character, caselessly */
2516    
2517     case OP_CHARNC:
2518     #ifdef SUPPORT_UTF8
2519     if (utf8)
2520     {
2521     length = 1;
2522     ecode++;
2523     GETCHARLEN(fc, ecode, length);
2524    
2525 ph10 443 if (length > md->end_subject - eptr)
2526 ph10 428 {
2527     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2528     RRETURN(MATCH_NOMATCH);
2529 ph10 443 }
2530 nigel 77
2531     /* If the pattern character's value is < 128, we have only one byte, and
2532     can use the fast lookup table. */
2533    
2534     if (fc < 128)
2535     {
2536     if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2537     }
2538    
2539     /* Otherwise we must pick up the subject character */
2540    
2541     else
2542     {
2543 nigel 93 unsigned int dc;
2544 nigel 77 GETCHARINC(dc, eptr);
2545     ecode += length;
2546    
2547     /* If we have Unicode property support, we can use it to test the other
2548 nigel 87 case of the character, if there is one. */
2549 nigel 77
2550     if (fc != dc)
2551     {
2552     #ifdef SUPPORT_UCP
2553 ph10 349 if (dc != UCD_OTHERCASE(fc))
2554 nigel 77 #endif
2555     RRETURN(MATCH_NOMATCH);
2556     }
2557     }
2558     }
2559     else
2560     #endif /* SUPPORT_UTF8 */
2561    
2562     /* Non-UTF-8 mode */
2563     {
2564 ph10 443 if (md->end_subject - eptr < 1)
2565 ph10 428 {
2566 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2567 ph10 428 RRETURN(MATCH_NOMATCH);
2568 ph10 443 }
2569 nigel 77 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2570     ecode += 2;
2571     }
2572     break;
2573    
2574 nigel 93 /* Match a single character repeatedly. */
2575 nigel 77
2576     case OP_EXACT:
2577     min = max = GET2(ecode, 1);
2578     ecode += 3;
2579     goto REPEATCHAR;
2580    
2581 nigel 93 case OP_POSUPTO:
2582     possessive = TRUE;
2583     /* Fall through */
2584    
2585 nigel 77 case OP_UPTO:
2586     case OP_MINUPTO:
2587     min = 0;
2588     max = GET2(ecode, 1);
2589     minimize = *ecode == OP_MINUPTO;
2590     ecode += 3;
2591     goto REPEATCHAR;
2592    
2593 nigel 93 case OP_POSSTAR:
2594     possessive = TRUE;
2595     min = 0;
2596     max = INT_MAX;
2597     ecode++;
2598     goto REPEATCHAR;
2599    
2600     case OP_POSPLUS:
2601     possessive = TRUE;
2602     min = 1;
2603     max = INT_MAX;
2604     ecode++;
2605     goto REPEATCHAR;
2606    
2607     case OP_POSQUERY:
2608     possessive = TRUE;
2609     min = 0;
2610     max = 1;
2611     ecode++;
2612     goto REPEATCHAR;
2613    
2614 nigel 77 case OP_STAR:
2615     case OP_MINSTAR:
2616     case OP_PLUS:
2617     case OP_MINPLUS:
2618     case OP_QUERY:
2619     case OP_MINQUERY:
2620     c = *ecode++ - OP_STAR;
2621     minimize = (c & 1) != 0;
2622 ph10 443
2623 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2624     max = rep_max[c]; /* zero for max => infinity */
2625     if (max == 0) max = INT_MAX;
2626    
2627 ph10 426 /* Common code for all repeated single-character matches. */
2628 nigel 77
2629     REPEATCHAR:
2630     #ifdef SUPPORT_UTF8
2631     if (utf8)
2632     {
2633     length = 1;
2634     charptr = ecode;
2635     GETCHARLEN(fc, ecode, length);
2636     ecode += length;
2637    
2638     /* Handle multibyte character matching specially here. There is
2639     support for caseless matching if UCP support is present. */
2640    
2641     if (length > 1)
2642     {
2643     #ifdef SUPPORT_UCP
2644 nigel 93 unsigned int othercase;
2645 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2646 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2647 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2648 ph10 115 else oclength = 0;
2649 nigel 77 #endif /* SUPPORT_UCP */
2650    
2651     for (i = 1; i <= min; i++)
2652     {
2653 ph10 426 if (eptr <= md->end_subject - length &&
2654     memcmp(eptr, charptr, length) == 0) eptr += length;
2655 ph10 123 #ifdef SUPPORT_UCP
2656 ph10 426 else if (oclength > 0 &&
2657     eptr <= md->end_subject - oclength &&
2658     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2659     #endif /* SUPPORT_UCP */
2660 nigel 77 else
2661     {
2662 ph10 426 CHECK_PARTIAL();
2663     RRETURN(MATCH_NOMATCH);
2664 nigel 77 }
2665     }
2666    
2667     if (min == max) continue;
2668    
2669     if (minimize)
2670     {
2671     for (fi = min;; fi++)
2672     {
2673 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2674 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2675 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2676 ph10 426 if (eptr <= md->end_subject - length &&
2677     memcmp(eptr, charptr, length) == 0) eptr += length;
2678 ph10 123 #ifdef SUPPORT_UCP
2679 ph10 426 else if (oclength > 0 &&
2680     eptr <= md->end_subject - oclength &&
2681     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2682     #endif /* SUPPORT_UCP */
2683 nigel 77 else
2684     {
2685 ph10 426 CHECK_PARTIAL();
2686     RRETURN(MATCH_NOMATCH);
2687 nigel 77 }
2688     }
2689     /* Control never gets here */
2690     }
2691 nigel 93
2692     else /* Maximize */
2693 nigel 77 {
2694     pp = eptr;
2695     for (i = min; i < max; i++)
2696     {
2697 ph10 426 if (eptr <= md->end_subject - length &&
2698     memcmp(eptr, charptr, length) == 0) eptr += length;
2699 ph10 123 #ifdef SUPPORT_UCP
2700 ph10 426 else if (oclength > 0 &&
2701     eptr <= md->end_subject - oclength &&
2702     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2703     #endif /* SUPPORT_UCP */
2704 ph10 463 else
2705 ph10 462 {
2706 ph10 463 CHECK_PARTIAL();
2707 ph10 462 break;
2708 ph10 463 }
2709 nigel 77 }
2710 nigel 93
2711     if (possessive) continue;
2712 ph10 427
2713 ph10 120 for(;;)
2714 ph10 426 {
2715     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2716     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2717     if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2718 ph10 115 #ifdef SUPPORT_UCP
2719 ph10 426 eptr--;
2720     BACKCHAR(eptr);
2721 ph10 123 #else /* without SUPPORT_UCP */
2722 ph10 426 eptr -= length;
2723 ph10 123 #endif /* SUPPORT_UCP */
2724 ph10 426 }
2725 nigel 77 }
2726     /* Control never gets here */
2727     }
2728    
2729     /* If the length of a UTF-8 character is 1, we fall through here, and
2730     obey the code as for non-UTF-8 characters below, though in this case the
2731     value of fc will always be < 128. */
2732     }
2733     else
2734     #endif /* SUPPORT_UTF8 */
2735    
2736     /* When not in UTF-8 mode, load a single-byte character. */
2737    
2738 ph10 426 fc = *ecode++;
2739 ph10 443
2740 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2741     may not be in UTF-8 mode. The code is duplicated for the caseless and
2742     caseful cases, for speed, since matching characters is likely to be quite
2743     common. First, ensure the minimum number of matches are present. If min =
2744     max, continue at the same level without recursing. Otherwise, if
2745     minimizing, keep trying the rest of the expression and advancing one
2746     matching character if failing, up to the maximum. Alternatively, if
2747     maximizing, find the maximum number of characters and work backwards. */
2748    
2749     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2750     max, eptr));
2751    
2752     if ((ims & PCRE_CASELESS) != 0)
2753     {
2754     fc = md->lcc[fc];
2755     for (i = 1; i <= min; i++)
2756 ph10 426 {
2757     if (eptr >= md->end_subject)
2758     {
2759     SCHECK_PARTIAL();
2760     RRETURN(MATCH_NOMATCH);
2761     }
2762 nigel 77 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2763 ph10 426 }
2764 nigel 77 if (min == max) continue;
2765     if (minimize)
2766     {
2767     for (fi = min;; fi++)
2768     {
2769 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2770 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2771 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2772 ph10 426 if (eptr >= md->end_subject)
2773     {
2774 ph10 427 SCHECK_PARTIAL();
2775 ph10 426 RRETURN(MATCH_NOMATCH);
2776     }
2777     if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2778 nigel 77 }
2779     /* Control never gets here */
2780     }
2781 nigel 93 else /* Maximize */
2782 nigel 77 {
2783     pp = eptr;
2784     for (i = min; i < max; i++)
2785     {
2786 ph10 463 if (eptr >= md->end_subject)
2787 ph10 462 {
2788     SCHECK_PARTIAL();
2789     break;
2790 ph10 463 }
2791 ph10 462 if (fc != md->lcc[*eptr]) break;
2792 nigel 77 eptr++;
2793     }
2794 ph10 427
2795 nigel 93 if (possessive) continue;
2796 ph10 427
2797 nigel 77 while (eptr >= pp)
2798     {
2799 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2800 nigel 77 eptr--;
2801     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2802     }
2803     RRETURN(MATCH_NOMATCH);
2804     }
2805     /* Control never gets here */
2806     }
2807    
2808     /* Caseful comparisons (includes all multi-byte characters) */
2809    
2810     else
2811     {
2812 ph10 427 for (i = 1; i <= min; i++)
2813 ph10 426 {
2814     if (eptr >= md->end_subject)
2815     {
2816     SCHECK_PARTIAL();
2817     RRETURN(MATCH_NOMATCH);
2818     }
2819     if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2820 ph10 427 }
2821 ph10 443
2822 nigel 77 if (min == max) continue;
2823 ph10 443
2824 nigel 77 if (minimize)
2825     {
2826     for (fi = min;; fi++)
2827     {
2828 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2829 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2830 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2831 ph10 426 if (eptr >= md->end_subject)
2832 ph10 427 {
2833 ph10 426 SCHECK_PARTIAL();
2834     RRETURN(MATCH_NOMATCH);
2835 ph10 427 }
2836 ph10 426 if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2837 nigel 77 }
2838     /* Control never gets here */
2839     }
2840 nigel 93 else /* Maximize */
2841 nigel 77 {
2842     pp = eptr;
2843     for (i = min; i < max; i++)
2844     {
2845 ph10 463 if (eptr >= md->end_subject)
2846 ph10 462 {
2847 ph10 463 SCHECK_PARTIAL();
2848 ph10 462 break;
2849 ph10 463 }
2850 ph10 462 if (fc != *eptr) break;
2851 nigel 77 eptr++;
2852     }
2853 nigel 93 if (possessive) continue;
2854 ph10 443
2855 nigel 77 while (eptr >= pp)
2856     {
2857 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2858 nigel 77 eptr--;
2859     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2860     }
2861     RRETURN(MATCH_NOMATCH);
2862     }
2863     }
2864     /* Control never gets here */
2865    
2866     /* Match a negated single one-byte character. The character we are
2867     checking can be multibyte. */
2868    
2869     case OP_NOT:
2870 ph10 443 if (eptr >= md->end_subject)
2871 ph10 428 {
2872 ph10 443 SCHECK_PARTIAL();
2873 ph10 428 RRETURN(MATCH_NOMATCH);
2874 ph10 443 }
2875 nigel 77 ecode++;
2876     GETCHARINCTEST(c, eptr);
2877     if ((ims & PCRE_CASELESS) != 0)
2878     {
2879     #ifdef SUPPORT_UTF8
2880     if (c < 256)
2881     #endif
2882     c = md->lcc[c];
2883     if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2884     }
2885     else
2886     {
2887     if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2888     }
2889     break;
2890    
2891     /* Match a negated single one-byte character repeatedly. This is almost a
2892     repeat of the code for a repeated single character, but I haven't found a
2893     nice way of commoning these up that doesn't require a test of the
2894     positive/negative option for each character match. Maybe that wouldn't add
2895     very much to the time taken, but character matching *is* what this is all
2896     about... */
2897    
2898     case OP_NOTEXACT:
2899     min = max = GET2(ecode, 1);
2900     ecode += 3;
2901     goto REPEATNOTCHAR;
2902    
2903     case OP_NOTUPTO:
2904     case OP_NOTMINUPTO:
2905     min = 0;
2906     max = GET2(ecode, 1);
2907     minimize = *ecode == OP_NOTMINUPTO;
2908     ecode += 3;
2909     goto REPEATNOTCHAR;
2910    
2911 nigel 93 case OP_NOTPOSSTAR:
2912     possessive = TRUE;
2913     min = 0;
2914     max = INT_MAX;
2915     ecode++;
2916     goto REPEATNOTCHAR;
2917    
2918     case OP_NOTPOSPLUS:
2919     possessive = TRUE;
2920     min = 1;
2921     max = INT_MAX;
2922     ecode++;
2923     goto REPEATNOTCHAR;
2924    
2925     case OP_NOTPOSQUERY:
2926     possessive = TRUE;
2927     min = 0;
2928     max = 1;
2929     ecode++;
2930     goto REPEATNOTCHAR;
2931    
2932     case OP_NOTPOSUPTO:
2933     possessive = TRUE;
2934     min = 0;
2935     max = GET2(ecode, 1);
2936     ecode += 3;
2937     goto REPEATNOTCHAR;
2938    
2939 nigel 77 case OP_NOTSTAR:
2940     case OP_NOTMINSTAR:
2941     case OP_NOTPLUS:
2942     case OP_NOTMINPLUS:
2943     case OP_NOTQUERY:
2944     case OP_NOTMINQUERY:
2945     c = *ecode++ - OP_NOTSTAR;
2946     minimize = (c & 1) != 0;
2947     min = rep_min[c]; /* Pick up values from tables; */
2948     max = rep_max[c]; /* zero for max => infinity */
2949     if (max == 0) max = INT_MAX;
2950    
2951 ph10 426 /* Common code for all repeated matches of a negated single-byte character. */
2952 nigel 77
2953     REPEATNOTCHAR:
2954     fc = *ecode++;
2955    
2956     /* The code is duplicated for the caseless and caseful cases, for speed,
2957     since matching characters is likely to be quite common. First, ensure the
2958     minimum number of matches are present. If min = max, continue at the same
2959     level without recursing. Otherwise, if minimizing, keep trying the rest of
2960     the expression and advancing one matching character if failing, up to the
2961     maximum. Alternatively, if maximizing, find the maximum number of
2962     characters and work backwards. */
2963    
2964     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2965     max, eptr));
2966    
2967     if ((ims & PCRE_CASELESS) != 0)
2968     {
2969     fc = md->lcc[fc];
2970    
2971     #ifdef SUPPORT_UTF8
2972     /* UTF-8 mode */
2973     if (utf8)
2974     {
2975 nigel 93 register unsigned int d;
2976 nigel 77 for (i = 1; i <= min; i++)
2977     {
2978 ph10 426 if (eptr >= md->end_subject)
2979     {
2980     SCHECK_PARTIAL();
2981 ph10 427 RRETURN(MATCH_NOMATCH);
2982     }
2983 nigel 77 GETCHARINC(d, eptr);
2984     if (d < 256) d = md->lcc[d];
2985     if (fc == d) RRETURN(MATCH_NOMATCH);
2986     }
2987     }
2988     else
2989     #endif
2990    
2991     /* Not UTF-8 mode */
2992     {
2993     for (i = 1; i <= min; i++)
2994 ph10 426 {
2995     if (eptr >= md->end_subject)
2996     {
2997     SCHECK_PARTIAL();
2998 ph10 427 RRETURN(MATCH_NOMATCH);
2999     }
3000 nigel 77 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3001 ph10 427 }
3002 nigel 77 }
3003    
3004     if (min == max) continue;
3005    
3006     if (minimize)
3007     {
3008     #ifdef SUPPORT_UTF8
3009     /* UTF-8 mode */
3010     if (utf8)
3011     {
3012 nigel 93 register unsigned int d;
3013 nigel 77 for (fi = min;; fi++)
3014     {
3015 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3016 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3017 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3018 ph10 427 if (eptr >= md->end_subject)
3019 ph10 426 {
3020 ph10 427 SCHECK_PARTIAL();
3021 ph10 426 RRETURN(MATCH_NOMATCH);
3022 ph10 427 }
3023 nigel 77 GETCHARINC(d, eptr);
3024     if (d < 256) d = md->lcc[d];
3025 ph10 366 if (fc == d) RRETURN(MATCH_NOMATCH);
3026 nigel 77 }
3027     }
3028     else
3029     #endif
3030     /* Not UTF-8 mode */
3031     {
3032     for (fi = min;; fi++)
3033     {
3034 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3035 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3036 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3037 ph10 426 if (eptr >= md->end_subject)
3038     {
3039     SCHECK_PARTIAL();
3040     RRETURN(MATCH_NOMATCH);
3041     }
3042     if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3043 nigel 77 }
3044     }
3045     /* Control never gets here */
3046     }
3047    
3048     /* Maximize case */
3049    
3050     else
3051     {
3052     pp = eptr;
3053    
3054     #ifdef SUPPORT_UTF8
3055     /* UTF-8 mode */
3056     if (utf8)
3057     {
3058 nigel 93 register unsigned int d;
3059 nigel 77 for (i = min; i < max; i++)
3060     {
3061     int len = 1;
3062 ph10 463 if (eptr >= md->end_subject)
3063 ph10 462 {
3064 ph10 463 SCHECK_PARTIAL();
3065 ph10 462 break;
3066 ph10 463 }
3067 nigel 77 GETCHARLEN(d, eptr, len);
3068     if (d < 256) d = md->lcc[d];
3069     if (fc == d) break;
3070     eptr += len;
3071     }
3072 nigel 93 if (possessive) continue;
3073     for(;;)
3074 nigel 77 {
3075 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3076 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3077     if (eptr-- == pp) break; /* Stop if tried at original pos */
3078     BACKCHAR(eptr);
3079     }
3080     }
3081     else
3082     #endif
3083     /* Not UTF-8 mode */
3084     {
3085     for (i = min; i < max; i++)
3086     {
3087 ph10 463 if (eptr >= md->end_subject)
3088 ph10 462 {
3089     SCHECK_PARTIAL();
3090     break;
3091 ph10 463 }
3092 ph10 462 if (fc == md->lcc[*eptr]) break;
3093 nigel 77 eptr++;
3094     }
3095 nigel 93 if (possessive) continue;
3096 nigel 77 while (eptr >= pp)
3097     {
3098 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3099 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3100     eptr--;
3101     }
3102     }
3103    
3104     RRETURN(MATCH_NOMATCH);
3105     }
3106     /* Control never gets here */
3107     }
3108    
3109     /* Caseful comparisons */
3110    
3111     else
3112     {
3113     #ifdef SUPPORT_UTF8
3114     /* UTF-8 mode */
3115     if (utf8)
3116     {
3117 nigel 93 register unsigned int d;
3118 nigel 77 for (i = 1; i <= min; i++)
3119     {
3120 ph10 426 if (eptr >= md->end_subject)
3121     {
3122     SCHECK_PARTIAL();
3123 ph10 427 RRETURN(MATCH_NOMATCH);
3124     }
3125 nigel 77 GETCHARINC(d, eptr);
3126     if (fc == d) RRETURN(MATCH_NOMATCH);
3127     }
3128     }
3129     else
3130     #endif
3131     /* Not UTF-8 mode */
3132     {
3133     for (i = 1; i <= min; i++)
3134 ph10 426 {
3135     if (eptr >= md->end_subject)
3136     {
3137     SCHECK_PARTIAL();
3138 ph10 427 RRETURN(MATCH_NOMATCH);
3139     }
3140 nigel 77 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3141 ph10 427 }
3142 nigel 77 }
3143    
3144     if (min == max) continue;
3145    
3146     if (minimize)
3147     {
3148     #ifdef SUPPORT_UTF8
3149     /* UTF-8 mode */
3150     if (utf8)
3151     {
3152 nigel 93 register unsigned int d;
3153 nigel 77 for (fi = min;; fi++)
3154     {
3155 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3156 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3157 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3158 ph10 427 if (eptr >= md->end_subject)
3159 ph10 426 {
3160 ph10 427 SCHECK_PARTIAL();
3161 ph10 426 RRETURN(MATCH_NOMATCH);
3162 ph10 427 }
3163 nigel 77 GETCHARINC(d, eptr);
3164 ph10 371 if (fc == d) RRETURN(MATCH_NOMATCH);
3165 nigel 77 }
3166     }
3167     else
3168     #endif
3169     /* Not UTF-8 mode */
3170     {
3171     for (fi = min;; fi++)
3172     {
3173 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3174 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3175 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3176 ph10 426 if (eptr >= md->end_subject)
3177     {
3178     SCHECK_PARTIAL();
3179     RRETURN(MATCH_NOMATCH);
3180 ph10 427 }
3181 ph10 426 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3182 nigel 77 }
3183     }
3184     /* Control never gets here */
3185     }
3186    
3187     /* Maximize case */
3188    
3189     else
3190     {
3191     pp = eptr;
3192    
3193     #ifdef SUPPORT_UTF8
3194     /* UTF-8 mode */
3195     if (utf8)
3196     {
3197 nigel 93 register unsigned int d;
3198 nigel 77 for (i = min; i < max; i++)
3199     {
3200     int len = 1;
3201 ph10 463 if (eptr >= md->end_subject)
3202 ph10 462 {
3203 ph10 463 SCHECK_PARTIAL();
3204 ph10 462 break;
3205 ph10 463 }
3206 nigel 77 GETCHARLEN(d, eptr, len);
3207     if (fc == d) break;
3208     eptr += len;
3209     }
3210 nigel 93 if (possessive) continue;
3211 nigel 77 for(;;)
3212     {
3213 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3214 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3215     if (eptr-- == pp) break; /* Stop if tried at original pos */
3216     BACKCHAR(eptr);
3217     }
3218     }
3219     else
3220     #endif
3221     /* Not UTF-8 mode */
3222     {
3223     for (i = min; i < max; i++)
3224     {
3225 ph10 463 if (eptr >= md->end_subject)
3226 ph10 462 {
3227 ph10 463 SCHECK_PARTIAL();
3228 ph10 462 break;
3229 ph10 463 }
3230 ph10 462 if (fc == *eptr) break;
3231 nigel 77 eptr++;
3232     }
3233 nigel 93 if (possessive) continue;
3234 nigel 77 while (eptr >= pp)
3235     {
3236 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3237 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3238     eptr--;
3239     }
3240     }
3241    
3242     RRETURN(MATCH_NOMATCH);
3243     }
3244     }
3245     /* Control never gets here */
3246    
3247     /* Match a single character type repeatedly; several different opcodes
3248     share code. This is very similar to the code for single characters, but we
3249     repeat it in the interests of efficiency. */
3250    
3251     case OP_TYPEEXACT:
3252     min = max = GET2(ecode, 1);
3253     minimize = TRUE;
3254     ecode += 3;
3255     goto REPEATTYPE;
3256    
3257     case OP_TYPEUPTO:
3258     case OP_TYPEMINUPTO:
3259     min = 0;
3260     max = GET2(ecode, 1);
3261     minimize = *ecode == OP_TYPEMINUPTO;
3262     ecode += 3;
3263     goto REPEATTYPE;
3264    
3265 nigel 93 case OP_TYPEPOSSTAR:
3266     possessive = TRUE;
3267     min = 0;
3268     max = INT_MAX;
3269     ecode++;
3270     goto REPEATTYPE;
3271    
3272     case OP_TYPEPOSPLUS:
3273     possessive = TRUE;
3274     min = 1;
3275     max = INT_MAX;
3276     ecode++;
3277     goto REPEATTYPE;
3278    
3279     case OP_TYPEPOSQUERY:
3280     possessive = TRUE;
3281     min = 0;
3282     max = 1;
3283     ecode++;
3284     goto REPEATTYPE;
3285    
3286     case OP_TYPEPOSUPTO:
3287     possessive = TRUE;
3288     min = 0;
3289     max = GET2(ecode, 1);
3290     ecode += 3;
3291     goto REPEATTYPE;
3292    
3293 nigel 77 case OP_TYPESTAR:
3294     case OP_TYPEMINSTAR:
3295     case OP_TYPEPLUS:
3296     case OP_TYPEMINPLUS:
3297     case OP_TYPEQUERY:
3298     case OP_TYPEMINQUERY:
3299     c = *ecode++ - OP_TYPESTAR;
3300     minimize = (c & 1) != 0;
3301     min = rep_min[c]; /* Pick up values from tables; */
3302     max = rep_max[c]; /* zero for max => infinity */
3303     if (max == 0) max = INT_MAX;
3304    
3305     /* Common code for all repeated single character type matches. Note that
3306     in UTF-8 mode, '.' matches a character of any length, as can several other
3307     types (for example the negated types, OP_ANYNL, OP_HSPACE and OP_VSPACE); for OP_DIGIT, OP_WHITESPACE and OP_WORDCHAR the valid characters are all one byte long. */
3308    
3309     REPEATTYPE:
3310     ctype = *ecode++; /* Code for the character type */
3311    
3312     #ifdef SUPPORT_UCP
3313     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3314     {
3315     prop_fail_result = ctype == OP_NOTPROP;
3316     prop_type = *ecode++;
3317 nigel 87 prop_value = *ecode++;
3318 nigel 77 }
3319     else prop_type = -1;
3320     #endif
3321    
3322     /* First, ensure the minimum number of matches are present. Use inline
3323     code for maximizing the speed, and do the type test once at the start
3324 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3325 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3326     and single-bytes. */
3327    
3328     if (min > 0)
3329     {
3330     #ifdef SUPPORT_UCP
3331 nigel 87 if (prop_type >= 0)
3332 nigel 77 {
3333 nigel 87 switch(prop_type)
3334 nigel 77 {
3335 nigel 87 case PT_ANY:
3336     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3337     for (i = 1; i <= min; i++)
3338     {
3339 ph10 427 if (eptr >= md->end_subject)
3340 ph10 426 {
3341 ph10 427 SCHECK_PARTIAL();
3342 ph10 426 RRETURN(MATCH_NOMATCH);
3343 ph10 427 }
3344 ph10 184 GETCHARINCTEST(c, eptr);
3345 nigel 87 }
3346     break;
3347    
3348     case PT_LAMP:
3349     for (i = 1; i <= min; i++)
3350     {
3351 ph10 427 if (eptr >= md->end_subject)
3352 ph10 426 {
3353 ph10 427 SCHECK_PARTIAL();
3354 ph10 426 RRETURN(MATCH_NOMATCH);
3355 ph10 427 }
3356 ph10 184 GETCHARINCTEST(c, eptr);
3357 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3358 nigel 87 if ((prop_chartype == ucp_Lu ||
3359     prop_chartype == ucp_Ll ||
3360     prop_chartype == ucp_Lt) == prop_fail_result)
3361     RRETURN(MATCH_NOMATCH);
3362     }
3363     break;
3364    
3365     case PT_GC:
3366     for (i = 1; i <= min; i++)
3367     {
3368 ph10 427 if (eptr >= md->end_subject)
3369 ph10 426 {
3370 ph10 427 SCHECK_PARTIAL();
3371 ph10 426 RRETURN(MATCH_NOMATCH);
3372 ph10 427 }
3373 ph10 184 GETCHARINCTEST(c, eptr);
3374 ph10 349 prop_category = UCD_CATEGORY(c);
3375 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3376     RRETURN(MATCH_NOMATCH);
3377     }
3378     break;
3379    
3380     case PT_PC:
3381     for (i = 1; i <= min; i++)
3382     {
3383 ph10 427 if (eptr >= md->end_subject)
3384 ph10 426 {
3385 ph10 427 SCHECK_PARTIAL();
3386 ph10 426 RRETURN(MATCH_NOMATCH);
3387 ph10 427 }
3388 ph10 184 GETCHARINCTEST(c, eptr);
3389 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3390 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3391     RRETURN(MATCH_NOMATCH);
3392     }
3393     break;
3394    
3395     case PT_SC:
3396     for (i = 1; i <= min; i++)
3397     {
3398 ph10 427 if (eptr >= md->end_subject)
3399 ph10 426 {
3400 ph10 427 SCHECK_PARTIAL();
3401 ph10 426 RRETURN(MATCH_NOMATCH);
3402 ph10 427 }
3403 ph10 184 GETCHARINCTEST(c, eptr);
3404 ph10 349 prop_script = UCD_SCRIPT(c);
3405 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3406     RRETURN(MATCH_NOMATCH);
3407     }
3408     break;
3409    
3410     default:
3411     RRETURN(PCRE_ERROR_INTERNAL);
3412 nigel 77 }
3413     }
3414    
3415     /* Match extended Unicode sequences. We will get here only if the
3416     support is in the binary; otherwise a compile-time error occurs. */
3417    
3418     else if (ctype == OP_EXTUNI)
3419     {
3420     for (i = 1; i <= min; i++)
3421     {
3422 ph10 427 if (eptr >= md->end_subject)
3423 ph10 426 {
3424 ph10 427 SCHECK_PARTIAL();
3425 ph10 426 RRETURN(MATCH_NOMATCH);
3426 ph10 427 }
3427 nigel 77 GETCHARINCTEST(c, eptr);
3428 ph10 349 prop_category = UCD_CATEGORY(c);
3429 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3430     while (eptr < md->end_subject)
3431     {
3432     int len = 1;
3433 ph10 426 if (!utf8) c = *eptr;
3434     else { GETCHARLEN(c, eptr, len); }
3435 ph10 349 prop_category = UCD_CATEGORY(c);
3436 nigel 77 if (prop_category != ucp_M) break;
3437     eptr += len;
3438     }
3439     }
3440     }
3441    
3442     else
3443     #endif /* SUPPORT_UCP */
3444    
3445     /* Handle all other cases when the coding is UTF-8 */
3446    
3447     #ifdef SUPPORT_UTF8
3448     if (utf8) switch(ctype)
3449     {
3450     case OP_ANY:
3451     for (i = 1; i <= min; i++)
3452     {
3453 ph10 426 if (eptr >= md->end_subject)
3454     {
3455 ph10 427 SCHECK_PARTIAL();
3456 nigel 77 RRETURN(MATCH_NOMATCH);
3457 ph10 427 }
3458 ph10 426 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3459 nigel 91 eptr++;
3460 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3461     }
3462     break;
3463    
3464 ph10 341 case OP_ALLANY:
3465     for (i = 1; i <= min; i++)
3466     {
3467 ph10 427 if (eptr >= md->end_subject)
3468 ph10 426 {
3469     SCHECK_PARTIAL();
3470     RRETURN(MATCH_NOMATCH);
3471 ph10 427 }
3472 ph10 341 eptr++;
3473     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3474     }
3475     break;
3476    
3477 nigel 77 case OP_ANYBYTE:
3478 ph10 427 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3479 nigel 77 eptr += min;
3480     break;
3481    
3482 nigel 93 case OP_ANYNL:
3483     for (i = 1; i <= min; i++)
3484     {
3485 ph10 427 if (eptr >= md->end_subject)
3486 ph10 426 {
3487     SCHECK_PARTIAL();
3488     RRETURN(MATCH_NOMATCH);
3489 ph10 427 }
3490 nigel 93 GETCHARINC(c, eptr);
3491     switch(c)
3492     {
3493     default: RRETURN(MATCH_NOMATCH);
3494     case 0x000d:
3495     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3496     break;
3497 ph10 231
3498 nigel 93 case 0x000a:
3499 ph10 231 break;
3500    
3501 nigel 93 case 0x000b:
3502     case 0x000c:
3503     case 0x0085:
3504     case 0x2028:
3505     case 0x2029:
3506 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3507 nigel 93 break;
3508     }
3509     }
3510     break;
3511    
3512 ph10 178 case OP_NOT_HSPACE:
3513     for (i = 1; i <= min; i++)
3514     {
3515 ph10 427 if (eptr >= md->end_subject)
3516 ph10 426 {
3517     SCHECK_PARTIAL();
3518     RRETURN(MATCH_NOMATCH);
3519 ph10 427 }
3520 ph10 178 GETCHARINC(c, eptr);
3521     switch(c)
3522     {
3523     default: break;
3524     case 0x09: /* HT */
3525     case 0x20: /* SPACE */
3526     case 0xa0: /* NBSP */
3527     case 0x1680: /* OGHAM SPACE MARK */
3528     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3529     case 0x2000: /* EN QUAD */
3530     case 0x2001: /* EM QUAD */
3531     case 0x2002: /* EN SPACE */
3532     case 0x2003: /* EM SPACE */
3533     case 0x2004: /* THREE-PER-EM SPACE */
3534     case 0x2005: /* FOUR-PER-EM SPACE */
3535     case 0x2006: /* SIX-PER-EM SPACE */
3536     case 0x2007: /* FIGURE SPACE */
3537     case 0x2008: /* PUNCTUATION SPACE */
3538     case 0x2009: /* THIN SPACE */
3539     case 0x200A: /* HAIR SPACE */
3540     case 0x202f: /* NARROW NO-BREAK SPACE */
3541     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3542     case 0x3000: /* IDEOGRAPHIC SPACE */
3543     RRETURN(MATCH_NOMATCH);
3544     }
3545     }
3546     break;
3547 ph10 182
3548 ph10 178 case OP_HSPACE:
3549     for (i = 1; i <= min; i++)
3550     {
3551 ph10 427 if (eptr >= md->end_subject)
3552 ph10 426 {
3553 ph10 427 SCHECK_PARTIAL();
3554 ph10 426 RRETURN(MATCH_NOMATCH);
3555 ph10 427 }
3556 ph10 178 GETCHARINC(c, eptr);
3557     switch(c)
3558     {
3559     default: RRETURN(MATCH_NOMATCH);
3560     case 0x09: /* HT */
3561     case 0x20: /* SPACE */
3562     case 0xa0: /* NBSP */
3563     case 0x1680: /* OGHAM SPACE MARK */
3564     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3565     case 0x2000: /* EN QUAD */
3566     case 0x2001: /* EM QUAD */
3567     case 0x2002: /* EN SPACE */
3568     case 0x2003: /* EM SPACE */
3569     case 0x2004: /* THREE-PER-EM SPACE */
3570     case 0x2005: /* FOUR-PER-EM SPACE */
3571     case 0x2006: /* SIX-PER-EM SPACE */
3572     case 0x2007: /* FIGURE SPACE */
3573     case 0x2008: /* PUNCTUATION SPACE */
3574     case 0x2009: /* THIN SPACE */
3575     case 0x200A: /* HAIR SPACE */
3576     case 0x202f: /* NARROW NO-BREAK SPACE */
3577     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3578     case 0x3000: /* IDEOGRAPHIC SPACE */
3579     break;
3580     }
3581     }
3582     break;
3583 ph10 182
3584 ph10 178 case OP_NOT_VSPACE:
3585     for (i = 1; i <= min; i++)
3586     {
3587 ph10 427 if (eptr >= md->end_subject)
3588 ph10 426 {
3589 ph10 427 SCHECK_PARTIAL();
3590 ph10 426 RRETURN(MATCH_NOMATCH);
3591 ph10 427 }
3592 ph10 178 GETCHARINC(c, eptr);
3593     switch(c)
3594     {
3595     default: break;
3596     case 0x0a: /* LF */
3597     case 0x0b: /* VT */
3598     case 0x0c: /* FF */
3599     case 0x0d: /* CR */
3600     case 0x85: /* NEL */
3601     case 0x2028: /* LINE SEPARATOR */
3602     case 0x2029: /* PARAGRAPH SEPARATOR */
3603     RRETURN(MATCH_NOMATCH);
3604     }
3605     }
3606     break;
3607 ph10 182
3608 ph10 178 case OP_VSPACE:
3609     for (i = 1; i <= min; i++)
3610     {
3611 ph10 427 if (eptr >= md->end_subject)
3612 ph10 426 {
3613 ph10 427 SCHECK_PARTIAL();
3614 ph10 426 RRETURN(MATCH_NOMATCH);
3615 ph10 427 }
3616 ph10 178 GETCHARINC(c, eptr);
3617     switch(c)
3618     {
3619     default: RRETURN(MATCH_NOMATCH);
3620     case 0x0a: /* LF */
3621     case 0x0b: /* VT */
3622     case 0x0c: /* FF */
3623     case 0x0d: /* CR */
3624     case 0x85: /* NEL */
3625     case 0x2028: /* LINE SEPARATOR */
3626     case 0x2029: /* PARAGRAPH SEPARATOR */
3627 ph10 182 break;
3628 ph10 178 }
3629     }
3630     break;
3631    
3632 nigel 77 case OP_NOT_DIGIT:
3633     for (i = 1; i <= min; i++)
3634     {
3635 ph10 427 if (eptr >= md->end_subject)
3636 ph10 426 {
3637 ph10 427 SCHECK_PARTIAL();
3638 ph10 426 RRETURN(MATCH_NOMATCH);
3639 ph10 427 }
3640 nigel 77 GETCHARINC(c, eptr);
3641     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3642     RRETURN(MATCH_NOMATCH);
3643     }
3644     break;
3645    
3646     case OP_DIGIT:
3647     for (i = 1; i <= min; i++)
3648     {
3649 ph10 427 if (eptr >= md->end_subject)
3650 ph10 426 {
3651 ph10 427 SCHECK_PARTIAL();
3652 nigel 77 RRETURN(MATCH_NOMATCH);
3653 ph10 427 }
3654 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3655     RRETURN(MATCH_NOMATCH);
3656 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3657     }
3658     break;
3659    
3660     case OP_NOT_WHITESPACE:
3661     for (i = 1; i <= min; i++)
3662     {
3663 ph10 427 if (eptr >= md->end_subject)
3664 ph10 426 {
3665 ph10 427 SCHECK_PARTIAL();
3666 nigel 77 RRETURN(MATCH_NOMATCH);
3667 ph10 427 }
3668 ph10 426 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3669     RRETURN(MATCH_NOMATCH);
3670 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3671 nigel 77 }
3672     break;
3673    
3674     case OP_WHITESPACE:
3675     for (i = 1; i <= min; i++)
3676     {
3677 ph10 427 if (eptr >= md->end_subject)
3678 ph10 426 {
3679 ph10 427 SCHECK_PARTIAL();
3680 nigel 77 RRETURN(MATCH_NOMATCH);
3681 ph10 427 }
3682 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3683     RRETURN(MATCH_NOMATCH);
3684 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3685     }
3686     break;
3687    
3688     case OP_NOT_WORDCHAR:
3689     for (i = 1; i <= min; i++)
3690     {
3691     if (eptr >= md->end_subject ||
3692 ph10 219 (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3693 nigel 77 RRETURN(MATCH_NOMATCH);
3694 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3695 nigel 77 }
3696     break;
3697    
3698     case OP_WORDCHAR:
3699     for (i = 1; i <= min; i++)
3700     {
3701 ph10 427 if (eptr >= md->end_subject)
3702 ph10 426 {
3703 ph10 427 SCHECK_PARTIAL();
3704 nigel 77 RRETURN(MATCH_NOMATCH);
3705 ph10 427 }
3706 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3707     RRETURN(MATCH_NOMATCH);
3708 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3709     }
3710     break;
3711    
3712     default:
3713     RRETURN(PCRE_ERROR_INTERNAL);
3714     } /* End switch(ctype) */
3715    
3716     else
3717     #endif /* SUPPORT_UTF8 */
3718    
3719     /* Code for the non-UTF-8 case for minimum matching of operators other
3720 ph10 426 than OP_PROP and OP_NOTPROP. */
3721 nigel 77
3722     switch(ctype)
3723     {
3724     case OP_ANY:
3725 ph10 342 for (i = 1; i <= min; i++)
3726 nigel 77 {
3727 ph10 427 if (eptr >= md->end_subject)
3728 ph10 426 {
3729 ph10 427 SCHECK_PARTIAL();
3730 ph10 426 RRETURN(MATCH_NOMATCH);
3731 ph10 427 }
3732 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3733     eptr++;
3734 nigel 77 }
3735     break;
3736    
3737 ph10 341 case OP_ALLANY:
3738 ph10 443 if (eptr > md->end_subject - min)
3739 ph10 428 {
3740 ph10 443 SCHECK_PARTIAL();
3741 ph10 428 RRETURN(MATCH_NOMATCH);
3742 ph10 443 }
3743 ph10 341 eptr += min;
3744     break;
3745    
3746 nigel 77 case OP_ANYBYTE:
3747 ph10 443 if (eptr > md->end_subject - min)
3748 ph10 428 {
3749 ph10 443 SCHECK_PARTIAL();
3750 ph10 428 RRETURN(MATCH_NOMATCH);
3751 ph10 443 }
3752 nigel 77 eptr += min;
3753     break;
3754    
3755 nigel 93 case OP_ANYNL:
3756     for (i = 1; i <= min; i++)
3757     {
3758 ph10 427 if (eptr >= md->end_subject)
3759 ph10 426 {
3760 ph10 427 SCHECK_PARTIAL();
3761 ph10 426 RRETURN(MATCH_NOMATCH);
3762 ph10 427 }
3763 nigel 93 switch(*eptr++)
3764     {
3765     default: RRETURN(MATCH_NOMATCH);
3766     case 0x000d:
3767     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3768     break;
3769     case 0x000a:
3770 ph10 231 break;
3771    
3772 nigel 93 case 0x000b:
3773     case 0x000c:
3774     case 0x0085:
3775 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3776 nigel 93 break;
3777     }
3778     }
3779     break;
3780    
3781 ph10 178 case OP_NOT_HSPACE:
3782     for (i = 1; i <= min; i++)
3783     {
3784 ph10 427 if (eptr >= md->end_subject)
3785 ph10 426 {
3786 ph10 427 SCHECK_PARTIAL();
3787 ph10 426 RRETURN(MATCH_NOMATCH);
3788 ph10 427 }
3789 ph10 178 switch(*eptr++)
3790     {
3791     default: break;
3792     case 0x09: /* HT */
3793     case 0x20: /* SPACE */
3794     case 0xa0: /* NBSP */
3795     RRETURN(MATCH_NOMATCH);
3796     }
3797     }
3798     break;
3799    
3800     case OP_HSPACE:
3801     for (i = 1; i <= min; i++)
3802     {
3803 ph10 427 if (eptr >= md->end_subject)
3804 ph10 426 {
3805 ph10 427 SCHECK_PARTIAL();
3806 ph10 426 RRETURN(MATCH_NOMATCH);
3807 ph10 427 }
3808 ph10 178 switch(*eptr++)
3809     {
3810     default: RRETURN(MATCH_NOMATCH);
3811     case 0x09: /* HT */
3812     case 0x20: /* SPACE */
3813     case 0xa0: /* NBSP */
3814 ph10 182 break;
3815 ph10 178 }
3816     }
3817     break;
3818    
3819     case OP_NOT_VSPACE:
3820     for (i = 1; i <= min; i++)
3821     {
3822 ph10 427 if (eptr >= md->end_subject)
3823 ph10 426 {
3824 ph10 427 SCHECK_PARTIAL();
3825 ph10 426 RRETURN(MATCH_NOMATCH);
3826 ph10 427 }
3827 ph10 178 switch(*eptr++)
3828     {
3829     default: break;
3830     case 0x0a: /* LF */
3831     case 0x0b: /* VT */
3832     case 0x0c: /* FF */
3833     case 0x0d: /* CR */
3834     case 0x85: /* NEL */
3835     RRETURN(MATCH_NOMATCH);
3836     }
3837     }
3838     break;
3839    
3840     case OP_VSPACE:
3841     for (i = 1; i <= min; i++)
3842     {
3843 ph10 427 if (eptr >= md->end_subject)
3844 ph10 426 {
3845 ph10 427 SCHECK_PARTIAL();
3846 ph10 426 RRETURN(MATCH_NOMATCH);
3847 ph10 427 }
3848 ph10 178 switch(*eptr++)
3849     {
3850     default: RRETURN(MATCH_NOMATCH);
3851     case 0x0a: /* LF */
3852     case 0x0b: /* VT */
3853     case 0x0c: /* FF */
3854     case 0x0d: /* CR */
3855     case 0x85: /* NEL */
3856 ph10 182 break;
3857 ph10 178 }
3858     }
3859     break;
3860    
3861 nigel 77 case OP_NOT_DIGIT:
3862     for (i = 1; i <= min; i++)
3863 ph10 427 {
3864     if (eptr >= md->end_subject)
3865 ph10 426 {
3866 ph10 427 SCHECK_PARTIAL();
3867 ph10 426 RRETURN(MATCH_NOMATCH);
3868 ph10 427 }
3869 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3870 ph10 427 }
3871 nigel 77 break;
3872    
3873     case OP_DIGIT:
3874     for (i = 1; i <= min; i++)
3875 ph10 427 {
3876     if (eptr >= md->end_subject)
3877 ph10 426 {
3878 ph10 427 SCHECK_PARTIAL();
3879 ph10 426 RRETURN(MATCH_NOMATCH);
3880 ph10 427 }
3881 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3882 ph10 427 }
3883 nigel 77 break;
3884    
3885     case OP_NOT_WHITESPACE:
3886     for (i = 1; i <= min; i++)
3887 ph10 427 {
3888     if (eptr >= md->end_subject)
3889 ph10 426 {
3890 ph10 427 SCHECK_PARTIAL();
3891 ph10 426 RRETURN(MATCH_NOMATCH);
3892 ph10 427 }
3893 nigel 77 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3894 ph10 427 }
3895 nigel 77 break;
3896    
3897     case OP_WHITESPACE:
3898     for (i = 1; i <= min; i++)
3899 ph10 427 {
3900     if (eptr >= md->end_subject)
3901 ph10 426 {
3902 ph10 427 SCHECK_PARTIAL();
3903 ph10 426 RRETURN(MATCH_NOMATCH);
3904 ph10 427 }
3905 nigel 77 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3906 ph10 427 }
3907 nigel 77 break;
3908    
3909     case OP_NOT_WORDCHAR:
3910     for (i = 1; i <= min; i++)
3911 ph10 427 {
3912     if (eptr >= md->end_subject)
3913 ph10 426 {
3914 ph10 427 SCHECK_PARTIAL();
3915 ph10 426 RRETURN(MATCH_NOMATCH);
3916 ph10 427 }
3917 nigel 77 if ((md->ctypes[*eptr++] & ctype_word) != 0)
3918     RRETURN(MATCH_NOMATCH);
3919 ph10 427 }
3920 nigel 77 break;
3921    
3922     case OP_WORDCHAR:
3923     for (i = 1; i <= min; i++)
3924 ph10 427 {
3925     if (eptr >= md->end_subject)
3926 ph10 426 {
3927 ph10 427 SCHECK_PARTIAL();
3928 ph10 426 RRETURN(MATCH_NOMATCH);
3929 ph10 427 }
3930 nigel 77 if ((md->ctypes[*eptr++] & ctype_word) == 0)
3931     RRETURN(MATCH_NOMATCH);
3932 ph10 427 }
3933 nigel 77 break;
3934    
3935     default:
3936     RRETURN(PCRE_ERROR_INTERNAL);
3937     }
3938     }
3939    
3940     /* If min = max, continue at the same level without recursing */
3941    
3942     if (min == max) continue;
3943    
3944     /* If minimizing, we have to test the rest of the pattern before each
3945     subsequent match. Again, separate the UTF-8 case for speed, and also
3946     separate the UCP cases. */
3947    
3948     if (minimize)
3949     {
3950     #ifdef SUPPORT_UCP
3951 nigel 87 if (prop_type >= 0)
3952 nigel 77 {
3953 nigel 87 switch(prop_type)
3954 nigel 77 {
3955 nigel 87 case PT_ANY:
3956     for (fi = min;; fi++)
3957     {
3958 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3959 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3960 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3961 ph10 427 if (eptr >= md->end_subject)
3962 ph10 426 {
3963 ph10 427 SCHECK_PARTIAL();
3964 ph10 426 RRETURN(MATCH_NOMATCH);
3965 ph10 427 }
3966 nigel 87 GETCHARINC(c, eptr);
3967     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3968     }
3969 nigel 93 /* Control never gets here */
3970 nigel 87
3971     case PT_LAMP:
3972     for (fi = min;; fi++)
3973     {
3974 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3975 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3976 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3977 ph10 427 if (eptr >= md->end_subject)
3978 ph10 426 {
3979 ph10 427 SCHECK_PARTIAL();
3980 ph10 426 RRETURN(MATCH_NOMATCH);
3981 ph10 427 }
3982 nigel 87 GETCHARINC(c, eptr);
3983 ph10