/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 500 - (hide annotations) (download)
Sat Mar 6 19:00:29 2010 UTC (3 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 171227 byte(s)
Fix bugs with \K in atomic groups, subroutines, and assertions.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
/* Each value below is what the OP_COMMIT, OP_PRUNE, OP_SKIP or OP_THEN case of
match() returns when matching the rest of the pattern from that point gave
MATCH_NOMATCH. The bracket alternative loops in match() treat MATCH_THEN in the
same way as MATCH_NOMATCH and go on to the next alternative; any other result is
passed straight back. */

74     #define MATCH_COMMIT (-999)
75     #define MATCH_PRUNE (-998)
76     #define MATCH_SKIP (-997)
77     #define MATCH_THEN (-996)
78    
79 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
80     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81     because the offset vector is always a multiple of 3 long. */
82    
/* This value is also the dimension of the stacksave array that match() keeps,
either as a local or as the Xstacksave member of a heap frame. */

83     #define REC_STACK_SAVE_MAX 30
84    
85     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
86    
/* The two tables are read in parallel. Their entries come in three pairs with
the bounds {0,unlimited}, {1,unlimited} and {0,1}.
NOTE(review): presumably one pair for each of *, + and ? in its greedy and
minimizing form; the code that indexes the tables is not visible here, so
confirm the index mapping before relying on it. */

87     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
88     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
89    
90    
91    
92 ph10 475 #ifdef PCRE_DEBUG
93 nigel 77 /*************************************************
94     * Debugging function to print chars *
95     *************************************************/
96    
97     /* Print a sequence of chars in printable format, stopping at the end of the
98     subject if the requested.
99    
100     Arguments:
101     p points to characters
102     length number to print
103     is_subject TRUE if printing from within md->start_subject
104     md pointer to matching data block, if is_subject is TRUE
105    
106     Returns: nothing
107     */
108    
109     static void
110     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111     {
112 nigel 93 unsigned int c;
113 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114     while (length-- > 0)
115     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
116     }
117     #endif
118    
119    
120    
121     /*************************************************
122     * Match a back-reference *
123     *************************************************/
124    
125     /* If a back reference hasn't been set, the length that is passed is greater
126     than the number of characters left in the string, so the match fails.
127    
128     Arguments:
129     offset index into the offset vector
130     eptr points into the subject
131     length length to be matched
132     md points to match data block
133     ims the ims flags
134    
135     Returns: TRUE if matched
136     */
137    
138     static BOOL
139 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
140 nigel 77 unsigned long int ims)
141     {
142 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
143 nigel 77
144 ph10 475 #ifdef PCRE_DEBUG
145 nigel 77 if (eptr >= md->end_subject)
146     printf("matching subject <null>");
147     else
148     {
149     printf("matching subject ");
150     pchars(eptr, length, TRUE, md);
151     }
152     printf(" against backref ");
153     pchars(p, length, FALSE, md);
154     printf("\n");
155     #endif
156    
157     /* Always fail if not enough characters left */
158    
159     if (length > md->end_subject - eptr) return FALSE;
160    
161 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162     properly if Unicode properties are supported. Otherwise, we can check only
163     ASCII characters. */
164 nigel 77
165     if ((ims & PCRE_CASELESS) != 0)
166     {
167 ph10 354 #ifdef SUPPORT_UTF8
168     #ifdef SUPPORT_UCP
169     if (md->utf8)
170     {
171 ph10 358 USPTR endptr = eptr + length;
172 ph10 354 while (eptr < endptr)
173     {
174 ph10 358 int c, d;
175 ph10 354 GETCHARINC(c, eptr);
176     GETCHARINC(d, p);
177     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178 ph10 358 }
179     }
180 ph10 354 else
181     #endif
182     #endif
183    
184     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185     is no UCP support. */
186 ph10 358
187 nigel 77 while (length-- > 0)
188 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189 nigel 77 }
190 ph10 358
191 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
192     are in UTF-8 mode. */
193 ph10 358
194 nigel 77 else
195     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
197     return TRUE;
198     }
199    
200    
201    
202     /***************************************************************************
203     ****************************************************************************
204     RECURSION IN THE match() FUNCTION
205    
206 nigel 87 The match() function is highly recursive, though not every recursive call
207     increases the recursive depth. Nevertheless, some regular expressions can cause
208     it to recurse to a great depth. I was writing for Unix, so I just let it call
209     itself recursively. This uses the stack for saving everything that has to be
210     saved for a recursive call. On Unix, the stack can be large, and this works
211     fine.
212 nigel 77
213 nigel 87 It turns out that on some non-Unix-like systems there are problems with
214     programs that use a lot of stack. (This despite the fact that every last chip
215     has oodles of memory these days, and techniques for extending the stack have
216     been known for decades.) So....
217 nigel 77
218     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219     calls by keeping local variables that need to be preserved in blocks of memory
220 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
221 nigel 77 achieve this so that the actual code doesn't look very different to what it
222     always used to.
223 ph10 164
224 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
225 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
226     Switzer, the use of longjmp() has been abolished, at the cost of having to
227     provide a unique number for each call to RMATCH. There is no way of generating
228     a sequence of numbers at compile time in C. I have given them names, to make
229     them stand out more clearly.
230    
231     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
234     don't have indeterminate values; this has meant that the frame size can be
235 ph10 164 reduced because the result can be "passed back" by straight setting of the
236     variable instead of being passed in the frame.
237 nigel 77 ****************************************************************************
238     ***************************************************************************/
239    
/* Identifiers for the individual RMATCH calls, numbered consecutively from 1.
Whenever this list is altered, the code at HEAP_RETURN below has to be altered
to match it. */

enum {
  RM1 = 1,
        RM2,  RM3,  RM4,  RM5,  RM6,
  RM7,  RM8,  RM9,  RM10, RM11, RM12,
  RM13, RM14, RM15, RM16, RM17, RM18,
  RM19, RM20, RM21, RM22, RM23, RM24,
  RM25, RM26, RM27, RM28, RM29, RM30,
  RM31, RM32, RM33, RM34, RM35, RM36,
  RM37, RM38, RM39, RM40, RM41, RM42,
  RM43, RM44, RM45, RM46, RM47, RM48,
  RM49, RM50, RM51, RM52, RM53, RM54
};
249 ph10 164
250 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
251 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
252 ph10 164 actually used in this definition. */
253 nigel 77
/* Both forms of RMATCH call match() directly. Besides the macro arguments they
pass the mstart that is in scope at the point of use and rdepth+1, and they
leave the result in the rrc that is in scope there. The debugging forms expand
to a braced block and additionally trace the call and the return with printf;
note that the debugging RRETURN names its argument twice, so the argument is
evaluated twice. */

254     #ifndef NO_RECURSE
255     #define REGISTER register
256 ph10 164
257 ph10 475 #ifdef PCRE_DEBUG
258 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259 nigel 87 { \
260     printf("match() called in line %d\n", __LINE__); \
261 ph10 168 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
262 nigel 87 printf("to line %d\n", __LINE__); \
263     }
264     #define RRETURN(ra) \
265     { \
266     printf("match() returned %d from line %d ", ra, __LINE__); \
267     return ra; \
268     }
269     #else
/* Production forms: a bare assignment expression and a bare return statement. */
270 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271 ph10 168 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
272 nigel 77 #define RRETURN(ra) return ra
273 nigel 87 #endif
274    
275 nigel 77 #else
276    
277    
/* These versions of the macros manage a private stack on the heap. Note that
the "rd" argument of RMATCH isn't actually used in this definition. It's the md
argument of match(), which never changes.

RMATCH obtains a new frame from pcre_stack_malloc, records the call site (rw)
in the Xwhere field of the current frame, fills in the argument slots of the
new frame, makes it the current frame, and jumps to HEAP_RECURSE. The L_##rw
label it plants is the place where execution resumes for this call site. If the
frame cannot be obtained, the current incarnation gives up straight away with
PCRE_ERROR_NOMEMORY by means of RRETURN, so that the failure unwinds through
the outer frames like any other negative return instead of a NULL pointer being
dereferenced.

RRETURN releases the current frame and goes back to the previous one. If there
is a previous frame, the result is handed over in rrc and control goes to
HEAP_RETURN; if not, this was the top-level frame and match() really returns.
Note that the argument is evaluated after the frame has been released. */

#define REGISTER

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

#define RRETURN(ra)\
  {\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
316    
317    
318     /* Structure for remembering the local variables in a private frame */
319    
/* Each X-prefixed member holds the match() argument or local variable of the
same name without the X: when NO_RECURSE is defined, match() #defines those
plain names as frame->Xname. RMATCH fills in the argument members of a new
frame and chains it to the current one through Xprevframe. */

320     typedef struct heapframe {
321     struct heapframe *Xprevframe; /* Calling frame; NULL marks the top level */
322    
323     /* Function arguments that may change */
324    
325 ph10 409 USPTR Xeptr;
326 nigel 77 const uschar *Xecode;
327 ph10 409 USPTR Xmstart;
328 nigel 77 int Xoffset_top;
/* NOTE(review): Xims is signed, whereas match() declares its ims parameter as
unsigned long int and Xoriginal_ims below is unsigned - confirm this is
intended. */
329     long int Xims;
330     eptrblock *Xeptrb;
331     int Xflags;
332 nigel 91 unsigned int Xrdepth;
333 nigel 77
334     /* Function local variables */
335    
336 ph10 409 USPTR Xcallpat;
337 ph10 406 #ifdef SUPPORT_UTF8
338 ph10 409 USPTR Xcharptr;
339 ph10 406 #endif
340 ph10 409 USPTR Xdata;
341     USPTR Xnext;
342     USPTR Xpp;
343     USPTR Xprev;
344     USPTR Xsaved_eptr;
345 nigel 77
346     recursion_info Xnew_recursive;
347    
348     BOOL Xcur_is_word;
349     BOOL Xcondition;
350     BOOL Xprev_is_word;
351    
352     unsigned long int Xoriginal_ims;
353    
354     #ifdef SUPPORT_UCP
355     int Xprop_type;
356 nigel 87 int Xprop_value;
357 nigel 77 int Xprop_fail_result;
358     int Xprop_category;
359     int Xprop_chartype;
360 nigel 87 int Xprop_script;
361 ph10 123 int Xoclength;
362     uschar Xocchars[8];
363 nigel 77 #endif
364    
365 ph10 403 int Xcodelink;
366 nigel 77 int Xctype;
367 nigel 93 unsigned int Xfc;
368 nigel 77 int Xfi;
369     int Xlength;
370     int Xmax;
371     int Xmin;
372     int Xnumber;
373     int Xoffset;
374     int Xop;
375     int Xsave_capture_last;
376     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
377     int Xstacksave[REC_STACK_SAVE_MAX];
378    
379     eptrblock Xnewptrb;
380    
381 ph10 164 /* Where to jump back to */
382 nigel 77
/* RMATCH stores its rw argument (one of the RMn values) here, in the calling
frame, before it switches to the new frame. */
383 ph10 164 int Xwhere;
384 ph10 165
385 nigel 77 } heapframe;
386    
387     #endif
388    
389    
390     /***************************************************************************
391     ***************************************************************************/
392    
393    
394    
395     /*************************************************
396     * Match from current position *
397     *************************************************/
398    
399 nigel 93 /* This function is called recursively in many circumstances. Whenever it
400 nigel 77 returns a negative (error) response, the outer incarnation must also return the
401 ph10 426 same response. */
402 nigel 77
403 ph10 426 /* These macros pack up tests that are used for partial matching, and which
404     appear several times in the code. We set the "hit end" flag if the pointer is
405     at the end of the subject and also past the start of the subject (i.e.
406 ph10 427 something has been matched). For hard partial matching, we then return
407     immediately. The second one is used when we already know we are past the end of
408     the subject. */
409 ph10 426
/* Both macros do nothing unless md->partial is non-zero; a value greater than 1
is what selects the immediate PCRE_ERROR_PARTIAL return. They rely on eptr,
mstart and md being in scope at the point of use. Each one expands to a bare
"if" statement, so take care when using one as the body of an if that has an
else. */

410     #define CHECK_PARTIAL()\
411 ph10 435 if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
412 ph10 427 {\
413     md->hitend = TRUE;\
414     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
415     }
416 ph10 426
417     #define SCHECK_PARTIAL()\
418 ph10 462 if (md->partial != 0 && eptr > mstart)\
419 ph10 427 {\
420     md->hitend = TRUE;\
421     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
422     }
423 ph10 426
424 ph10 427
425 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
426     the md structure (e.g. utf8, end_subject) into individual variables to improve
427 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
428     made performance worse.
429    
430     Arguments:
431 nigel 93 eptr pointer to current character in subject
432     ecode pointer to current position in compiled code
433 ph10 168 mstart pointer to the current match start position (can be modified
434 ph10 172 by encountering \K)
435 nigel 77 offset_top current top pointer
436     md pointer to "static" info for the match
437     ims current /i, /m, and /s options
438     eptrb pointer to chain of blocks containing eptr at start of
439     brackets - for testing for empty matches
440     flags can contain
441     match_condassert - this is an assertion condition
442 nigel 93 match_cbegroup - this is the start of an unlimited repeat
443     group that can match an empty string
444 nigel 87 rdepth the recursion depth
445 nigel 77
446     Returns: MATCH_MATCH if matched ) these values are >= 0
447     MATCH_NOMATCH if failed to match )
448     a negative PCRE_ERROR_xxx value if aborted by an error condition
449 nigel 87 (e.g. stopped by repeated call or recursion limit)
450 nigel 77 */
451    
452     static int
453 ph10 409 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
454 nigel 77 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
455 nigel 91 int flags, unsigned int rdepth)
456 nigel 77 {
457     /* These variables do not need to be preserved over recursion in this function,
458 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
459     "register" because they are used a lot in loops. */
460 nigel 77
461 nigel 91 register int rrc; /* Returns from recursive calls */
462     register int i; /* Used for loops not involving calls to RMATCH() */
463 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
464 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
465 nigel 77
466 nigel 93 BOOL minimize, possessive; /* Quantifier options */
467 ph10 403 int condcode;
468 nigel 93
469 nigel 77 /* When recursion is not being used, all "local" variables that have to be
470     preserved over calls to RMATCH() are part of a "frame" which is obtained from
471     heap storage. Set up the top-level frame here; others are obtained from the
472     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
473    
474     #ifdef NO_RECURSE
475     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
476     frame->Xprevframe = NULL; /* Marks the top level */
477    
478     /* Copy in the original argument variables */
479    
480     frame->Xeptr = eptr;
481     frame->Xecode = ecode;
482 ph10 168 frame->Xmstart = mstart;
483 nigel 77 frame->Xoffset_top = offset_top;
484     frame->Xims = ims;
485     frame->Xeptrb = eptrb;
486     frame->Xflags = flags;
487 nigel 87 frame->Xrdepth = rdepth;
488 nigel 77
489     /* This is where control jumps back to in order to effect "recursion" */
490    
491     HEAP_RECURSE:
492    
493     /* Macros make the argument variables come from the current frame */
494    
495     #define eptr frame->Xeptr
496     #define ecode frame->Xecode
497 ph10 168 #define mstart frame->Xmstart
498 nigel 77 #define offset_top frame->Xoffset_top
499     #define ims frame->Xims
500     #define eptrb frame->Xeptrb
501     #define flags frame->Xflags
502 nigel 87 #define rdepth frame->Xrdepth
503 nigel 77
504     /* Ditto for the local variables */
505    
506     #ifdef SUPPORT_UTF8
507     #define charptr frame->Xcharptr
508     #endif
509     #define callpat frame->Xcallpat
510 ph10 403 #define codelink frame->Xcodelink
511 nigel 77 #define data frame->Xdata
512     #define next frame->Xnext
513     #define pp frame->Xpp
514     #define prev frame->Xprev
515     #define saved_eptr frame->Xsaved_eptr
516    
517     #define new_recursive frame->Xnew_recursive
518    
519     #define cur_is_word frame->Xcur_is_word
520     #define condition frame->Xcondition
521     #define prev_is_word frame->Xprev_is_word
522    
523     #define original_ims frame->Xoriginal_ims
524    
525     #ifdef SUPPORT_UCP
526     #define prop_type frame->Xprop_type
527 nigel 87 #define prop_value frame->Xprop_value
528 nigel 77 #define prop_fail_result frame->Xprop_fail_result
529     #define prop_category frame->Xprop_category
530     #define prop_chartype frame->Xprop_chartype
531 nigel 87 #define prop_script frame->Xprop_script
532 ph10 115 #define oclength frame->Xoclength
533     #define occhars frame->Xocchars
534 nigel 77 #endif
535    
536     #define ctype frame->Xctype
537     #define fc frame->Xfc
538     #define fi frame->Xfi
539     #define length frame->Xlength
540     #define max frame->Xmax
541     #define min frame->Xmin
542     #define number frame->Xnumber
543     #define offset frame->Xoffset
544     #define op frame->Xop
545     #define save_capture_last frame->Xsave_capture_last
546     #define save_offset1 frame->Xsave_offset1
547     #define save_offset2 frame->Xsave_offset2
548     #define save_offset3 frame->Xsave_offset3
549     #define stacksave frame->Xstacksave
550    
551     #define newptrb frame->Xnewptrb
552    
553     /* When recursion is being used, local variables are allocated on the stack and
554     get preserved during recursion in the normal way. In this environment, fi and
555     i, and fc and c, can be the same variables. */
556    
557 nigel 93 #else /* NO_RECURSE not defined */
558 nigel 77 #define fi i
559     #define fc c
560    
561    
562 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
563     const uschar *charptr; /* in small blocks of the code. My normal */
564     #endif /* style of coding would have declared */
565     const uschar *callpat; /* them within each of those blocks. */
566     const uschar *data; /* However, in order to accommodate the */
567     const uschar *next; /* version of this code that uses an */
568     USPTR pp; /* external "stack" implemented on the */
569     const uschar *prev; /* heap, it is easier to declare them all */
570     USPTR saved_eptr; /* here, so the declarations can be cut */
571     /* out in a block. The only declarations */
572     recursion_info new_recursive; /* within blocks below are for variables */
573     /* that do not have to be preserved over */
574     BOOL cur_is_word; /* a recursive call to RMATCH(). */
575     BOOL condition;
576 nigel 77 BOOL prev_is_word;
577    
578     unsigned long int original_ims;
579    
580     #ifdef SUPPORT_UCP
581     int prop_type;
582 nigel 87 int prop_value;
583 nigel 77 int prop_fail_result;
584     int prop_category;
585     int prop_chartype;
586 nigel 87 int prop_script;
587 ph10 115 int oclength;
588     uschar occhars[8];
589 nigel 77 #endif
590    
591 ph10 399 int codelink;
592 nigel 77 int ctype;
593     int length;
594     int max;
595     int min;
596     int number;
597     int offset;
598     int op;
599     int save_capture_last;
600     int save_offset1, save_offset2, save_offset3;
601     int stacksave[REC_STACK_SAVE_MAX];
602    
603     eptrblock newptrb;
604 nigel 93 #endif /* NO_RECURSE */
605 nigel 77
606     /* These statements are here to stop the compiler complaining about uninitialized
607     variables. */
608    
609     #ifdef SUPPORT_UCP
610 nigel 87 prop_value = 0;
611 nigel 77 prop_fail_result = 0;
612     #endif
613    
614 nigel 93
615 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
616     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
617     used. Thanks to Ian Taylor for noticing this possibility and sending the
618     original patch. */
619    
620     TAIL_RECURSE:
621    
622 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
623     are specified by the macro RMATCH and RRETURN is used to return. When
624     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
625 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
626 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
627     complicated macro. It has to be used in one particular way. This shouldn't,
628     however, impact performance when true recursion is being used. */
629 nigel 77
630 ph10 164 #ifdef SUPPORT_UTF8
631     utf8 = md->utf8; /* Local copy of the flag */
632     #else
633     utf8 = FALSE;
634     #endif
635    
636 nigel 87 /* First check that we haven't called match() too many times, or that we
637     haven't exceeded the recursive call limit. */
638    
639 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
640 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
641 nigel 77
642     original_ims = ims; /* Save for resetting on ')' */
643 nigel 91
644 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
645     string, the match_cbegroup flag is set. When this is the case, add the current
646     subject pointer to the chain of such remembered pointers, to be checked when we
647     hit the closing ket, in order to break infinite loops that match no characters.
648 ph10 197 When match() is called in other circumstances, don't add to the chain. The
649     match_cbegroup flag must NOT be used with tail recursion, because the memory
650     block that is used is on the stack, so a new one may be required for each
651     match(). */
652 nigel 77
653 nigel 93 if ((flags & match_cbegroup) != 0)
654 nigel 77 {
655 ph10 197 newptrb.epb_saved_eptr = eptr;
656     newptrb.epb_prev = eptrb;
657     eptrb = &newptrb;
658 nigel 77 }
659    
660 nigel 93 /* Now start processing the opcodes. */
661 nigel 77
662     for (;;)
663     {
664 nigel 93 minimize = possessive = FALSE;
665 nigel 77 op = *ecode;
666 ph10 443
667 nigel 93 switch(op)
668     {
669 ph10 210 case OP_FAIL:
670 ph10 212 RRETURN(MATCH_NOMATCH);
671 ph10 211
672 ph10 210 case OP_PRUNE:
673     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
674     ims, eptrb, flags, RM51);
675     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
676 ph10 212 RRETURN(MATCH_PRUNE);
677 ph10 211
678 ph10 210 case OP_COMMIT:
679     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
680     ims, eptrb, flags, RM52);
681     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
682 ph10 212 RRETURN(MATCH_COMMIT);
683 ph10 211
684 ph10 210 case OP_SKIP:
685     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
686     ims, eptrb, flags, RM53);
687     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
688 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
689 ph10 212 RRETURN(MATCH_SKIP);
690 ph10 211
691 ph10 210 case OP_THEN:
692     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
693 ph10 212 ims, eptrb, flags, RM54);
694 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
695 ph10 212 RRETURN(MATCH_THEN);
696 ph10 211
697 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
698     the current subject position in the working slot at the top of the vector.
699     We mustn't change the current values of the data slot, because they may be
700     set from a previous iteration of this group, and be referred to by a
701     reference inside the group.
702 nigel 77
703 nigel 93 If the bracket fails to match, we need to restore this value and also the
704     values of the final offsets, in case they were set by a previous iteration
705     of the same bracket.
706 nigel 77
707 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
708     a non-capturing bracket. Don't worry about setting the flag for the error
709     case here; that is handled in the code for KET. */
710 nigel 77
711 nigel 93 case OP_CBRA:
712     case OP_SCBRA:
713     number = GET2(ecode, 1+LINK_SIZE);
714 nigel 77 offset = number << 1;
715    
716 ph10 475 #ifdef PCRE_DEBUG
717 nigel 93 printf("start bracket %d\n", number);
718     printf("subject=");
719 nigel 77 pchars(eptr, 16, TRUE, md);
720     printf("\n");
721     #endif
722    
723     if (offset < md->offset_max)
724     {
725     save_offset1 = md->offset_vector[offset];
726     save_offset2 = md->offset_vector[offset+1];
727     save_offset3 = md->offset_vector[md->offset_end - number];
728     save_capture_last = md->capture_last;
729    
730     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
731     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
732    
733 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
734 nigel 77 do
735     {
736 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
737     ims, eptrb, flags, RM1);
738 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
739 nigel 77 md->capture_last = save_capture_last;
740     ecode += GET(ecode, 1);
741     }
742     while (*ecode == OP_ALT);
743    
744     DPRINTF(("bracket %d failed\n", number));
745    
746     md->offset_vector[offset] = save_offset1;
747     md->offset_vector[offset+1] = save_offset2;
748     md->offset_vector[md->offset_end - number] = save_offset3;
749    
750     RRETURN(MATCH_NOMATCH);
751     }
752    
753 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
754     as a non-capturing bracket. */
755 nigel 77
756 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
757     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
758    
759 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
760 nigel 77
761 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
762     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
763    
764 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
765     final alternative within the brackets, we would return the result of a
766     recursive call to match() whatever happened. We can reduce stack usage by
767 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
768     is set.*/
769 nigel 77
770 nigel 93 case OP_BRA:
771     case OP_SBRA:
772     DPRINTF(("start non-capturing bracket\n"));
773     flags = (op >= OP_SBRA)? match_cbegroup : 0;
774 nigel 91 for (;;)
775 nigel 77 {
776 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
777 nigel 93 {
778 ph10 197 if (flags == 0) /* Not a possibly empty group */
779     {
780     ecode += _pcre_OP_lengths[*ecode];
781     DPRINTF(("bracket 0 tail recursion\n"));
782     goto TAIL_RECURSE;
783     }
784    
785     /* Possibly empty group; can't use tail recursion. */
786    
787     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
788     eptrb, flags, RM48);
789     RRETURN(rrc);
790 nigel 93 }
791 nigel 91
792     /* For non-final alternatives, continue the loop for a NOMATCH result;
793     otherwise return. */
794    
795 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
796     eptrb, flags, RM2);
797 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
798 nigel 77 ecode += GET(ecode, 1);
799     }
800 nigel 91 /* Control never reaches here. */
801 nigel 77
802     /* Conditional group: compilation checked that there are no more than
803     two branches. If the condition is false, skipping the first branch takes us
804     past the end if there is only one branch, but that's OK because that is
805 nigel 91 exactly what going to the ket would do. As there is only one branch to be
806     obeyed, we can use tail recursion to avoid using another stack frame. */
807 nigel 77
808     case OP_COND:
809 nigel 93 case OP_SCOND:
810 ph10 399 codelink= GET(ecode, 1);
811 ph10 406
812 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
813     inserted between OP_COND and an assertion condition. */
814 ph10 392
815 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
816     {
817     if (pcre_callout != NULL)
818     {
819     pcre_callout_block cb;
820     cb.version = 1; /* Version 1 of the callout block */
821     cb.callout_number = ecode[LINK_SIZE+2];
822     cb.offset_vector = md->offset_vector;
823     cb.subject = (PCRE_SPTR)md->start_subject;
824     cb.subject_length = md->end_subject - md->start_subject;
825     cb.start_match = mstart - md->start_subject;
826     cb.current_position = eptr - md->start_subject;
827     cb.pattern_position = GET(ecode, LINK_SIZE + 3);
828     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
829     cb.capture_top = offset_top/2;
830     cb.capture_last = md->capture_last;
831     cb.callout_data = md->callout_data;
832     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
833     if (rrc < 0) RRETURN(rrc);
834     }
835     ecode += _pcre_OP_lengths[OP_CALLOUT];
836     }
837 ph10 392
838 ph10 399 condcode = ecode[LINK_SIZE+1];
839 ph10 406
840 ph10 381 /* Now see what the actual condition is */
841 ph10 392
842 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
843 nigel 77 {
844 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
845     {
846 ph10 461 condition = FALSE;
847     ecode += GET(ecode, 1);
848     }
849 ph10 459 else
850 ph10 461 {
851 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
852     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
853 ph10 461
854 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
855     false, but the test was set up by name, scan the table to see if the
856     name refers to any other numbers, and test them. The condition is true
857     if any one is set. */
858 ph10 461
859 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
860     {
861     uschar *slotA = md->name_table;
862     for (i = 0; i < md->name_count; i++)
863 ph10 461 {
864     if (GET2(slotA, 0) == recno) break;
865 ph10 459 slotA += md->name_entry_size;
866     }
867 ph10 461
868 ph10 459 /* Found a name for the number - there can be only one; duplicate
869     names for different numbers are allowed, but not vice versa. First
870     scan down for duplicates. */
871 ph10 461
872 ph10 459 if (i < md->name_count)
873 ph10 461 {
874 ph10 459 uschar *slotB = slotA;
875     while (slotB > md->name_table)
876     {
877     slotB -= md->name_entry_size;
878     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
879     {
880     condition = GET2(slotB, 0) == md->recursive->group_num;
881 ph10 461 if (condition) break;
882     }
883 ph10 459 else break;
884 ph10 461 }
885    
886 ph10 459 /* Scan up for duplicates */
887 ph10 461
888 ph10 459 if (!condition)
889 ph10 461 {
890 ph10 459 slotB = slotA;
891     for (i++; i < md->name_count; i++)
892     {
893     slotB += md->name_entry_size;
894     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
895     {
896     condition = GET2(slotB, 0) == md->recursive->group_num;
897     if (condition) break;
898 ph10 461 }
899 ph10 459 else break;
900 ph10 461 }
901     }
902 ph10 459 }
903 ph10 461 }
904    
905 ph10 459 /* Choose branch according to the condition */
906 ph10 461
907 ph10 459 ecode += condition? 3 : GET(ecode, 1);
908     }
909 ph10 461 }
910 nigel 93
911 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
912 nigel 93 {
913 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
914 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
915 ph10 461
916 ph10 459 /* If the numbered capture is unset, but the reference was by name,
917 ph10 461 scan the table to see if the name refers to any other numbers, and test
918     them. The condition is true if any one is set. This is tediously similar
919     to the code above, but not close enough to try to amalgamate. */
920    
921 ph10 459 if (!condition && condcode == OP_NCREF)
922     {
923 ph10 461 int refno = offset >> 1;
924 ph10 459 uschar *slotA = md->name_table;
925 ph10 461
926 ph10 459 for (i = 0; i < md->name_count; i++)
927 ph10 461 {
928     if (GET2(slotA, 0) == refno) break;
929 ph10 459 slotA += md->name_entry_size;
930     }
931 ph10 461
932     /* Found a name for the number - there can be only one; duplicate names
933     for different numbers are allowed, but not vice versa. First scan down
934 ph10 459 for duplicates. */
935 ph10 461
936 ph10 459 if (i < md->name_count)
937 ph10 461 {
938 ph10 459 uschar *slotB = slotA;
939     while (slotB > md->name_table)
940     {
941     slotB -= md->name_entry_size;
942     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
943     {
944     offset = GET2(slotB, 0) << 1;
945 ph10 461 condition = offset < offset_top &&
946 ph10 459 md->offset_vector[offset] >= 0;
947 ph10 461 if (condition) break;
948     }
949 ph10 459 else break;
950 ph10 461 }
951    
952 ph10 459 /* Scan up for duplicates */
953 ph10 461
954 ph10 459 if (!condition)
955 ph10 461 {
956 ph10 459 slotB = slotA;
957     for (i++; i < md->name_count; i++)
958     {
959     slotB += md->name_entry_size;
960     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
961     {
962     offset = GET2(slotB, 0) << 1;
963 ph10 461 condition = offset < offset_top &&
964 ph10 459 md->offset_vector[offset] >= 0;
965 ph10 461 if (condition) break;
966     }
967 ph10 459 else break;
968 ph10 461 }
969     }
970 ph10 459 }
971 ph10 461 }
972    
973 ph10 459 /* Choose branch according to the condition */
974    
975 nigel 93 ecode += condition? 3 : GET(ecode, 1);
976 nigel 77 }
977    
978 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
979 nigel 93 {
980     condition = FALSE;
981     ecode += GET(ecode, 1);
982     }
983    
984 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
985 nigel 93 the final argument match_condassert causes it to stop at the end of an
986     assertion. */
987 nigel 77
988     else
989     {
990 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
991     match_condassert, RM3);
992 nigel 77 if (rrc == MATCH_MATCH)
993     {
994 nigel 93 condition = TRUE;
995     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
996 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
997     }
998 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
999 nigel 77 {
1000     RRETURN(rrc); /* Need braces because of following else */
1001     }
1002 nigel 93 else
1003     {
1004     condition = FALSE;
1005 ph10 399 ecode += codelink;
1006 nigel 93 }
1007     }
1008 nigel 91
1009 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1010 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1011     match_cbegroup is required for an unlimited repeat of a possibly empty
1012     group. If the second alternative doesn't exist, we can just plough on. */
1013 nigel 91
1014 nigel 93 if (condition || *ecode == OP_ALT)
1015     {
1016 nigel 91 ecode += 1 + LINK_SIZE;
1017 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1018     {
1019     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1020     RRETURN(rrc);
1021     }
1022     else /* Group must match something */
1023     {
1024     flags = 0;
1025     goto TAIL_RECURSE;
1026     }
1027 nigel 77 }
1028 ph10 395 else /* Condition false & no alternative */
1029 nigel 93 {
1030     ecode += 1 + LINK_SIZE;
1031     }
1032     break;
1033 nigel 77
1034 ph10 461
1035 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1036     to close any currently open capturing brackets. */
1037 ph10 461
1038 ph10 447 case OP_CLOSE:
1039 ph10 461 number = GET2(ecode, 1);
1040 ph10 447 offset = number << 1;
1041 ph10 461
1042 ph10 475 #ifdef PCRE_DEBUG
1043 ph10 447 printf("end bracket %d at *ACCEPT", number);
1044     printf("\n");
1045     #endif
1046 nigel 77
1047 ph10 447 md->capture_last = number;
1048     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1049     {
1050     md->offset_vector[offset] =
1051     md->offset_vector[md->offset_end - number];
1052     md->offset_vector[offset+1] = eptr - md->start_subject;
1053     if (offset_top <= offset) offset_top = offset + 2;
1054     }
1055     ecode += 3;
1056 ph10 461 break;
1057 ph10 447
1058    
1059 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1060     recursion, we should restore the offsets appropriately and continue from
1061     after the call. */
1062 nigel 77
1063 ph10 210 case OP_ACCEPT:
1064 nigel 77 case OP_END:
1065     if (md->recursive != NULL && md->recursive->group_num == 0)
1066     {
1067     recursion_info *rec = md->recursive;
1068 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1069 nigel 77 md->recursive = rec->prevrec;
1070     memmove(md->offset_vector, rec->offset_save,
1071     rec->saved_max * sizeof(int));
1072 ph10 461 offset_top = rec->save_offset_top;
1073 nigel 77 ims = original_ims;
1074     ecode = rec->after_call;
1075     break;
1076     }
1077    
1078 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1079     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1080     the subject. In both cases, backtracking will then try other alternatives,
1081     if any. */
1082 ph10 443
1083 ph10 442 if (eptr == mstart &&
1084     (md->notempty ||
1085 ph10 443 (md->notempty_atstart &&
1086 ph10 442 mstart == md->start_subject + md->start_offset)))
1087 ph10 443 RRETURN(MATCH_NOMATCH);
1088    
1089 ph10 442 /* Otherwise, we have a match. */
1090 nigel 77
1091 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1092     md->end_offset_top = offset_top; /* and how many extracts were taken */
1093 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1094 nigel 77 RRETURN(MATCH_MATCH);
1095    
1096     /* Change option settings */
1097    
1098     case OP_OPT:
1099     ims = ecode[1];
1100     ecode += 2;
1101     DPRINTF(("ims set to %02lx\n", ims));
1102     break;
1103    
1104     /* Assertion brackets. Check the alternative branches in turn - the
1105     matching won't pass the KET for an assertion. If any one branch matches,
1106     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1107     start of each branch to move the current point backwards, so the code at
1108     this level is identical to the lookahead case. */
1109    
1110     case OP_ASSERT:
1111     case OP_ASSERTBACK:
1112     do
1113     {
1114 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1115     RM4);
1116 ph10 500 if (rrc == MATCH_MATCH)
1117     {
1118     mstart = md->start_match_ptr; /* In case \K reset it */
1119     break;
1120     }
1121 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1122 nigel 77 ecode += GET(ecode, 1);
1123     }
1124     while (*ecode == OP_ALT);
1125     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1126    
1127     /* If checking an assertion for a condition, return MATCH_MATCH. */
1128    
1129     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1130    
1131     /* Continue from after the assertion, updating the offsets high water
1132     mark, since extracts may have been taken during the assertion. */
1133    
1134     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1135     ecode += 1 + LINK_SIZE;
1136     offset_top = md->end_offset_top;
1137     continue;
1138    
1139 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1140 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1141 ph10 473 branches. */
1142 nigel 77
1143     case OP_ASSERT_NOT:
1144     case OP_ASSERTBACK_NOT:
1145     do
1146     {
1147 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1148     RM5);
1149 nigel 77 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
1150 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1151     {
1152     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1153 ph10 482 break;
1154     }
1155 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1156 nigel 77 ecode += GET(ecode,1);
1157     }
1158     while (*ecode == OP_ALT);
1159    
1160     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1161    
1162     ecode += 1 + LINK_SIZE;
1163     continue;
1164    
1165     /* Move the subject pointer back. This occurs only at the start of
1166     each branch of a lookbehind assertion. If we are too close to the start to
1167     move back, this match function fails. When working with UTF-8 we move
1168     back a number of characters, not bytes. */
1169    
1170     case OP_REVERSE:
1171     #ifdef SUPPORT_UTF8
1172     if (utf8)
1173     {
1174 nigel 93 i = GET(ecode, 1);
1175     while (i-- > 0)
1176 nigel 77 {
1177     eptr--;
1178     if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1179 ph10 207 BACKCHAR(eptr);
1180 nigel 77 }
1181     }
1182     else
1183     #endif
1184    
1185     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1186    
1187     {
1188 nigel 93 eptr -= GET(ecode, 1);
1189 nigel 77 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1190     }
1191    
1192 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1193 nigel 77
1194 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1195 nigel 77 ecode += 1 + LINK_SIZE;
1196     break;
1197    
1198     /* The callout item calls an external function, if one is provided, passing
1199     details of the match so far. This is mainly for debugging, though the
1200     function is able to force a failure. */
1201    
1202     case OP_CALLOUT:
1203     if (pcre_callout != NULL)
1204     {
1205     pcre_callout_block cb;
1206     cb.version = 1; /* Version 1 of the callout block */
1207     cb.callout_number = ecode[1];
1208     cb.offset_vector = md->offset_vector;
1209 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1210 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
1211 ph10 168 cb.start_match = mstart - md->start_subject;
1212 nigel 77 cb.current_position = eptr - md->start_subject;
1213     cb.pattern_position = GET(ecode, 2);
1214     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1215     cb.capture_top = offset_top/2;
1216     cb.capture_last = md->capture_last;
1217     cb.callout_data = md->callout_data;
1218     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1219     if (rrc < 0) RRETURN(rrc);
1220     }
1221     ecode += 2 + 2*LINK_SIZE;
1222     break;
1223    
1224     /* Recursion either matches the current regex, or some subexpression. The
1225     offset data is the offset to the starting bracket from the start of the
1226     whole pattern. (This is so that it works from duplicated subpatterns.)
1227    
1228     If there are any capturing brackets started but not finished, we have to
1229     save their starting points and reinstate them after the recursion. However,
1230     we don't know how many such there are (offset_top records the completed
1231     total) so we just have to save all the potential data. There may be up to
1232     65535 such values, which is too large to put on the stack, but using malloc
1233     for small numbers seems expensive. As a compromise, the stack is used when
1234     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1235     is used. If the malloc fails, the match is abandoned at this point and
1236     PCRE_ERROR_NOMEMORY is returned; callers of match() pass any result
1237     other than MATCH_NOMATCH straight back up.
1238    
1239     There are also other values that have to be saved. We use a chained
1240     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1241     for the original version of this logic. */
1242    
1243     case OP_RECURSE:
1244     {
1245     callpat = md->start_code + GET(ecode, 1);
1246 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1247     GET2(callpat, 1 + LINK_SIZE);
1248 nigel 77
1249     /* Add to "recursing stack" */
1250    
1251     new_recursive.prevrec = md->recursive;
1252     md->recursive = &new_recursive;
1253    
1254     /* Find where to continue from afterwards */
1255    
1256     ecode += 1 + LINK_SIZE;
1257     new_recursive.after_call = ecode;
1258    
1259     /* Now save the offset data. */
1260    
1261     new_recursive.saved_max = md->offset_end;
1262     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1263     new_recursive.offset_save = stacksave;
1264     else
1265     {
1266     new_recursive.offset_save =
1267     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1268     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1269     }
1270    
1271     memcpy(new_recursive.offset_save, md->offset_vector,
1272     new_recursive.saved_max * sizeof(int));
1273 ph10 461 new_recursive.save_offset_top = offset_top;
1274 nigel 77
1275     /* OK, now we can do the recursion. For each top-level alternative we
1276     restore the offset and recursion data. */
1277    
1278     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1279 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1280 nigel 77 do
1281     {
1282 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1283     md, ims, eptrb, flags, RM6);
1284 nigel 77 if (rrc == MATCH_MATCH)
1285     {
1286 nigel 87 DPRINTF(("Recursion matched\n"));
1287 nigel 77 md->recursive = new_recursive.prevrec;
1288     if (new_recursive.offset_save != stacksave)
1289     (pcre_free)(new_recursive.offset_save);
1290     RRETURN(MATCH_MATCH);
1291     }
1292 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1293 nigel 87 {
1294     DPRINTF(("Recursion gave error %d\n", rrc));
1295 ph10 400 if (new_recursive.offset_save != stacksave)
1296     (pcre_free)(new_recursive.offset_save);
1297 nigel 87 RRETURN(rrc);
1298     }
1299 nigel 77
1300     md->recursive = &new_recursive;
1301     memcpy(md->offset_vector, new_recursive.offset_save,
1302     new_recursive.saved_max * sizeof(int));
1303     callpat += GET(callpat, 1);
1304     }
1305     while (*callpat == OP_ALT);
1306    
1307     DPRINTF(("Recursion didn't match\n"));
1308     md->recursive = new_recursive.prevrec;
1309     if (new_recursive.offset_save != stacksave)
1310     (pcre_free)(new_recursive.offset_save);
1311     RRETURN(MATCH_NOMATCH);
1312     }
1313     /* Control never reaches here */
1314    
1315     /* "Once" brackets are like assertion brackets except that after a match,
1316     the point in the subject string is not moved back. Thus there can never be
1317     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1318     Check the alternative branches in turn - the matching won't pass the KET
1319     for this kind of subpattern. If any one branch matches, we carry on as at
1320 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1321     the start-of-match value in case it was changed by \K. */
1322 nigel 77
1323     case OP_ONCE:
1324 nigel 91 prev = ecode;
1325     saved_eptr = eptr;
1326    
1327     do
1328 nigel 77 {
1329 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1330 ph10 500 if (rrc == MATCH_MATCH)
1331     {
1332     mstart = md->start_match_ptr;
1333     break;
1334     }
1335 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1336 nigel 91 ecode += GET(ecode,1);
1337     }
1338     while (*ecode == OP_ALT);
1339 nigel 77
1340 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1341 nigel 77
1342 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1343 nigel 77
1344 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1345     mark, since extracts may have been taken. */
1346 nigel 77
1347 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1348 nigel 77
1349 nigel 91 offset_top = md->end_offset_top;
1350     eptr = md->end_match_ptr;
1351 nigel 77
1352 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1353     happens for a repeating ket if no characters were matched in the group.
1354     This is the forcible breaking of infinite loops as implemented in Perl
1355     5.005. If there is an options reset, it will get obeyed in the normal
1356     course of events. */
1357 nigel 77
1358 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1359     {
1360     ecode += 1+LINK_SIZE;
1361     break;
1362     }
1363 nigel 77
1364 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1365     preceding bracket, in the appropriate order. The second "call" of match()
1366     uses tail recursion, to avoid using another stack frame. We need to reset
1367     any options that changed within the bracket before re-running it, so
1368     check the next opcode. */
1369 nigel 77
1370 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1371     {
1372     ims = (ims & ~PCRE_IMS) | ecode[4];
1373     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1374     }
1375 nigel 77
1376 nigel 91 if (*ecode == OP_KETRMIN)
1377     {
1378 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1379 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1380     ecode = prev;
1381 ph10 197 flags = 0;
1382 nigel 91 goto TAIL_RECURSE;
1383 nigel 77 }
1384 nigel 91 else /* OP_KETRMAX */
1385     {
1386 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1387 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1388     ecode += 1 + LINK_SIZE;
1389 ph10 197 flags = 0;
1390 nigel 91 goto TAIL_RECURSE;
1391     }
1392     /* Control never gets here */
1393 nigel 77
1394     /* An alternation is the end of a branch; scan along to find the end of the
1395     bracketed group and go to there. */
1396    
1397     case OP_ALT:
1398     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1399     break;
1400    
1401 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1402     indicating that it may occur zero times. It may repeat infinitely, or not
1403     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1404     with fixed upper repeat limits are compiled as a number of copies, with the
1405     optional ones preceded by BRAZERO or BRAMINZERO. */
1406 nigel 77
1407     case OP_BRAZERO:
1408     {
1409     next = ecode+1;
1410 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1411 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1412     do next += GET(next,1); while (*next == OP_ALT);
1413 nigel 93 ecode = next + 1 + LINK_SIZE;
1414 nigel 77 }
1415     break;
1416    
1417     case OP_BRAMINZERO:
1418     {
1419     next = ecode+1;
1420 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1421 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1422 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1423     ecode++;
1424     }
1425     break;
1426    
1427 ph10 335 case OP_SKIPZERO:
1428     {
1429     next = ecode+1;
1430     do next += GET(next,1); while (*next == OP_ALT);
1431     ecode = next + 1 + LINK_SIZE;
1432     }
1433     break;
1434    
1435 nigel 93 /* End of a group, repeated or non-repeating. */
1436 nigel 77
1437     case OP_KET:
1438     case OP_KETRMIN:
1439     case OP_KETRMAX:
1440 nigel 91 prev = ecode - GET(ecode, 1);
1441 nigel 77
1442 nigel 93 /* If this was a group that remembered the subject start, in order to break
1443     infinite repeats of empty string matches, retrieve the subject start from
1444     the chain. Otherwise, set it NULL. */
1445 nigel 77
1446 nigel 93 if (*prev >= OP_SBRA)
1447     {
1448     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1449     eptrb = eptrb->epb_prev; /* Backup to previous group */
1450     }
1451     else saved_eptr = NULL;
1452 nigel 77
1453 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1454     matching and return MATCH_MATCH, but record the current high water mark for
1455     use by positive assertions. We also need to record the match start in case
1456     it was changed by \K. */
1457 nigel 93
1458 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1459     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1460     *prev == OP_ONCE)
1461     {
1462     md->end_match_ptr = eptr; /* For ONCE */
1463     md->end_offset_top = offset_top;
1464 ph10 500 md->start_match_ptr = mstart;
1465 nigel 91 RRETURN(MATCH_MATCH);
1466     }
1467 nigel 77
1468 nigel 93 /* For capturing groups we have to check the group number back at the start
1469     and if necessary complete handling an extraction by setting the offsets and
1470     bumping the high water mark. Note that whole-pattern recursion is coded as
1471     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1472     when the OP_END is reached. Other recursion is handled here. */
1473 nigel 77
1474 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1475 nigel 91 {
1476 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1477 nigel 91 offset = number << 1;
1478 ph10 461
1479 ph10 475 #ifdef PCRE_DEBUG
1480 nigel 91 printf("end bracket %d", number);
1481     printf("\n");
1482 nigel 77 #endif
1483    
1484 nigel 93 md->capture_last = number;
1485     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1486 nigel 91 {
1487 nigel 93 md->offset_vector[offset] =
1488     md->offset_vector[md->offset_end - number];
1489     md->offset_vector[offset+1] = eptr - md->start_subject;
1490     if (offset_top <= offset) offset_top = offset + 2;
1491     }
1492 nigel 77
1493 nigel 93 /* Handle a recursively called group. Restore the offsets
1494     appropriately and continue from after the call. */
1495 nigel 77
1496 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1497     {
1498     recursion_info *rec = md->recursive;
1499     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1500     md->recursive = rec->prevrec;
1501     memcpy(md->offset_vector, rec->offset_save,
1502     rec->saved_max * sizeof(int));
1503 ph10 461 offset_top = rec->save_offset_top;
1504 nigel 93 ecode = rec->after_call;
1505     ims = original_ims;
1506     break;
1507 nigel 77 }
1508 nigel 91 }
1509 nigel 77
1510 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1511     flags, in case they got changed during the group. */
1512 nigel 77
1513 nigel 91 ims = original_ims;
1514     DPRINTF(("ims reset to %02lx\n", ims));
1515 nigel 77
1516 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1517     happens for a repeating ket if no characters were matched in the group.
1518     This is the forcible breaking of infinite loops as implemented in Perl
1519     5.005. If there is an options reset, it will get obeyed in the normal
1520     course of events. */
1521 nigel 77
1522 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1523     {
1524     ecode += 1 + LINK_SIZE;
1525     break;
1526     }
1527 nigel 77
1528 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1529     preceding bracket, in the appropriate order. In the second case, we can use
1530 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1531     unlimited repeat of a group that can match an empty string. */
1532 nigel 77
1533 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1534    
1535 nigel 91 if (*ecode == OP_KETRMIN)
1536     {
1537 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1538 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1539 ph10 197 if (flags != 0) /* Could match an empty string */
1540     {
1541     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1542     RRETURN(rrc);
1543     }
1544 nigel 91 ecode = prev;
1545     goto TAIL_RECURSE;
1546 nigel 77 }
1547 nigel 91 else /* OP_KETRMAX */
1548     {
1549 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1550 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1551     ecode += 1 + LINK_SIZE;
1552 ph10 197 flags = 0;
1553 nigel 91 goto TAIL_RECURSE;
1554     }
1555     /* Control never gets here */
1556 nigel 77
1557     /* Start of subject unless notbol, or after internal newline if multiline */
1558    
1559     case OP_CIRC:
1560     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1561     if ((ims & PCRE_MULTILINE) != 0)
1562     {
1563 nigel 91 if (eptr != md->start_subject &&
1564 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1565 nigel 77 RRETURN(MATCH_NOMATCH);
1566     ecode++;
1567     break;
1568     }
1569     /* ... else fall through */
1570    
1571     /* Start of subject assertion */
1572    
1573     case OP_SOD:
1574     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1575     ecode++;
1576     break;
1577    
1578     /* Start of match assertion */
1579    
1580     case OP_SOM:
1581     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1582     ecode++;
1583     break;
1584 ph10 172
1585 ph10 168 /* Reset the start of match point */
1586 ph10 172
1587 ph10 168 case OP_SET_SOM:
1588     mstart = eptr;
1589 ph10 172 ecode++;
1590     break;
1591 nigel 77
1592     /* Assert before internal newline if multiline, or before a terminating
1593     newline unless endonly is set, else end of subject unless noteol is set. */
1594    
1595     case OP_DOLL:
1596     if ((ims & PCRE_MULTILINE) != 0)
1597     {
1598     if (eptr < md->end_subject)
1599 nigel 91 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1600 nigel 77 else
1601     { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1602     ecode++;
1603     break;
1604     }
1605     else
1606     {
1607     if (md->noteol) RRETURN(MATCH_NOMATCH);
1608     if (!md->endonly)
1609     {
1610 nigel 91 if (eptr != md->end_subject &&
1611 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1612 nigel 77 RRETURN(MATCH_NOMATCH);
1613     ecode++;
1614     break;
1615     }
1616     }
1617 nigel 91 /* ... else fall through for endonly */
1618 nigel 77
1619     /* End of subject assertion (\z) */
1620    
1621     case OP_EOD:
1622     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1623     ecode++;
1624     break;
1625    
1626     /* End of subject or ending \n assertion (\Z) */
1627    
1628     case OP_EODN:
1629 nigel 91 if (eptr != md->end_subject &&
1630 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1631 nigel 91 RRETURN(MATCH_NOMATCH);
1632 nigel 77 ecode++;
1633     break;
1634    
1635     /* Word boundary assertions */
1636    
1637     case OP_NOT_WORD_BOUNDARY:
1638     case OP_WORD_BOUNDARY:
1639     {
1640    
1641     /* Find out if the previous and current characters are "word" characters.
1642     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1643 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1644 ph10 435 partial matching. */
1645 nigel 77
1646     #ifdef SUPPORT_UTF8
1647     if (utf8)
1648     {
1649     if (eptr == md->start_subject) prev_is_word = FALSE; else
1650     {
1651 ph10 409 USPTR lastptr = eptr - 1;
1652 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1653 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1654 nigel 77 GETCHAR(c, lastptr);
1655     prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1656     }
1657 ph10 443 if (eptr >= md->end_subject)
1658 nigel 77 {
1659 ph10 443 SCHECK_PARTIAL();
1660     cur_is_word = FALSE;
1661 ph10 428 }
1662     else
1663     {
1664 nigel 77 GETCHAR(c, eptr);
1665     cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1666     }
1667     }
1668     else
1669     #endif
1670    
1671 ph10 428 /* Not in UTF-8 mode */
1672 nigel 77
1673     {
1674 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1675     {
1676 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1677 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1678     }
1679 ph10 443 if (eptr >= md->end_subject)
1680 ph10 428 {
1681 ph10 443 SCHECK_PARTIAL();
1682     cur_is_word = FALSE;
1683 ph10 428 }
1684     else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1685 nigel 77 }
1686    
1687     /* Now see if the situation is what we want */
1688    
1689     if ((*ecode++ == OP_WORD_BOUNDARY)?
1690     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1691     RRETURN(MATCH_NOMATCH);
1692     }
1693     break;
1694    
1695     /* Match a single character type; inline for speed */
1696    
1697     case OP_ANY:
1698 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1699 ph10 345 /* Fall through */
1700    
1701 ph10 341 case OP_ALLANY:
1702 ph10 443 if (eptr++ >= md->end_subject)
1703 ph10 428 {
1704 ph10 443 SCHECK_PARTIAL();
1705 ph10 428 RRETURN(MATCH_NOMATCH);
1706 ph10 443 }
1707 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1708 nigel 77 ecode++;
1709     break;
1710    
1711     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1712     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1713    
1714     case OP_ANYBYTE:
1715 ph10 443 if (eptr++ >= md->end_subject)
1716 ph10 428 {
1717 ph10 443 SCHECK_PARTIAL();
1718 ph10 428 RRETURN(MATCH_NOMATCH);
1719 ph10 443 }
1720 nigel 77 ecode++;
1721     break;
1722    
1723     case OP_NOT_DIGIT:
1724 ph10 443 if (eptr >= md->end_subject)
1725 ph10 428 {
1726 ph10 443 SCHECK_PARTIAL();
1727 ph10 428 RRETURN(MATCH_NOMATCH);
1728 ph10 443 }
1729 nigel 77 GETCHARINCTEST(c, eptr);
1730     if (
1731     #ifdef SUPPORT_UTF8
1732     c < 256 &&
1733     #endif
1734     (md->ctypes[c] & ctype_digit) != 0
1735     )
1736     RRETURN(MATCH_NOMATCH);
1737     ecode++;
1738     break;
1739    
1740     case OP_DIGIT:
1741 ph10 443 if (eptr >= md->end_subject)
1742 ph10 428 {
1743 ph10 443 SCHECK_PARTIAL();
1744 ph10 428 RRETURN(MATCH_NOMATCH);
1745 ph10 443 }
1746 nigel 77 GETCHARINCTEST(c, eptr);
1747     if (
1748     #ifdef SUPPORT_UTF8
1749     c >= 256 ||
1750     #endif
1751     (md->ctypes[c] & ctype_digit) == 0
1752     )
1753     RRETURN(MATCH_NOMATCH);
1754     ecode++;
1755     break;
1756    
1757     case OP_NOT_WHITESPACE:
1758 ph10 443 if (eptr >= md->end_subject)
1759 ph10 428 {
1760 ph10 443 SCHECK_PARTIAL();
1761 ph10 428 RRETURN(MATCH_NOMATCH);
1762 ph10 443 }
1763 nigel 77 GETCHARINCTEST(c, eptr);
1764     if (
1765     #ifdef SUPPORT_UTF8
1766     c < 256 &&
1767     #endif
1768     (md->ctypes[c] & ctype_space) != 0
1769     )
1770     RRETURN(MATCH_NOMATCH);
1771     ecode++;
1772     break;
1773    
1774     case OP_WHITESPACE:
1775 ph10 443 if (eptr >= md->end_subject)
1776 ph10 428 {
1777 ph10 443 SCHECK_PARTIAL();
1778 ph10 428 RRETURN(MATCH_NOMATCH);
1779 ph10 443 }
1780 nigel 77 GETCHARINCTEST(c, eptr);
1781     if (
1782     #ifdef SUPPORT_UTF8
1783     c >= 256 ||
1784     #endif
1785     (md->ctypes[c] & ctype_space) == 0
1786     )
1787     RRETURN(MATCH_NOMATCH);
1788     ecode++;
1789     break;
1790    
1791     case OP_NOT_WORDCHAR:
1792 ph10 443 if (eptr >= md->end_subject)
1793 ph10 428 {
1794 ph10 443 SCHECK_PARTIAL();
1795 ph10 428 RRETURN(MATCH_NOMATCH);
1796 ph10 443 }
1797 nigel 77 GETCHARINCTEST(c, eptr);
1798     if (
1799     #ifdef SUPPORT_UTF8
1800     c < 256 &&
1801     #endif
1802     (md->ctypes[c] & ctype_word) != 0
1803     )
1804     RRETURN(MATCH_NOMATCH);
1805     ecode++;
1806     break;
1807    
1808     case OP_WORDCHAR:
1809 ph10 443 if (eptr >= md->end_subject)
1810 ph10 428 {
1811 ph10 443 SCHECK_PARTIAL();
1812 ph10 428 RRETURN(MATCH_NOMATCH);
1813 ph10 443 }
1814 nigel 77 GETCHARINCTEST(c, eptr);
1815     if (
1816     #ifdef SUPPORT_UTF8
1817     c >= 256 ||
1818     #endif
1819     (md->ctypes[c] & ctype_word) == 0
1820     )
1821     RRETURN(MATCH_NOMATCH);
1822     ecode++;
1823     break;
1824    
1825 nigel 93 case OP_ANYNL:
1826 ph10 443 if (eptr >= md->end_subject)
1827 ph10 428 {
1828 ph10 443 SCHECK_PARTIAL();
1829 ph10 428 RRETURN(MATCH_NOMATCH);
1830 ph10 443 }
1831 nigel 93 GETCHARINCTEST(c, eptr);
1832     switch(c)
1833     {
1834     default: RRETURN(MATCH_NOMATCH);
1835     case 0x000d:
1836     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1837     break;
1838 ph10 231
1839 nigel 93 case 0x000a:
1840 ph10 231 break;
1841    
1842 nigel 93 case 0x000b:
1843     case 0x000c:
1844     case 0x0085:
1845     case 0x2028:
1846     case 0x2029:
1847 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1848 nigel 93 break;
1849     }
1850     ecode++;
1851     break;
1852    
1853 ph10 178 case OP_NOT_HSPACE:
1854 ph10 443 if (eptr >= md->end_subject)
1855 ph10 428 {
1856 ph10 443 SCHECK_PARTIAL();
1857 ph10 428 RRETURN(MATCH_NOMATCH);
1858 ph10 443 }
1859 ph10 178 GETCHARINCTEST(c, eptr);
1860     switch(c)
1861     {
1862     default: break;
1863     case 0x09: /* HT */
1864     case 0x20: /* SPACE */
1865     case 0xa0: /* NBSP */
1866     case 0x1680: /* OGHAM SPACE MARK */
1867     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1868     case 0x2000: /* EN QUAD */
1869     case 0x2001: /* EM QUAD */
1870     case 0x2002: /* EN SPACE */
1871     case 0x2003: /* EM SPACE */
1872     case 0x2004: /* THREE-PER-EM SPACE */
1873     case 0x2005: /* FOUR-PER-EM SPACE */
1874     case 0x2006: /* SIX-PER-EM SPACE */
1875     case 0x2007: /* FIGURE SPACE */
1876     case 0x2008: /* PUNCTUATION SPACE */
1877     case 0x2009: /* THIN SPACE */
1878     case 0x200A: /* HAIR SPACE */
1879     case 0x202f: /* NARROW NO-BREAK SPACE */
1880     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1881     case 0x3000: /* IDEOGRAPHIC SPACE */
1882     RRETURN(MATCH_NOMATCH);
1883     }
1884     ecode++;
1885     break;
1886    
1887     case OP_HSPACE:
1888 ph10 443 if (eptr >= md->end_subject)
1889 ph10 428 {
1890 ph10 443 SCHECK_PARTIAL();
1891 ph10 428 RRETURN(MATCH_NOMATCH);
1892 ph10 443 }
1893 ph10 178 GETCHARINCTEST(c, eptr);
1894     switch(c)
1895     {
1896     default: RRETURN(MATCH_NOMATCH);
1897     case 0x09: /* HT */
1898     case 0x20: /* SPACE */
1899     case 0xa0: /* NBSP */
1900     case 0x1680: /* OGHAM SPACE MARK */
1901     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1902     case 0x2000: /* EN QUAD */
1903     case 0x2001: /* EM QUAD */
1904     case 0x2002: /* EN SPACE */
1905     case 0x2003: /* EM SPACE */
1906     case 0x2004: /* THREE-PER-EM SPACE */
1907     case 0x2005: /* FOUR-PER-EM SPACE */
1908     case 0x2006: /* SIX-PER-EM SPACE */
1909     case 0x2007: /* FIGURE SPACE */
1910     case 0x2008: /* PUNCTUATION SPACE */
1911     case 0x2009: /* THIN SPACE */
1912     case 0x200A: /* HAIR SPACE */
1913     case 0x202f: /* NARROW NO-BREAK SPACE */
1914     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1915     case 0x3000: /* IDEOGRAPHIC SPACE */
1916     break;
1917     }
1918     ecode++;
1919     break;
1920    
1921     case OP_NOT_VSPACE:
1922 ph10 443 if (eptr >= md->end_subject)
1923 ph10 428 {
1924 ph10 443 SCHECK_PARTIAL();
1925 ph10 428 RRETURN(MATCH_NOMATCH);
1926 ph10 443 }
1927 ph10 178 GETCHARINCTEST(c, eptr);
1928     switch(c)
1929     {
1930     default: break;
1931     case 0x0a: /* LF */
1932     case 0x0b: /* VT */
1933     case 0x0c: /* FF */
1934     case 0x0d: /* CR */
1935     case 0x85: /* NEL */
1936     case 0x2028: /* LINE SEPARATOR */
1937     case 0x2029: /* PARAGRAPH SEPARATOR */
1938     RRETURN(MATCH_NOMATCH);
1939     }
1940     ecode++;
1941     break;
1942    
1943     case OP_VSPACE:
1944 ph10 443 if (eptr >= md->end_subject)
1945 ph10 428 {
1946 ph10 443 SCHECK_PARTIAL();
1947 ph10 428 RRETURN(MATCH_NOMATCH);
1948 ph10 443 }
1949 ph10 178 GETCHARINCTEST(c, eptr);
1950     switch(c)
1951     {
1952     default: RRETURN(MATCH_NOMATCH);
1953     case 0x0a: /* LF */
1954     case 0x0b: /* VT */
1955     case 0x0c: /* FF */
1956     case 0x0d: /* CR */
1957     case 0x85: /* NEL */
1958     case 0x2028: /* LINE SEPARATOR */
1959     case 0x2029: /* PARAGRAPH SEPARATOR */
1960     break;
1961     }
1962     ecode++;
1963     break;
1964    
1965 nigel 77 #ifdef SUPPORT_UCP
1966     /* Check the next character by Unicode property. We will get here only
1967     if the support is in the binary; otherwise a compile-time error occurs. */
1968    
1969     case OP_PROP:
1970     case OP_NOTPROP:
1971 ph10 443 if (eptr >= md->end_subject)
1972 ph10 428 {
1973 ph10 443 SCHECK_PARTIAL();
1974 ph10 428 RRETURN(MATCH_NOMATCH);
1975 ph10 443 }
1976 nigel 77 GETCHARINCTEST(c, eptr);
1977     {
1978 ph10 384 const ucd_record *prop = GET_UCD(c);
1979 nigel 77
1980 nigel 87 switch(ecode[1])
1981     {
1982     case PT_ANY:
1983     if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1984     break;
1985 nigel 77
1986 nigel 87 case PT_LAMP:
1987 ph10 349 if ((prop->chartype == ucp_Lu ||
1988     prop->chartype == ucp_Ll ||
1989     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1990 nigel 77 RRETURN(MATCH_NOMATCH);
1991 nigel 87 break;
1992    
1993     case PT_GC:
1994 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1995 nigel 77 RRETURN(MATCH_NOMATCH);
1996 nigel 87 break;
1997    
1998     case PT_PC:
1999 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2000 nigel 87 RRETURN(MATCH_NOMATCH);
2001     break;
2002    
2003     case PT_SC:
2004 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2005 nigel 87 RRETURN(MATCH_NOMATCH);
2006     break;
2007    
2008     default:
2009     RRETURN(PCRE_ERROR_INTERNAL);
2010 nigel 77 }
2011 nigel 87
2012     ecode += 3;
2013 nigel 77 }
2014     break;
2015    
2016     /* Match an extended Unicode sequence. We will get here only if the support
2017     is in the binary; otherwise a compile-time error occurs. */
2018    
2019     case OP_EXTUNI:
2020 ph10 443 if (eptr >= md->end_subject)
2021 ph10 428 {
2022 ph10 443 SCHECK_PARTIAL();
2023 ph10 428 RRETURN(MATCH_NOMATCH);
2024 ph10 443 }
2025 nigel 77 GETCHARINCTEST(c, eptr);
2026     {
2027 ph10 349 int category = UCD_CATEGORY(c);
2028 nigel 77 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
2029     while (eptr < md->end_subject)
2030     {
2031     int len = 1;
2032     if (!utf8) c = *eptr; else
2033     {
2034     GETCHARLEN(c, eptr, len);
2035     }
2036 ph10 349 category = UCD_CATEGORY(c);
2037 nigel 77 if (category != ucp_M) break;
2038     eptr += len;
2039     }
2040     }
2041     ecode++;
2042     break;
2043     #endif
2044    
2045    
2046     /* Match a back reference, possibly repeatedly. Look past the end of the
2047     item to see if there is repeat information following. The code is similar
2048     to that for character classes, but repeated for efficiency. Then obey
2049     similar code to character type repeats - written out again for speed.
2050     However, if the referenced string is the empty string, always treat
2051     it as matched, any number of times (otherwise there could be infinite
2052     loops). */
2053    
2054     case OP_REF:
2055     {
2056     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2057 ph10 345 ecode += 3;
2058    
2059 ph10 336 /* If the reference is unset, there are two possibilities:
2060 ph10 345
2061 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
2062     than the amount of subject left; this ensures that every attempt at a
2063     match fails. We can't just fail here, because of the possibility of
2064     quantifiers with zero minima.
2065 ph10 345
2066     (b) If the JavaScript compatibility flag is set, set the length to zero
2067     so that the back reference matches an empty string.
2068    
2069     Otherwise, set the length to the length of what was matched by the
2070 ph10 336 referenced subpattern. */
2071 ph10 345
2072 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
2073 ph10 345 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2074 ph10 336 else
2075     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2076 nigel 77
2077     /* Set up for repetition, or handle the non-repeated case */
2078    
2079     switch (*ecode)
2080     {
2081     case OP_CRSTAR:
2082     case OP_CRMINSTAR:
2083     case OP_CRPLUS:
2084     case OP_CRMINPLUS:
2085     case OP_CRQUERY:
2086     case OP_CRMINQUERY:
2087     c = *ecode++ - OP_CRSTAR;
2088     minimize = (c & 1) != 0;
2089     min = rep_min[c]; /* Pick up values from tables; */
2090     max = rep_max[c]; /* zero for max => infinity */
2091     if (max == 0) max = INT_MAX;
2092     break;
2093    
2094     case OP_CRRANGE:
2095     case OP_CRMINRANGE:
2096     minimize = (*ecode == OP_CRMINRANGE);
2097     min = GET2(ecode, 1);
2098     max = GET2(ecode, 3);
2099     if (max == 0) max = INT_MAX;
2100     ecode += 5;
2101     break;
2102    
2103     default: /* No repeat follows */
2104 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
2105 ph10 428 {
2106 ph10 443 CHECK_PARTIAL();
2107 ph10 428 RRETURN(MATCH_NOMATCH);
2108 ph10 443 }
2109 nigel 77 eptr += length;
2110     continue; /* With the main loop */
2111     }
2112    
2113     /* If the length of the reference is zero, just continue with the
2114     main loop. */
2115 ph10 443
2116 nigel 77 if (length == 0) continue;
2117    
2118     /* First, ensure the minimum number of matches are present. We get back
2119     the length of the reference string explicitly rather than passing the
2120     address of eptr, so that eptr can be a register variable. */
2121    
2122     for (i = 1; i <= min; i++)
2123     {
2124 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
2125 ph10 426 {
2126 ph10 427 CHECK_PARTIAL();
2127 ph10 426 RRETURN(MATCH_NOMATCH);
2128 ph10 427 }
2129 nigel 77 eptr += length;
2130     }
2131    
2132     /* If min = max, continue at the same level without recursion.
2133     They are not both allowed to be zero. */
2134    
2135     if (min == max) continue;
2136    
2137     /* If minimizing, keep trying and advancing the pointer */
2138    
2139     if (minimize)
2140     {
2141     for (fi = min;; fi++)
2142     {
2143 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2144 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2145 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2146     if (!match_ref(offset, eptr, length, md, ims))
2147 ph10 426 {
2148 ph10 427 CHECK_PARTIAL();
2149 nigel 77 RRETURN(MATCH_NOMATCH);
2150 ph10 427 }
2151 nigel 77 eptr += length;
2152     }
2153     /* Control never gets here */
2154     }
2155    
2156     /* If maximizing, find the longest string and work backwards */
2157    
2158     else
2159     {
2160     pp = eptr;
2161     for (i = min; i < max; i++)
2162     {
2163 ph10 463 if (!match_ref(offset, eptr, length, md, ims))
2164 ph10 462 {
2165 ph10 463 CHECK_PARTIAL();
2166 ph10 462 break;
2167 ph10 463 }
2168 nigel 77 eptr += length;
2169     }
2170     while (eptr >= pp)
2171     {
2172 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2173 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2174     eptr -= length;
2175     }
2176     RRETURN(MATCH_NOMATCH);
2177     }
2178     }
2179     /* Control never gets here */
2180    
2181     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2182     used when all the characters in the class have values in the range 0-255,
2183     and either the matching is caseful, or the characters are in the range
2184     0-127 when UTF-8 processing is enabled. The only difference between
2185     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2186     encountered.
2187    
2188     First, look past the end of the item to see if there is repeat information
2189     following. Then obey similar code to character type repeats - written out
2190     again for speed. */
2191    
2192     case OP_NCLASS:
2193     case OP_CLASS:
2194     {
2195     data = ecode + 1; /* Save for matching */
2196     ecode += 33; /* Advance past the item */
2197    
2198     switch (*ecode)
2199     {
2200     case OP_CRSTAR:
2201     case OP_CRMINSTAR:
2202     case OP_CRPLUS:
2203     case OP_CRMINPLUS:
2204     case OP_CRQUERY:
2205     case OP_CRMINQUERY:
2206     c = *ecode++ - OP_CRSTAR;
2207     minimize = (c & 1) != 0;
2208     min = rep_min[c]; /* Pick up values from tables; */
2209     max = rep_max[c]; /* zero for max => infinity */
2210     if (max == 0) max = INT_MAX;
2211     break;
2212    
2213     case OP_CRRANGE:
2214     case OP_CRMINRANGE:
2215     minimize = (*ecode == OP_CRMINRANGE);
2216     min = GET2(ecode, 1);
2217     max = GET2(ecode, 3);
2218     if (max == 0) max = INT_MAX;
2219     ecode += 5;
2220     break;
2221    
2222     default: /* No repeat follows */
2223     min = max = 1;
2224     break;
2225     }
2226    
2227     /* First, ensure the minimum number of matches are present. */
2228    
2229     #ifdef SUPPORT_UTF8
2230     /* UTF-8 mode */
2231     if (utf8)
2232     {
2233     for (i = 1; i <= min; i++)
2234     {
2235 ph10 427 if (eptr >= md->end_subject)
2236 ph10 426 {
2237 ph10 428 SCHECK_PARTIAL();
2238 ph10 426 RRETURN(MATCH_NOMATCH);
2239 ph10 427 }
2240 nigel 77 GETCHARINC(c, eptr);
2241     if (c > 255)
2242     {
2243     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2244     }
2245     else
2246     {
2247     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2248     }
2249     }
2250     }
2251     else
2252     #endif
2253     /* Not UTF-8 mode */
2254     {
2255     for (i = 1; i <= min; i++)
2256     {
2257 ph10 427 if (eptr >= md->end_subject)
2258 ph10 426 {
2259 ph10 428 SCHECK_PARTIAL();
2260 ph10 426 RRETURN(MATCH_NOMATCH);
2261 ph10 427 }
2262 nigel 77 c = *eptr++;
2263     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2264     }
2265     }
2266    
2267     /* If max == min we can continue with the main loop without the
2268     need to recurse. */
2269    
2270     if (min == max) continue;
2271    
2272     /* If minimizing, keep testing the rest of the expression and advancing
2273     the pointer while it matches the class. */
2274    
2275     if (minimize)
2276     {
2277     #ifdef SUPPORT_UTF8
2278     /* UTF-8 mode */
2279     if (utf8)
2280     {
2281     for (fi = min;; fi++)
2282     {
2283 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2284 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2285 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2286 ph10 427 if (eptr >= md->end_subject)
2287 ph10 426 {
2288 ph10 427 SCHECK_PARTIAL();
2289 ph10 426 RRETURN(MATCH_NOMATCH);
2290 ph10 427 }
2291 nigel 77 GETCHARINC(c, eptr);
2292     if (c > 255)
2293     {
2294     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2295     }
2296     else
2297     {
2298     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2299     }
2300     }
2301     }
2302     else
2303     #endif
2304     /* Not UTF-8 mode */
2305     {
2306     for (fi = min;; fi++)
2307     {
2308 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2309 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2310 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2311 ph10 427 if (eptr >= md->end_subject)
2312 ph10 426 {
2313 ph10 427 SCHECK_PARTIAL();
2314 ph10 426 RRETURN(MATCH_NOMATCH);
2315 ph10 427 }
2316 nigel 77 c = *eptr++;
2317     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2318     }
2319     }
2320     /* Control never gets here */
2321     }
2322    
2323     /* If maximizing, find the longest possible run, then work backwards. */
2324    
2325     else
2326     {
2327     pp = eptr;
2328    
2329     #ifdef SUPPORT_UTF8
2330     /* UTF-8 mode */
2331     if (utf8)
2332     {
2333     for (i = min; i < max; i++)
2334     {
2335     int len = 1;
2336 ph10 463 if (eptr >= md->end_subject)
2337 ph10 462 {
2338 ph10 463 SCHECK_PARTIAL();
2339 ph10 462 break;
2340 ph10 463 }
2341 nigel 77 GETCHARLEN(c, eptr, len);
2342     if (c > 255)
2343     {
2344     if (op == OP_CLASS) break;
2345     }
2346     else
2347     {
2348     if ((data[c/8] & (1 << (c&7))) == 0) break;
2349     }
2350     eptr += len;
2351     }
2352     for (;;)
2353     {
2354 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2355 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2356     if (eptr-- == pp) break; /* Stop if tried at original pos */
2357     BACKCHAR(eptr);
2358     }
2359     }
2360     else
2361     #endif
2362     /* Not UTF-8 mode */
2363     {
2364     for (i = min; i < max; i++)
2365     {
2366 ph10 463 if (eptr >= md->end_subject)
2367 ph10 462 {
2368 ph10 463 SCHECK_PARTIAL();
2369 ph10 462 break;
2370 ph10 463 }
2371 nigel 77 c = *eptr;
2372     if ((data[c/8] & (1 << (c&7))) == 0) break;
2373     eptr++;
2374     }
2375     while (eptr >= pp)
2376     {
2377 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2378 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2379 nigel 77 eptr--;
2380     }
2381     }
2382    
2383     RRETURN(MATCH_NOMATCH);
2384     }
2385     }
2386     /* Control never gets here */
2387    
2388    
2389     /* Match an extended character class. This opcode is encountered only
2390 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2391     mode, because Unicode properties are supported in non-UTF-8 mode. */
2392 nigel 77
2393     #ifdef SUPPORT_UTF8
2394     case OP_XCLASS:
2395     {
2396     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2397     ecode += GET(ecode, 1); /* Advance past the item */
2398    
2399     switch (*ecode)
2400     {
2401     case OP_CRSTAR:
2402     case OP_CRMINSTAR:
2403     case OP_CRPLUS:
2404     case OP_CRMINPLUS:
2405     case OP_CRQUERY:
2406     case OP_CRMINQUERY:
2407     c = *ecode++ - OP_CRSTAR;
2408     minimize = (c & 1) != 0;
2409     min = rep_min[c]; /* Pick up values from tables; */
2410     max = rep_max[c]; /* zero for max => infinity */
2411     if (max == 0) max = INT_MAX;
2412     break;
2413    
2414     case OP_CRRANGE:
2415     case OP_CRMINRANGE:
2416     minimize = (*ecode == OP_CRMINRANGE);
2417     min = GET2(ecode, 1);
2418     max = GET2(ecode, 3);
2419     if (max == 0) max = INT_MAX;
2420     ecode += 5;
2421     break;
2422    
2423     default: /* No repeat follows */
2424     min = max = 1;
2425     break;
2426     }
2427    
2428     /* First, ensure the minimum number of matches are present. */
2429    
2430     for (i = 1; i <= min; i++)
2431     {
2432 ph10 427 if (eptr >= md->end_subject)
2433 ph10 426 {
2434     SCHECK_PARTIAL();
2435     RRETURN(MATCH_NOMATCH);
2436 ph10 427 }
2437 ph10 384 GETCHARINCTEST(c, eptr);
2438 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2439     }
2440    
2441     /* If max == min we can continue with the main loop without the
2442     need to recurse. */
2443    
2444     if (min == max) continue;
2445    
2446     /* If minimizing, keep testing the rest of the expression and advancing
2447     the pointer while it matches the class. */
2448    
2449     if (minimize)
2450     {
2451     for (fi = min;; fi++)
2452     {
2453 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2454 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2455 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2456 ph10 427 if (eptr >= md->end_subject)
2457 ph10 426 {
2458 ph10 427 SCHECK_PARTIAL();
2459 ph10 426 RRETURN(MATCH_NOMATCH);
2460 ph10 427 }
2461 ph10 384 GETCHARINCTEST(c, eptr);
2462 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2463     }
2464     /* Control never gets here */
2465     }
2466    
2467     /* If maximizing, find the longest possible run, then work backwards. */
2468    
2469     else
2470     {
2471     pp = eptr;
2472     for (i = min; i < max; i++)
2473     {
2474     int len = 1;
2475 ph10 463 if (eptr >= md->end_subject)
2476 ph10 462 {
2477 ph10 463 SCHECK_PARTIAL();
2478 ph10 462 break;
2479 ph10 463 }
2480 ph10 384 GETCHARLENTEST(c, eptr, len);
2481 nigel 77 if (!_pcre_xclass(c, data)) break;
2482     eptr += len;
2483     }
2484     for(;;)
2485     {
2486 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2487 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2488     if (eptr-- == pp) break; /* Stop if tried at original pos */
2489 ph10 214 if (utf8) BACKCHAR(eptr);
2490 nigel 77 }
2491     RRETURN(MATCH_NOMATCH);
2492     }
2493    
2494     /* Control never gets here */
2495     }
2496     #endif /* End of XCLASS */
2497    
2498     /* Match a single character, casefully */
2499    
2500     case OP_CHAR:
2501     #ifdef SUPPORT_UTF8
2502     if (utf8)
2503     {
2504     length = 1;
2505     ecode++;
2506     GETCHARLEN(fc, ecode, length);
2507 ph10 443 if (length > md->end_subject - eptr)
2508 ph10 428 {
2509     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2510     RRETURN(MATCH_NOMATCH);
2511 ph10 443 }
2512 nigel 77 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2513     }
2514     else
2515     #endif
2516    
2517     /* Non-UTF-8 mode */
2518     {
2519 ph10 443 if (md->end_subject - eptr < 1)
2520 ph10 428 {
2521     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2522     RRETURN(MATCH_NOMATCH);
2523 ph10 443 }
2524 nigel 77 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2525     ecode += 2;
2526     }
2527     break;
2528    
2529     /* Match a single character, caselessly */
2530    
2531     case OP_CHARNC:
2532     #ifdef SUPPORT_UTF8
2533     if (utf8)
2534     {
2535     length = 1;
2536     ecode++;
2537     GETCHARLEN(fc, ecode, length);
2538    
2539 ph10 443 if (length > md->end_subject - eptr)
2540 ph10 428 {
2541     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2542     RRETURN(MATCH_NOMATCH);
2543 ph10 443 }
2544 nigel 77
2545     /* If the pattern character's value is < 128, we have only one byte, and
2546     can use the fast lookup table. */
2547    
2548     if (fc < 128)
2549     {
2550     if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2551     }
2552    
2553     /* Otherwise we must pick up the subject character */
2554    
2555     else
2556     {
2557 nigel 93 unsigned int dc;
2558 nigel 77 GETCHARINC(dc, eptr);
2559     ecode += length;
2560    
2561     /* If we have Unicode property support, we can use it to test the other
2562 nigel 87 case of the character, if there is one. */
2563 nigel 77
2564     if (fc != dc)
2565     {
2566     #ifdef SUPPORT_UCP
2567 ph10 349 if (dc != UCD_OTHERCASE(fc))
2568 nigel 77 #endif
2569     RRETURN(MATCH_NOMATCH);
2570     }
2571     }
2572     }
2573     else
2574     #endif /* SUPPORT_UTF8 */
2575    
2576     /* Non-UTF-8 mode */
2577     {
2578 ph10 443 if (md->end_subject - eptr < 1)
2579 ph10 428 {
2580 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2581 ph10 428 RRETURN(MATCH_NOMATCH);
2582 ph10 443 }
2583 nigel 77 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2584     ecode += 2;
2585     }
2586     break;
2587    
2588 nigel 93 /* Match a single character repeatedly. */
2589 nigel 77
2590     case OP_EXACT:
2591     min = max = GET2(ecode, 1);
2592     ecode += 3;
2593     goto REPEATCHAR;
2594    
2595 nigel 93 case OP_POSUPTO:
2596     possessive = TRUE;
2597     /* Fall through */
2598    
2599 nigel 77 case OP_UPTO:
2600     case OP_MINUPTO:
2601     min = 0;
2602     max = GET2(ecode, 1);
2603     minimize = *ecode == OP_MINUPTO;
2604     ecode += 3;
2605     goto REPEATCHAR;
2606    
2607 nigel 93 case OP_POSSTAR:
2608     possessive = TRUE;
2609     min = 0;
2610     max = INT_MAX;
2611     ecode++;
2612     goto REPEATCHAR;
2613    
2614     case OP_POSPLUS:
2615     possessive = TRUE;
2616     min = 1;
2617     max = INT_MAX;
2618     ecode++;
2619     goto REPEATCHAR;
2620    
2621     case OP_POSQUERY:
2622     possessive = TRUE;
2623     min = 0;
2624     max = 1;
2625     ecode++;
2626     goto REPEATCHAR;
2627    
2628 nigel 77 case OP_STAR:
2629     case OP_MINSTAR:
2630     case OP_PLUS:
2631     case OP_MINPLUS:
2632     case OP_QUERY:
2633     case OP_MINQUERY:
2634     c = *ecode++ - OP_STAR;
2635     minimize = (c & 1) != 0;
2636 ph10 443
2637 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2638     max = rep_max[c]; /* zero for max => infinity */
2639     if (max == 0) max = INT_MAX;
2640    
2641 ph10 426 /* Common code for all repeated single-character matches. */
2642 nigel 77
2643     REPEATCHAR:
2644     #ifdef SUPPORT_UTF8
2645     if (utf8)
2646     {
2647     length = 1;
2648     charptr = ecode;
2649     GETCHARLEN(fc, ecode, length);
2650     ecode += length;
2651    
2652     /* Handle multibyte character matching specially here. There is
2653     support for caseless matching if UCP support is present. */
2654    
2655     if (length > 1)
2656     {
2657     #ifdef SUPPORT_UCP
2658 nigel 93 unsigned int othercase;
2659 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2660 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2661 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2662 ph10 115 else oclength = 0;
2663 nigel 77 #endif /* SUPPORT_UCP */
2664    
2665     for (i = 1; i <= min; i++)
2666     {
2667 ph10 426 if (eptr <= md->end_subject - length &&
2668     memcmp(eptr, charptr, length) == 0) eptr += length;
2669 ph10 123 #ifdef SUPPORT_UCP
2670 ph10 426 else if (oclength > 0 &&
2671     eptr <= md->end_subject - oclength &&
2672     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2673     #endif /* SUPPORT_UCP */
2674 nigel 77 else
2675     {
2676 ph10 426 CHECK_PARTIAL();
2677     RRETURN(MATCH_NOMATCH);
2678 nigel 77 }
2679     }
2680    
2681     if (min == max) continue;
2682    
2683     if (minimize)
2684     {
2685     for (fi = min;; fi++)
2686     {
2687 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2688 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2689 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2690 ph10 426 if (eptr <= md->end_subject - length &&
2691     memcmp(eptr, charptr, length) == 0) eptr += length;
2692 ph10 123 #ifdef SUPPORT_UCP
2693 ph10 426 else if (oclength > 0 &&
2694     eptr <= md->end_subject - oclength &&
2695     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2696     #endif /* SUPPORT_UCP */
2697 nigel 77 else
2698     {
2699 ph10 426 CHECK_PARTIAL();
2700     RRETURN(MATCH_NOMATCH);
2701 nigel 77 }
2702     }
2703     /* Control never gets here */
2704     }
2705 nigel 93
2706     else /* Maximize */
2707 nigel 77 {
2708     pp = eptr;
2709     for (i = min; i < max; i++)
2710     {
2711 ph10 426 if (eptr <= md->end_subject - length &&
2712     memcmp(eptr, charptr, length) == 0) eptr += length;
2713 ph10 123 #ifdef SUPPORT_UCP
2714 ph10 426 else if (oclength > 0 &&
2715     eptr <= md->end_subject - oclength &&
2716     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2717     #endif /* SUPPORT_UCP */
2718 ph10 463 else
2719 ph10 462 {
2720 ph10 463 CHECK_PARTIAL();
2721 ph10 462 break;
2722 ph10 463 }
2723 nigel 77 }
2724 nigel 93
2725     if (possessive) continue;
2726 ph10 427
2727 ph10 120 for(;;)
2728 ph10 426 {
2729     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2730     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2731     if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2732 ph10 115 #ifdef SUPPORT_UCP
2733 ph10 426 eptr--;
2734     BACKCHAR(eptr);
2735 ph10 123 #else /* without SUPPORT_UCP */
2736 ph10 426 eptr -= length;
2737 ph10 123 #endif /* SUPPORT_UCP */
2738 ph10 426 }
2739 nigel 77 }
2740     /* Control never gets here */
2741     }
2742    
2743     /* If the length of a UTF-8 character is 1, we fall through here, and
2744     obey the code as for non-UTF-8 characters below, though in this case the
2745     value of fc will always be < 128. */
2746     }
2747     else
2748     #endif /* SUPPORT_UTF8 */
2749    
2750     /* When not in UTF-8 mode, load a single-byte character. */
2751    
2752 ph10 426 fc = *ecode++;
2753 ph10 443
2754 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2755     may not be in UTF-8 mode. The code is duplicated for the caseless and
2756     caseful cases, for speed, since matching characters is likely to be quite
2757     common. First, ensure the minimum number of matches are present. If min =
2758     max, continue at the same level without recursing. Otherwise, if
2759     minimizing, keep trying the rest of the expression and advancing one
2760     matching character if failing, up to the maximum. Alternatively, if
2761     maximizing, find the maximum number of characters and work backwards. */
2762    
2763     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2764     max, eptr));
2765    
2766     if ((ims & PCRE_CASELESS) != 0)
2767     {
2768     fc = md->lcc[fc];
2769     for (i = 1; i <= min; i++)
2770 ph10 426 {
2771     if (eptr >= md->end_subject)
2772     {
2773     SCHECK_PARTIAL();
2774     RRETURN(MATCH_NOMATCH);
2775     }
2776 nigel 77 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2777 ph10 426 }
2778 nigel 77 if (min == max) continue;
2779     if (minimize)
2780     {
2781     for (fi = min;; fi++)
2782     {
2783 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2784 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2785 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2786 ph10 426 if (eptr >= md->end_subject)
2787     {
2788 ph10 427 SCHECK_PARTIAL();
2789 ph10 426 RRETURN(MATCH_NOMATCH);
2790     }
2791     if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2792 nigel 77 }
2793     /* Control never gets here */
2794     }
2795 nigel 93 else /* Maximize */
2796 nigel 77 {
2797     pp = eptr;
2798     for (i = min; i < max; i++)
2799     {
2800 ph10 463 if (eptr >= md->end_subject)
2801 ph10 462 {
2802     SCHECK_PARTIAL();
2803     break;
2804 ph10 463 }
2805 ph10 462 if (fc != md->lcc[*eptr]) break;
2806 nigel 77 eptr++;
2807     }
2808 ph10 427
2809 nigel 93 if (possessive) continue;
2810 ph10 427
2811 nigel 77 while (eptr >= pp)
2812     {
2813 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2814 nigel 77 eptr--;
2815     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2816     }
2817     RRETURN(MATCH_NOMATCH);
2818     }
2819     /* Control never gets here */
2820     }
2821    
2822     /* Caseful comparisons (includes all multi-byte characters) */
2823    
2824     else
2825     {
2826 ph10 427 for (i = 1; i <= min; i++)
2827 ph10 426 {
2828     if (eptr >= md->end_subject)
2829     {
2830     SCHECK_PARTIAL();
2831     RRETURN(MATCH_NOMATCH);
2832     }
2833     if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2834 ph10 427 }
2835 ph10 443
2836 nigel 77 if (min == max) continue;
2837 ph10 443
2838 nigel 77 if (minimize)
2839     {
2840     for (fi = min;; fi++)
2841     {
2842 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2843 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2844 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2845 ph10 426 if (eptr >= md->end_subject)
2846 ph10 427 {
2847 ph10 426 SCHECK_PARTIAL();
2848     RRETURN(MATCH_NOMATCH);
2849 ph10 427 }
2850 ph10 426 if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2851 nigel 77 }
2852     /* Control never gets here */
2853     }
2854 nigel 93 else /* Maximize */
2855 nigel 77 {
2856     pp = eptr;
2857     for (i = min; i < max; i++)
2858     {
2859 ph10 463 if (eptr >= md->end_subject)
2860 ph10 462 {
2861 ph10 463 SCHECK_PARTIAL();
2862 ph10 462 break;
2863 ph10 463 }
2864 ph10 462 if (fc != *eptr) break;
2865 nigel 77 eptr++;
2866     }
2867 nigel 93 if (possessive) continue;
2868 ph10 443
2869 nigel 77 while (eptr >= pp)
2870     {
2871 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2872 nigel 77 eptr--;
2873     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2874     }
2875     RRETURN(MATCH_NOMATCH);
2876     }
2877     }
2878     /* Control never gets here */
2879    
2880     /* Match a negated single one-byte character. The character we are
2881     checking can be multibyte. */
2882    
2883     case OP_NOT:
2884 ph10 443 if (eptr >= md->end_subject)
2885 ph10 428 {
2886 ph10 443 SCHECK_PARTIAL();
2887 ph10 428 RRETURN(MATCH_NOMATCH);
2888 ph10 443 }
2889 nigel 77 ecode++;
2890     GETCHARINCTEST(c, eptr);
2891     if ((ims & PCRE_CASELESS) != 0)
2892     {
2893     #ifdef SUPPORT_UTF8
2894     if (c < 256)
2895     #endif
2896     c = md->lcc[c];
2897     if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2898     }
2899     else
2900     {
2901     if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2902     }
2903     break;
2904    
2905     /* Match a negated single one-byte character repeatedly. This is almost a
2906     repeat of the code for a repeated single character, but I haven't found a
2907     nice way of commoning these up that doesn't require a test of the
2908     positive/negative option for each character match. Maybe that wouldn't add
2909     very much to the time taken, but character matching *is* what this is all
2910     about... */
2911    
2912     case OP_NOTEXACT:
2913     min = max = GET2(ecode, 1);
2914     ecode += 3;
2915     goto REPEATNOTCHAR;
2916    
2917     case OP_NOTUPTO:
2918     case OP_NOTMINUPTO:
2919     min = 0;
2920     max = GET2(ecode, 1);
2921     minimize = *ecode == OP_NOTMINUPTO;
2922     ecode += 3;
2923     goto REPEATNOTCHAR;
2924    
2925 nigel 93 case OP_NOTPOSSTAR:
2926     possessive = TRUE;
2927     min = 0;
2928     max = INT_MAX;
2929     ecode++;
2930     goto REPEATNOTCHAR;
2931    
2932     case OP_NOTPOSPLUS:
2933     possessive = TRUE;
2934     min = 1;
2935     max = INT_MAX;
2936     ecode++;
2937     goto REPEATNOTCHAR;
2938    
2939     case OP_NOTPOSQUERY:
2940     possessive = TRUE;
2941     min = 0;
2942     max = 1;
2943     ecode++;
2944     goto REPEATNOTCHAR;
2945    
2946     case OP_NOTPOSUPTO:
2947     possessive = TRUE;
2948     min = 0;
2949     max = GET2(ecode, 1);
2950     ecode += 3;
2951     goto REPEATNOTCHAR;
2952    
2953 nigel 77 case OP_NOTSTAR:
2954     case OP_NOTMINSTAR:
2955     case OP_NOTPLUS:
2956     case OP_NOTMINPLUS:
2957     case OP_NOTQUERY:
2958     case OP_NOTMINQUERY:
2959     c = *ecode++ - OP_NOTSTAR;
2960     minimize = (c & 1) != 0;
2961     min = rep_min[c]; /* Pick up values from tables; */
2962     max = rep_max[c]; /* zero for max => infinity */
2963     if (max == 0) max = INT_MAX;
2964    
2965 ph10 426 /* Common code for all repeated negated single-byte character matches. */
2966 nigel 77
2967     REPEATNOTCHAR:
2968     fc = *ecode++;
2969    
2970     /* The code is duplicated for the caseless and caseful cases, for speed,
2971     since matching characters is likely to be quite common. First, ensure the
2972     minimum number of matches are present. If min = max, continue at the same
2973     level without recursing. Otherwise, if minimizing, keep trying the rest of
2974     the expression and advancing one matching character if failing, up to the
2975     maximum. Alternatively, if maximizing, find the maximum number of
2976     characters and work backwards. */
2977    
2978     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2979     max, eptr));
2980    
2981     if ((ims & PCRE_CASELESS) != 0)
2982     {
2983     fc = md->lcc[fc];
2984    
2985     #ifdef SUPPORT_UTF8
2986     /* UTF-8 mode */
2987     if (utf8)
2988     {
2989 nigel 93 register unsigned int d;
2990 nigel 77 for (i = 1; i <= min; i++)
2991     {
2992 ph10 426 if (eptr >= md->end_subject)
2993     {
2994     SCHECK_PARTIAL();
2995 ph10 427 RRETURN(MATCH_NOMATCH);
2996     }
2997 nigel 77 GETCHARINC(d, eptr);
2998     if (d < 256) d = md->lcc[d];
2999     if (fc == d) RRETURN(MATCH_NOMATCH);
3000     }
3001     }
3002     else
3003     #endif
3004    
3005     /* Not UTF-8 mode */
3006     {
3007     for (i = 1; i <= min; i++)
3008 ph10 426 {
3009     if (eptr >= md->end_subject)
3010     {
3011     SCHECK_PARTIAL();
3012 ph10 427 RRETURN(MATCH_NOMATCH);
3013     }
3014 nigel 77 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3015 ph10 427 }
3016 nigel 77 }
3017    
3018     if (min == max) continue;
3019    
3020     if (minimize)
3021     {
3022     #ifdef SUPPORT_UTF8
3023     /* UTF-8 mode */
3024     if (utf8)
3025     {
3026 nigel 93 register unsigned int d;
3027 nigel 77 for (fi = min;; fi++)
3028     {
3029 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3030 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3031 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3032 ph10 427 if (eptr >= md->end_subject)
3033 ph10 426 {
3034 ph10 427 SCHECK_PARTIAL();
3035 ph10 426 RRETURN(MATCH_NOMATCH);
3036 ph10 427 }
3037 nigel 77 GETCHARINC(d, eptr);
3038     if (d < 256) d = md->lcc[d];
3039 ph10 366 if (fc == d) RRETURN(MATCH_NOMATCH);
3040 nigel 77 }
3041     }
3042     else
3043     #endif
3044     /* Not UTF-8 mode */
3045     {
3046     for (fi = min;; fi++)
3047     {
3048 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3049 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3050 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3051 ph10 426 if (eptr >= md->end_subject)
3052     {
3053     SCHECK_PARTIAL();
3054     RRETURN(MATCH_NOMATCH);
3055     }
3056     if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3057 nigel 77 }
3058     }
3059     /* Control never gets here */
3060     }
3061    
3062     /* Maximize case */
3063    
3064     else
3065     {
3066     pp = eptr;
3067    
3068     #ifdef SUPPORT_UTF8
3069     /* UTF-8 mode */
3070     if (utf8)
3071     {
3072 nigel 93 register unsigned int d;
3073 nigel 77 for (i = min; i < max; i++)
3074     {
3075     int len = 1;
3076 ph10 463 if (eptr >= md->end_subject)
3077 ph10 462 {
3078 ph10 463 SCHECK_PARTIAL();
3079 ph10 462 break;
3080 ph10 463 }
3081 nigel 77 GETCHARLEN(d, eptr, len);
3082     if (d < 256) d = md->lcc[d];
3083     if (fc == d) break;
3084     eptr += len;
3085     }
3086 nigel 93 if (possessive) continue;
3087     for(;;)
3088 nigel 77 {
3089 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3090 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3091     if (eptr-- == pp) break; /* Stop if tried at original pos */
3092     BACKCHAR(eptr);
3093     }
3094     }
3095     else
3096     #endif
3097     /* Not UTF-8 mode */
3098     {
3099     for (i = min; i < max; i++)
3100     {
3101 ph10 463 if (eptr >= md->end_subject)
3102 ph10 462 {
3103     SCHECK_PARTIAL();
3104     break;
3105 ph10 463 }
3106 ph10 462 if (fc == md->lcc[*eptr]) break;
3107 nigel 77 eptr++;
3108     }
3109 nigel 93 if (possessive) continue;
3110 nigel 77 while (eptr >= pp)
3111     {
3112 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3113 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3114     eptr--;
3115     }
3116     }
3117    
3118     RRETURN(MATCH_NOMATCH);
3119     }
3120     /* Control never gets here */
3121     }
3122    
3123     /* Caseful comparisons */
3124    
3125     else
3126     {
3127     #ifdef SUPPORT_UTF8
3128     /* UTF-8 mode */
3129     if (utf8)
3130     {
3131 nigel 93 register unsigned int d;
3132 nigel 77 for (i = 1; i <= min; i++)
3133     {
3134 ph10 426 if (eptr >= md->end_subject)
3135     {
3136     SCHECK_PARTIAL();
3137 ph10 427 RRETURN(MATCH_NOMATCH);
3138     }
3139 nigel 77 GETCHARINC(d, eptr);
3140     if (fc == d) RRETURN(MATCH_NOMATCH);
3141     }
3142     }
3143     else
3144     #endif
3145     /* Not UTF-8 mode */
3146     {
3147     for (i = 1; i <= min; i++)
3148 ph10 426 {
3149     if (eptr >= md->end_subject)
3150     {
3151     SCHECK_PARTIAL();
3152 ph10 427 RRETURN(MATCH_NOMATCH);
3153     }
3154 nigel 77 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3155 ph10 427 }
3156 nigel 77 }
3157    
3158     if (min == max) continue;
3159    
3160     if (minimize)
3161     {
3162     #ifdef SUPPORT_UTF8
3163     /* UTF-8 mode */
3164     if (utf8)
3165     {
3166 nigel 93 register unsigned int d;
3167 nigel 77 for (fi = min;; fi++)
3168     {
3169 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3170 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3171 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3172 ph10 427 if (eptr >= md->end_subject)
3173 ph10 426 {
3174 ph10 427 SCHECK_PARTIAL();
3175 ph10 426 RRETURN(MATCH_NOMATCH);
3176 ph10 427 }
3177 nigel 77 GETCHARINC(d, eptr);
3178 ph10 371 if (fc == d) RRETURN(MATCH_NOMATCH);
3179 nigel 77 }
3180     }
3181     else
3182     #endif
3183     /* Not UTF-8 mode */
3184     {
3185     for (fi = min;; fi++)
3186     {
3187 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3188 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3189 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3190 ph10 426 if (eptr >= md->end_subject)
3191     {
3192     SCHECK_PARTIAL();
3193     RRETURN(MATCH_NOMATCH);
3194 ph10 427 }
3195 ph10 426 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3196 nigel 77 }
3197     }
3198     /* Control never gets here */
3199     }
3200    
3201     /* Maximize case */
3202    
3203     else
3204     {
3205     pp = eptr;
3206    
3207     #ifdef SUPPORT_UTF8
3208     /* UTF-8 mode */
3209     if (utf8)
3210     {
3211 nigel 93 register unsigned int d;
3212 nigel 77 for (i = min; i < max; i++)
3213     {
3214     int len = 1;
3215 ph10 463 if (eptr >= md->end_subject)
3216 ph10 462 {
3217 ph10 463 SCHECK_PARTIAL();
3218 ph10 462 break;
3219 ph10 463 }
3220 nigel 77 GETCHARLEN(d, eptr, len);
3221     if (fc == d) break;
3222     eptr += len;
3223     }
3224 nigel 93 if (possessive) continue;
3225 nigel 77 for(;;)
3226     {
3227 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3228 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3229     if (eptr-- == pp) break; /* Stop if tried at original pos */
3230     BACKCHAR(eptr);
3231     }
3232     }
3233     else
3234     #endif
3235     /* Not UTF-8 mode */
3236     {
3237     for (i = min; i < max; i++)
3238     {
3239 ph10 463 if (eptr >= md->end_subject)
3240 ph10 462 {
3241 ph10 463 SCHECK_PARTIAL();
3242 ph10 462 break;
3243 ph10 463 }
3244 ph10 462 if (fc == *eptr) break;
3245 nigel 77 eptr++;
3246     }
3247 nigel 93 if (possessive) continue;
3248 nigel 77 while (eptr >= pp)
3249     {
3250 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3251 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3252     eptr--;
3253     }
3254     }
3255    
3256     RRETURN(MATCH_NOMATCH);
3257     }
3258     }
3259     /* Control never gets here */
3260    
3261     /* Match a single character type repeatedly; several different opcodes
3262     share code. This is very similar to the code for single characters, but we
3263     repeat it in the interests of efficiency. */
3264    
3265     case OP_TYPEEXACT:
3266     min = max = GET2(ecode, 1);
3267     minimize = TRUE;
3268     ecode += 3;
3269     goto REPEATTYPE;
3270    
3271     case OP_TYPEUPTO:
3272     case OP_TYPEMINUPTO:
3273     min = 0;
3274     max = GET2(ecode, 1);
3275     minimize = *ecode == OP_TYPEMINUPTO;
3276     ecode += 3;
3277     goto REPEATTYPE;
3278    
3279 nigel 93 case OP_TYPEPOSSTAR:
3280     possessive = TRUE;
3281     min = 0;
3282     max = INT_MAX;
3283     ecode++;
3284     goto REPEATTYPE;
3285    
3286     case OP_TYPEPOSPLUS:
3287     possessive = TRUE;
3288     min = 1;
3289     max = INT_MAX;
3290     ecode++;
3291     goto REPEATTYPE;
3292    
3293     case OP_TYPEPOSQUERY:
3294     possessive = TRUE;
3295     min = 0;
3296     max = 1;
3297     ecode++;
3298     goto REPEATTYPE;
3299    
3300     case OP_TYPEPOSUPTO:
3301     possessive = TRUE;
3302     min = 0;
3303     max = GET2(ecode, 1);
3304     ecode += 3;
3305     goto REPEATTYPE;
3306    
3307 nigel 77 case OP_TYPESTAR:
3308     case OP_TYPEMINSTAR:
3309     case OP_TYPEPLUS:
3310     case OP_TYPEMINPLUS:
3311     case OP_TYPEQUERY:
3312     case OP_TYPEMINQUERY:
3313     c = *ecode++ - OP_TYPESTAR;
3314     minimize = (c & 1) != 0;
3315     min = rep_min[c]; /* Pick up values from tables; */
3316     max = rep_max[c]; /* zero for max => infinity */
3317     if (max == 0) max = INT_MAX;
3318    
3319     /* Common code for all repeated single character type matches. Note that
3320     in UTF-8 mode, '.' matches a character of any length, but for the other
3321     character types, the valid characters are all one-byte long. */
3322    
3323     REPEATTYPE:
3324     ctype = *ecode++; /* Code for the character type */
3325    
3326     #ifdef SUPPORT_UCP
3327     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3328     {
3329     prop_fail_result = ctype == OP_NOTPROP;
3330     prop_type = *ecode++;
3331 nigel 87 prop_value = *ecode++;
3332 nigel 77 }
3333     else prop_type = -1;
3334     #endif
3335    
3336     /* First, ensure the minimum number of matches are present. Use inline
3337     code for maximizing the speed, and do the type test once at the start
3338 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3339 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3340     and single-bytes. */
3341    
3342     if (min > 0)
3343     {
3344     #ifdef SUPPORT_UCP
3345 nigel 87 if (prop_type >= 0)
3346 nigel 77 {
3347 nigel 87 switch(prop_type)
3348 nigel 77 {
3349 nigel 87 case PT_ANY:
3350     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3351     for (i = 1; i <= min; i++)
3352     {
3353 ph10 427 if (eptr >= md->end_subject)
3354 ph10 426 {
3355 ph10 427 SCHECK_PARTIAL();
3356 ph10 426 RRETURN(MATCH_NOMATCH);
3357 ph10 427 }
3358 ph10 184 GETCHARINCTEST(c, eptr);
3359 nigel 87 }
3360     break;
3361    
3362     case PT_LAMP:
3363     for (i = 1; i <= min; i++)
3364     {
3365 ph10 427 if (eptr >= md->end_subject)
3366 ph10 426 {
3367 ph10 427 SCHECK_PARTIAL();
3368 ph10 426 RRETURN(MATCH_NOMATCH);
3369 ph10 427 }
3370 ph10 184 GETCHARINCTEST(c, eptr);
3371 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3372 nigel 87 if ((prop_chartype == ucp_Lu ||
3373     prop_chartype == ucp_Ll ||
3374     prop_chartype == ucp_Lt) == prop_fail_result)
3375     RRETURN(MATCH_NOMATCH);
3376     }
3377     break;
3378    
3379     case PT_GC:
3380     for (i = 1; i <= min; i++)
3381     {
3382 ph10 427 if (eptr >= md->end_subject)
3383 ph10 426 {
3384 ph10 427 SCHECK_PARTIAL();
3385 ph10 426 RRETURN(MATCH_NOMATCH);
3386 ph10 427 }
3387 ph10 184 GETCHARINCTEST(c, eptr);
3388 ph10 349 prop_category = UCD_CATEGORY(c);
3389 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3390     RRETURN(MATCH_NOMATCH);
3391     }
3392     break;
3393    
3394     case PT_PC:
3395     for (i = 1; i <= min; i++)
3396     {
3397 ph10 427 if (eptr >= md->end_subject)
3398 ph10 426 {
3399 ph10 427 SCHECK_PARTIAL();
3400 ph10 426 RRETURN(MATCH_NOMATCH);
3401 ph10 427 }
3402 ph10 184 GETCHARINCTEST(c, eptr);
3403 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3404 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3405     RRETURN(MATCH_NOMATCH);
3406     }
3407     break;
3408    
3409     case PT_SC:
3410     for (i = 1; i <= min; i++)
3411     {
3412 ph10 427 if (eptr >= md->end_subject)
3413 ph10 426 {
3414 ph10 427 SCHECK_PARTIAL();
3415 ph10 426 RRETURN(MATCH_NOMATCH);
3416 ph10 427 }
3417 ph10 184 GETCHARINCTEST(c, eptr);
3418 ph10 349 prop_script = UCD_SCRIPT(c);
3419 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3420     RRETURN(MATCH_NOMATCH);
3421     }
3422     break;
3423    
3424     default:
3425     RRETURN(PCRE_ERROR_INTERNAL);
3426 nigel 77 }
3427     }
3428    
3429     /* Match extended Unicode sequences. We will get here only if the
3430     support is in the binary; otherwise a compile-time error occurs. */
3431    
3432     else if (ctype == OP_EXTUNI)
3433     {
3434     for (i = 1; i <= min; i++)
3435     {
3436 ph10 427 if (eptr >= md->end_subject)
3437 ph10 426 {
3438 ph10 427 SCHECK_PARTIAL();
3439 ph10 426 RRETURN(MATCH_NOMATCH);
3440 ph10 427 }
3441 nigel 77 GETCHARINCTEST(c, eptr);
3442 ph10 349 prop_category = UCD_CATEGORY(c);
3443 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3444     while (eptr < md->end_subject)
3445     {
3446     int len = 1;
3447 ph10 426 if (!utf8) c = *eptr;
3448     else { GETCHARLEN(c, eptr, len); }
3449 ph10 349 prop_category = UCD_CATEGORY(c);
3450 nigel 77 if (prop_category != ucp_M) break;
3451     eptr += len;
3452     }
3453     }
3454     }
3455    
3456     else
3457     #endif /* SUPPORT_UCP */
3458    
3459     /* Handle all other cases when the coding is UTF-8 */
3460    
3461     #ifdef SUPPORT_UTF8
3462     if (utf8) switch(ctype)
3463     {
3464     case OP_ANY:
3465     for (i = 1; i <= min; i++)
3466     {
3467 ph10 426 if (eptr >= md->end_subject)
3468     {
3469 ph10 427 SCHECK_PARTIAL();
3470 nigel 77 RRETURN(MATCH_NOMATCH);
3471 ph10 427 }
3472 ph10 426 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3473 nigel 91 eptr++;
3474 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3475     }
3476     break;
3477    
3478 ph10 341 case OP_ALLANY:
3479     for (i = 1; i <= min; i++)
3480     {
3481 ph10 427 if (eptr >= md->end_subject)
3482 ph10 426 {
3483     SCHECK_PARTIAL();
3484     RRETURN(MATCH_NOMATCH);
3485 ph10 427 }
3486 ph10 341 eptr++;
3487     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3488     }
3489     break;
3490    
3491 nigel 77 case OP_ANYBYTE:
3492 ph10 427 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3493 nigel 77 eptr += min;
3494     break;
3495    
3496 nigel 93 case OP_ANYNL:
3497     for (i = 1; i <= min; i++)
3498     {
3499 ph10 427 if (eptr >= md->end_subject)
3500 ph10 426 {
3501     SCHECK_PARTIAL();
3502     RRETURN(MATCH_NOMATCH);
3503 ph10 427 }
3504 nigel 93 GETCHARINC(c, eptr);
3505     switch(c)
3506     {
3507     default: RRETURN(MATCH_NOMATCH);
3508     case 0x000d:
3509     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3510     break;
3511 ph10 231
3512 nigel 93 case 0x000a:
3513 ph10 231 break;
3514    
3515 nigel 93 case 0x000b:
3516     case 0x000c:
3517     case 0x0085:
3518     case 0x2028:
3519     case 0x2029:
3520 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3521 nigel 93 break;
3522     }
3523     }
3524     break;
3525    
3526 ph10 178 case OP_NOT_HSPACE:
3527     for (i = 1; i <= min; i++)
3528     {
3529 ph10 427 if (eptr >= md->end_subject)
3530 ph10 426 {
3531     SCHECK_PARTIAL();
3532     RRETURN(MATCH_NOMATCH);
3533 ph10 427 }
3534 ph10 178 GETCHARINC(c, eptr);
3535     switch(c)
3536     {
3537     default: break;
3538     case 0x09: /* HT */
3539     case 0x20: /* SPACE */
3540     case 0xa0: /* NBSP */
3541     case 0x1680: /* OGHAM SPACE MARK */
3542     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3543     case 0x2000: /* EN QUAD */
3544     case 0x2001: /* EM QUAD */
3545     case 0x2002: /* EN SPACE */
3546     case 0x2003: /* EM SPACE */
3547     case 0x2004: /* THREE-PER-EM SPACE */
3548     case 0x2005: /* FOUR-PER-EM SPACE */
3549     case 0x2006: /* SIX-PER-EM SPACE */
3550     case 0x2007: /* FIGURE SPACE */
3551     case 0x2008: /* PUNCTUATION SPACE */
3552     case 0x2009: /* THIN SPACE */
3553     case 0x200A: /* HAIR SPACE */
3554     case 0x202f: /* NARROW NO-BREAK SPACE */
3555     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3556     case 0x3000: /* IDEOGRAPHIC SPACE */
3557     RRETURN(MATCH_NOMATCH);
3558     }
3559     }
3560     break;
3561 ph10 182
3562 ph10 178 case OP_HSPACE:
3563     for (i = 1; i <= min; i++)
3564     {
3565 ph10 427 if (eptr >= md->end_subject)
3566 ph10 426 {
3567 ph10 427 SCHECK_PARTIAL();
3568 ph10 426 RRETURN(MATCH_NOMATCH);
3569 ph10 427 }
3570 ph10 178 GETCHARINC(c, eptr);
3571     switch(c)
3572     {
3573     default: RRETURN(MATCH_NOMATCH);
3574     case 0x09: /* HT */
3575     case 0x20: /* SPACE */
3576     case 0xa0: /* NBSP */
3577     case 0x1680: /* OGHAM SPACE MARK */
3578     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3579     case 0x2000: /* EN QUAD */
3580     case 0x2001: /* EM QUAD */
3581     case 0x2002: /* EN SPACE */
3582     case 0x2003: /* EM SPACE */
3583     case 0x2004: /* THREE-PER-EM SPACE */
3584     case 0x2005: /* FOUR-PER-EM SPACE */
3585     case 0x2006: /* SIX-PER-EM SPACE */
3586     case 0x2007: /* FIGURE SPACE */
3587     case 0x2008: /* PUNCTUATION SPACE */
3588     case 0x2009: /* THIN SPACE */
3589     case 0x200A: /* HAIR SPACE */
3590     case 0x202f: /* NARROW NO-BREAK SPACE */
3591     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3592     case 0x3000: /* IDEOGRAPHIC SPACE */
3593     break;
3594     }
3595     }
3596     break;
3597 ph10 182
3598 ph10 178 case OP_NOT_VSPACE:
3599     for (i = 1; i <= min; i++)
3600     {
3601 ph10 427 if (eptr >= md->end_subject)
3602 ph10 426 {
3603 ph10 427 SCHECK_PARTIAL();
3604 ph10 426 RRETURN(MATCH_NOMATCH);
3605 ph10 427 }
3606 ph10 178 GETCHARINC(c, eptr);
3607     switch(c)
3608     {
3609     default: break;
3610     case 0x0a: /* LF */
3611     case 0x0b: /* VT */
3612     case 0x0c: /* FF */
3613     case 0x0d: /* CR */
3614     case 0x85: /* NEL */
3615     case 0x2028: /* LINE SEPARATOR */
3616     case 0x2029: /* PARAGRAPH SEPARATOR */
3617     RRETURN(MATCH_NOMATCH);
3618     }
3619     }
3620     break;
3621 ph10 182
3622 ph10 178 case OP_VSPACE:
3623     for (i = 1; i <= min; i++)
3624     {
3625 ph10 427 if (eptr >= md->end_subject)
3626 ph10 426 {
3627 ph10 427 SCHECK_PARTIAL();
3628 ph10 426 RRETURN(MATCH_NOMATCH);
3629 ph10 427 }
3630 ph10 178 GETCHARINC(c, eptr);
3631     switch(c)
3632     {
3633     default: RRETURN(MATCH_NOMATCH);
3634     case 0x0a: /* LF */
3635     case 0x0b: /* VT */
3636     case 0x0c: /* FF */
3637     case 0x0d: /* CR */
3638     case 0x85: /* NEL */
3639     case 0x2028: /* LINE SEPARATOR */
3640     case 0x2029: /* PARAGRAPH SEPARATOR */
3641 ph10 182 break;
3642 ph10 178 }
3643     }
3644     break;
3645    
3646 nigel 77 case OP_NOT_DIGIT:
3647     for (i = 1; i <= min; i++)
3648     {
3649 ph10 427 if (eptr >= md->end_subject)
3650 ph10 426 {
3651 ph10 427 SCHECK_PARTIAL();
3652 ph10 426 RRETURN(MATCH_NOMATCH);
3653 ph10 427 }
3654 nigel 77 GETCHARINC(c, eptr);
3655     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3656     RRETURN(MATCH_NOMATCH);
3657     }
3658     break;
3659    
3660     case OP_DIGIT:
3661     for (i = 1; i <= min; i++)
3662     {
3663 ph10 427 if (eptr >= md->end_subject)
3664 ph10 426 {
3665 ph10 427 SCHECK_PARTIAL();
3666 nigel 77 RRETURN(MATCH_NOMATCH);
3667 ph10 427 }
3668 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3669     RRETURN(MATCH_NOMATCH);
3670 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3671     }
3672     break;
3673    
3674     case OP_NOT_WHITESPACE:
3675     for (i = 1; i <= min; i++)
3676     {
3677 ph10 427 if (eptr >= md->end_subject)
3678 ph10 426 {
3679 ph10 427 SCHECK_PARTIAL();
3680 nigel 77 RRETURN(MATCH_NOMATCH);
3681 ph10 427 }
3682 ph10 426 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3683     RRETURN(MATCH_NOMATCH);
3684 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3685 nigel 77 }
3686     break;
3687    
3688     case OP_WHITESPACE:
3689     for (i = 1; i <= min; i++)
3690     {
3691 ph10 427 if (eptr >= md->end_subject)
3692 ph10 426 {
3693 ph10 427 SCHECK_PARTIAL();
3694 nigel 77 RRETURN(MATCH_NOMATCH);
3695 ph10 427 }
3696 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3697     RRETURN(MATCH_NOMATCH);
3698 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3699     }
3700     break;
3701    
3702     case OP_NOT_WORDCHAR:
3703     for (i = 1; i <= min; i++)
3704     {
3705 ph10 482 if (eptr >= md->end_subject)
3706     {
3707     SCHECK_PARTIAL();
3708 nigel 77 RRETURN(MATCH_NOMATCH);
3709 ph10 482 }
3710     if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3711     RRETURN(MATCH_NOMATCH);
3712 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3713 nigel 77 }
3714     break;
3715    
3716     case OP_WORDCHAR:
3717     for (i = 1; i <= min; i++)
3718     {
3719 ph10 427 if (eptr >= md->end_subject)
3720 ph10 426 {
3721 ph10 427 SCHECK_PARTIAL();
3722 nigel 77 RRETURN(MATCH_NOMATCH);
3723 ph10 427 }
3724 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3725     RRETURN(MATCH_NOMATCH);
3726 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3727     }
3728     break;
3729    
3730     default:
3731     RRETURN(PCRE_ERROR_INTERNAL);
3732     } /* End switch(ctype) */
3733    
3734     else
3735     #endif /* SUPPORT_UTF8 */
3736    
3737     /* Code for the non-UTF-8 case for minimum matching of operators other
3738 ph10 426 than OP_PROP and OP_NOTPROP. */
3739 nigel 77
3740     switch(ctype)
3741     {
3742     case OP_ANY:
3743 ph10 342 for (i = 1; i <= min; i++)
3744 nigel 77 {
3745 ph10 427 if (eptr >= md->end_subject)
3746 ph10 426 {
3747 ph10 427 SCHECK_PARTIAL();
3748 ph10 426 RRETURN(MATCH_NOMATCH);
3749 ph10 427 }
3750 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3751     eptr++;
3752 nigel 77 }
3753     break;
3754    
3755 ph10 341 case OP_ALLANY:
3756 ph10 443 if (eptr > md->end_subject - min)
3757 ph10 428 {
3758 ph10 443 SCHECK_PARTIAL();
3759 ph10 428 RRETURN(MATCH_NOMATCH);
3760 ph10 443 }
3761 ph10 341 eptr += min;
3762     break;
3763    
3764 nigel 77 case OP_ANYBYTE:
3765 ph10 443 if (eptr > md->end_subject - min)
3766 ph10 428 {
3767 ph10 443 SCHECK_PARTIAL();
3768 ph10 428 RRETURN(MATCH_NOMATCH);
3769 ph10 443 }
3770 nigel 77 eptr += min;
3771     break;
3772    
3773 nigel 93 case OP_ANYNL:
3774     for (i = 1; i <= min; i++)
3775     {
3776 ph10 427 if (eptr >= md->end_subject)
3777 ph10 426 {
3778 ph10 427 SCHECK_PARTIAL();
3779 ph10 426 RRETURN(MATCH_NOMATCH);
3780 ph10 427 }
3781 nigel 93 switch(*eptr++)
3782     {
3783     default: RRETURN(MATCH_NOMATCH);
3784     case 0x000d:
3785     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3786     break;
3787     case 0x000a:
3788 ph10 231 break;
3789    
3790 nigel 93 case 0x000b:
3791     case 0x000c:
3792     case 0x0085:
3793 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3794 nigel 93 break;
3795     }
3796     }
3797     break;
3798    
3799 ph10 178 case OP_NOT_HSPACE:
3800     for (i = 1; i <= min; i++)
3801     {
3802 ph10 427 if (eptr >= md->end_subject)
3803 ph10 426 {
3804 ph10 427 SCHECK_PARTIAL();
3805 ph10 426 RRETURN(MATCH_NOMATCH);
3806 ph10 427 }
3807 ph10 178 switch(*eptr++)
3808     {
3809     default: break;
3810     case 0x09: /* HT */
3811     case 0x20: /* SPACE */
3812     case 0xa0: /* NBSP */
3813     RRETURN(MATCH_NOMATCH);
3814     }
3815     }
3816     break;
3817    
3818     case OP_HSPACE:
3819     for (i = 1; i <= min; i++)
3820     {
3821 ph10 427 if (eptr >= md->end_subject)
3822 ph10 426 {
3823 ph10 427 SCHECK_PARTIAL();
3824 ph10 426 RRETURN(MATCH_NOMATCH);
3825 ph10 427 }
3826 ph10 178 switch(*eptr++)
3827     {
3828     default: RRETURN(MATCH_NOMATCH);
3829     case 0x09: /* HT */
3830     case 0x20: /* SPACE */
3831     case 0xa0: /* NBSP */
3832 ph10 182 break;
3833 ph10 178 }
3834     }
3835     break;
3836    
3837     case OP_NOT_VSPACE:
3838     for (i = 1; i <= min; i++)
3839     {
3840 ph10 427 if (eptr >= md->end_subject)
3841 ph10 426 {
3842 ph10 427 SCHECK_PARTIAL();
3843 ph10 426 RRETURN(MATCH_NOMATCH);
3844 ph10 427 }
3845 ph10 178 switch(*eptr++)
3846     {
3847     default: break;
3848     case 0x0a: /* LF */
3849     case 0x0b: /* VT */
3850     case 0x0c: /* FF */
3851     case 0x0d: /* CR */
3852     case 0x85: /* NEL */
3853     RRETURN(MATCH_NOMATCH);
3854     }
3855     }
3856     break;
3857    
3858     case OP_VSPACE:
3859     for (i = 1; i <= min; i++)
3860     {
3861 ph10 427 if (eptr >= md->end_subject)
3862 ph10 426 {
3863 ph10 427 SCHECK_PARTIAL();
3864 ph10 426 RRETURN(MATCH_NOMATCH);
3865 ph10 427 }
3866 ph10 178 switch(*eptr++)
3867     {
3868     default: RRETURN(MATCH_NOMATCH);
3869     case 0x0a: /* LF */
3870     case 0x0b: /* VT */
3871     case 0x0c: /* FF */
3872     case 0x0d: /* CR */
3873     case 0x85: /* NEL */
3874 ph10 182 break;
3875 ph10 178 }
3876     }
3877     break;
3878    
3879 nigel 77 case OP_NOT_DIGIT:
3880     for (i = 1; i <= min; i++)
3881 ph10 427 {
3882     if (eptr >= md->end_subject)
3883 ph10 426 {
3884 ph10 427 SCHECK_PARTIAL();
3885 ph10 426 RRETURN(MATCH_NOMATCH);
3886 ph10 427 }
3887 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3888 ph10 427 }
3889 nigel 77 break;
3890    
3891     case OP_DIGIT:
3892     for (i = 1; i <= min; i++)
3893 ph10 427 {
3894     if (eptr >= md->end_subject)
3895 ph10 426 {
3896 ph10 427 SCHECK_PARTIAL();
3897 ph10 426 RRETURN(MATCH_NOMATCH);
3898 ph10 427 }
3899 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3900 ph10 427 }
3901 nigel 77 break;
3902    
3903     case OP_NOT_WHITESPACE:
3904     for (i = 1; i <= min; i++)
3905 ph10 427 {
3906     if (eptr >= md->end_subject)
3907 ph10 426 {
3908 ph10 427 SCHECK_PARTIAL();
3909 ph10 426 RRETURN(MATCH_