/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 501 - (hide annotations) (download)
Sun Mar 7 11:49:54 2010 UTC (3 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 171447 byte(s)
Preparation code for future (*MARK) support.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74     #define MATCH_COMMIT (-999)
75     #define MATCH_PRUNE (-998)
76     #define MATCH_SKIP (-997)
77     #define MATCH_THEN (-996)
78    
79 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
80     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81     because the offset vector is always a multiple of 3 long. */
82    
83     #define REC_STACK_SAVE_MAX 30
84    
85     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
86    
87     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; /* minimum repeat counts; presumably one entry per common repeat kind (*, *?, +, +?, ?, ??) - TODO confirm against the code that indexes it */
88     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; /* matching maximum counts, same indexing as rep_min; 0 stands for "no upper limit" */
89    
90    
91    
92 ph10 475 #ifdef PCRE_DEBUG
93 nigel 77 /*************************************************
94     * Debugging function to print chars *
95     *************************************************/
96    
97     /* Print a sequence of chars in printable format, stopping at the end of the
98     subject if requested.
99    
100     Arguments:
101     p points to characters
102     length number to print
103     is_subject TRUE if printing from within md->start_subject
104     md pointer to matching data block, if is_subject is TRUE
105    
106     Returns: nothing
107     */
108    
109     static void
110     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111     { /* Debug helper: writes up to "length" bytes starting at p to stdout via printf */
112 nigel 93 unsigned int c; /* the byte most recently fetched from *p */
113 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p; /* when printing subject text, clamp so output stops at md->end_subject; md is only dereferenced if is_subject is TRUE */
114     while (length-- > 0) /* one byte per iteration; nothing is printed if length is <= 0 */
115     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c); /* printable bytes are shown as-is, all others as a two-digit hex escape */
116     }
117     #endif
118    
119    
120    
121     /*************************************************
122     * Match a back-reference *
123     *************************************************/
124    
125     /* If a back reference hasn't been set, the length that is passed is greater
126     than the number of characters left in the string, so the match fails.
127    
128     Arguments:
129     offset index into the offset vector
130     eptr points into the subject
131     length length to be matched
132     md points to match data block
133     ims the ims flags
134    
135     Returns: TRUE if matched
136     */
137    
138     static BOOL
139 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
140 nigel 77 unsigned long int ims)
141     { /* Compares "length" bytes of subject at eptr with the text located via md->offset_vector[offset]; TRUE only if all of it matches */
142 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset]; /* start of the previously captured text that is being referenced */
143 nigel 77
144 ph10 475 #ifdef PCRE_DEBUG
145 nigel 77 if (eptr >= md->end_subject) /* nothing of the subject is left to show */
146     printf("matching subject <null>");
147     else
148     {
149     printf("matching subject ");
150     pchars(eptr, length, TRUE, md); /* is_subject TRUE: output is clamped at md->end_subject */
151     }
152     printf(" against backref ");
153     pchars(p, length, FALSE, md); /* is_subject FALSE: pchars does not clamp length for the reference text */
154     printf("\n");
155     #endif
156    
157     /* Always fail if not enough characters left */
158    
159     if (length > md->end_subject - eptr) return FALSE; /* this is also how an over-long length passed by the caller turns into a failure */
160    
161 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162     properly if Unicode properties are supported. Otherwise, we can check only
163     ASCII characters. */
164 nigel 77
165     if ((ims & PCRE_CASELESS) != 0)
166     {
167 ph10 354 #ifdef SUPPORT_UTF8
168     #ifdef SUPPORT_UCP
169     if (md->utf8)
170     {
171 ph10 358 USPTR endptr = eptr + length; /* the loop stops when the subject pointer reaches this; NOTE(review): p advances independently and is not bounded - assumes both sides occupy the same number of bytes, confirm */
172 ph10 354 while (eptr < endptr)
173     {
174 ph10 358 int c, d; /* c: character read from the subject, d: character read from the reference */
175 ph10 354 GETCHARINC(c, eptr);
176     GETCHARINC(d, p);
177     if (c != d && c != UCD_OTHERCASE(d)) return FALSE; /* accept an exact match, or c being the other case of d */
178 ph10 358 }
179     }
180 ph10 354 else /* this else governs the while loop below when the UTF-8/UCP code above is compiled in */
181     #endif
182     #endif
183    
184     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185     is no UCP support. */
186 ph10 358
187 nigel 77 while (length-- > 0)
188 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; } /* byte-by-byte comparison after mapping both sides through the md->lcc table */
189 nigel 77 }
190 ph10 358
191 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
192     are in UTF-8 mode. */
193 ph10 358
194 nigel 77 else
195     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; } /* plain byte equality */
196    
197     return TRUE; /* every compared position agreed (trivially so when length is <= 0) */
198     }
199    
200    
201    
202     /***************************************************************************
203     ****************************************************************************
204     RECURSION IN THE match() FUNCTION
205    
206 nigel 87 The match() function is highly recursive, though not every recursive call
207     increases the recursive depth. Nevertheless, some regular expressions can cause
208     it to recurse to a great depth. I was writing for Unix, so I just let it call
209     itself recursively. This uses the stack for saving everything that has to be
210     saved for a recursive call. On Unix, the stack can be large, and this works
211     fine.
212 nigel 77
213 nigel 87 It turns out that on some non-Unix-like systems there are problems with
214     programs that use a lot of stack. (This despite the fact that every last chip
215     has oodles of memory these days, and techniques for extending the stack have
216     been known for decades.) So....
217 nigel 77
218     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219     calls by keeping local variables that need to be preserved in blocks of memory
220 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
221 nigel 77 achieve this so that the actual code doesn't look very different to what it
222     always used to.
223 ph10 164
224 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
225 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
226     Switzer, the use of longjmp() has been abolished, at the cost of having to
227     provide a unique number for each call to RMATCH. There is no way of generating
228     a sequence of numbers at compile time in C. I have given them names, to make
229     them stand out more clearly.
230    
231     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
234     don't have indeterminate values; this has meant that the frame size can be
235 ph10 164 reduced because the result can be "passed back" by straight setting of the
236     variable instead of being passed in the frame.
237 nigel 77 ****************************************************************************
238     ***************************************************************************/
239    
240 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241     below must be updated in sync. */
242 nigel 77
243 ph10 164 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
244     RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
245     RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
246     RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
247 ph10 210 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
248 ph10 212 RM51, RM52, RM53, RM54 }; /* RM1 is 1 and the rest follow consecutively; each value is passed as the last (rw) argument of one RMATCH call */
249 ph10 164
250 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
251 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
252 ph10 501 actually used in this definition. */
253 nigel 77
254     #ifndef NO_RECURSE
255     #define REGISTER register
256 ph10 164
257 ph10 475 #ifdef PCRE_DEBUG
258 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259 nigel 87 { \
260     printf("match() called in line %d\n", __LINE__); \
261 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); /* mstart, markptr, rdepth and rrc come from the expansion site, not from the macro arguments; rw is ignored */ \
262 nigel 87 printf("to line %d\n", __LINE__); \
263     }
264     #define RRETURN(ra) \
265     { \
266     printf("match() returned %d from line %d ", ra, __LINE__); /* note that ra is expanded twice in this debugging version */ \
267     return ra; \
268     }
269     #else
270 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) /* production version: a plain recursive call whose result is left in rrc; rw is ignored */ \
271 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
272 nigel 77 #define RRETURN(ra) return ra
273 nigel 87 #endif
274    
275 nigel 77 #else
276    
277    
278 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
279     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280     argument of match(), which never changes. */
281 nigel 77
282     #define REGISTER
283    
284 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) /* heap version: push a new frame and jump to HEAP_RECURSE instead of calling match() */\
285 nigel 77 {\
286     heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe)); /* NOTE(review): the result is not checked for NULL before the stores below */\
287 ph10 164 frame->Xwhere = rw; /* recorded in the CURRENT frame: identifies which L_<rw> label this call resumes at */ \
288     newframe->Xeptr = ra;\
289     newframe->Xecode = rb;\
290 ph10 168 newframe->Xmstart = mstart; /* mstart and markptr are copied from the expansion site, as in the stack version */\
291 ph10 501 newframe->Xmarkptr = markptr;\
292 ph10 164 newframe->Xoffset_top = rc; /* rd is deliberately not stored anywhere in the frame */\
293     newframe->Xims = re;\
294     newframe->Xeptrb = rf;\
295     newframe->Xflags = rg;\
296     newframe->Xrdepth = frame->Xrdepth + 1; /* one level deeper than the current frame */\
297     newframe->Xprevframe = frame; /* link back so that RRETURN can pop to the current frame */\
298     frame = newframe;\
299     DPRINTF(("restarting from line %d\n", __LINE__));\
300     goto HEAP_RECURSE;\
301     L_##rw: /* label built from the rw argument, which is why each RMATCH use needs its own RMn value */\
302     DPRINTF(("jumped back to line %d\n", __LINE__));\
303 nigel 77 }
304    
305     #define RRETURN(ra) /* heap version: pop the current frame, then either resume the previous frame or really return */\
306     {\
307     heapframe *newframe = frame; /* the frame being discarded */\
308     frame = newframe->Xprevframe; /* step back to the previous frame (NULL when this was the top level) */\
309     (pcre_stack_free)(newframe);\
310     if (frame != NULL)\
311     {\
312 ph10 164 rrc = ra; /* result handed back through rrc; ra is evaluated only after the frame has been popped and freed */\
313     goto HEAP_RETURN;\
314 nigel 77 }\
315     return ra; /* no previous frame: leave match() itself */\
316     }
317    
318    
319     /* Structure for remembering the local variables in a private frame */
320    
321     typedef struct heapframe { /* one saved "activation" of match() for the NO_RECURSE build; allocated by RMATCH and freed by RRETURN */
322     struct heapframe *Xprevframe; /* frame that was current when this one was pushed; NULL in the top-level frame */
323    
324     /* Function arguments that may change */
325    
326 ph10 409 USPTR Xeptr;
327 nigel 77 const uschar *Xecode;
328 ph10 409 USPTR Xmstart;
329 ph10 501 USPTR Xmarkptr;
330 nigel 77 int Xoffset_top;
331     long int Xims; /* NOTE(review): signed here, whereas the ims parameter of match() is unsigned long int */
332     eptrblock *Xeptrb;
333     int Xflags;
334 nigel 91 unsigned int Xrdepth; /* RMATCH sets this to the previous frame's Xrdepth + 1 */
335 nigel 77
336     /* Function local variables */
337    
338 ph10 409 USPTR Xcallpat;
339 ph10 406 #ifdef SUPPORT_UTF8
340 ph10 409 USPTR Xcharptr;
341 ph10 406 #endif
342 ph10 409 USPTR Xdata;
343     USPTR Xnext;
344     USPTR Xpp;
345     USPTR Xprev;
346     USPTR Xsaved_eptr;
347 nigel 77
348     recursion_info Xnew_recursive;
349    
350     BOOL Xcur_is_word;
351     BOOL Xcondition;
352     BOOL Xprev_is_word;
353    
354     unsigned long int Xoriginal_ims; /* match() stores its incoming ims here so it can be reset on ')' */
355    
356     #ifdef SUPPORT_UCP
357     int Xprop_type;
358 nigel 87 int Xprop_value;
359 nigel 77 int Xprop_fail_result;
360     int Xprop_category;
361     int Xprop_chartype;
362 nigel 87 int Xprop_script;
363 ph10 123 int Xoclength;
364     uschar Xocchars[8];
365 nigel 77 #endif
366    
367 ph10 403 int Xcodelink;
368 nigel 77 int Xctype;
369 nigel 93 unsigned int Xfc; /* separate slot in the frame; in the recursive build fc is simply an alias for c */
370 nigel 77 int Xfi; /* separate slot in the frame; in the recursive build fi is simply an alias for i */
371     int Xlength;
372     int Xmax;
373     int Xmin;
374     int Xnumber;
375     int Xoffset;
376     int Xop;
377     int Xsave_capture_last;
378     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
379     int Xstacksave[REC_STACK_SAVE_MAX]; /* fixed-size area for saved offsets; larger offset vectors use malloc instead */
380    
381     eptrblock Xnewptrb; /* storage for the eptrblock that match() chains in when match_cbegroup is set */
382    
383 ph10 164 /* Where to jump back to */
384 nigel 77
385 ph10 164 int Xwhere; /* the rw value stored by RMATCH just before it pushes the next frame */
386 ph10 165
387 nigel 77 } heapframe;
388    
389     #endif
390    
391    
392     /***************************************************************************
393     ***************************************************************************/
394    
395    
396    
397     /*************************************************
398     * Match from current position *
399     *************************************************/
400    
401 nigel 93 /* This function is called recursively in many circumstances. Whenever it
402 nigel 77 returns a negative (error) response, the outer incarnation must also return the
403 ph10 426 same response. */
404 nigel 77
405 ph10 426 /* These macros pack up tests that are used for partial matching, and which
406     appear several times in the code. We set the "hit end" flag if the pointer is
407     at the end of the subject and also past the start of the subject (i.e.
408 ph10 427 something has been matched). For hard partial matching, we then return
409     immediately. The second one is used when we already know we are past the end of
410     the subject. */
411 ph10 426
412     #define CHECK_PARTIAL() /* uses md, eptr and mstart from the expansion site; may leave match() through RRETURN */\
413 ph10 435 if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart) /* partial matching requested, at or beyond the subject end, and past the match start */\
414 ph10 427 {\
415     md->hitend = TRUE;\
416     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); /* a value above 1 is the hard-partial case: give up immediately */\
417     }
418 ph10 426
419     #define SCHECK_PARTIAL() /* same as CHECK_PARTIAL but without the end-of-subject test, for places that already know it holds */\
420 ph10 462 if (md->partial != 0 && eptr > mstart)\
421 ph10 427 {\
422     md->hitend = TRUE;\
423     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
424     }
425 ph10 426
426 ph10 427
427 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
428     the md structure (e.g. utf8, end_subject) into individual variables to improve
429 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
430     made performance worse.
431    
432     Arguments:
433 nigel 93 eptr pointer to current character in subject
434     ecode pointer to current position in compiled code
435 ph10 168 mstart pointer to the current match start position (can be modified
436 ph10 172 by encountering \K)
437 ph10 501 markptr pointer to the most recent MARK name, or NULL
438 nigel 77 offset_top current top pointer
439     md pointer to "static" info for the match
440     ims current /i, /m, and /s options
441     eptrb pointer to chain of blocks containing eptr at start of
442     brackets - for testing for empty matches
443     flags can contain
444     match_condassert - this is an assertion condition
445 nigel 93 match_cbegroup - this is the start of an unlimited repeat
446     group that can match an empty string
447 nigel 87 rdepth the recursion depth
448 nigel 77
449     Returns: MATCH_MATCH if matched ) these values are >= 0
450     MATCH_NOMATCH if failed to match )
451     a negative PCRE_ERROR_xxx value if aborted by an error condition
452 nigel 87 (e.g. stopped by repeated call or recursion limit)
453 nigel 77 */
454    
455     static int
456 ph10 501 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, USPTR
457     markptr, int offset_top, match_data *md, unsigned long int ims,
458     eptrblock *eptrb, int flags, unsigned int rdepth)
459 nigel 77 {
460     /* These variables do not need to be preserved over recursion in this function,
461 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
462     "register" because they are used a lot in loops. */
463 nigel 77
464 nigel 91 register int rrc; /* Returns from recursive calls */
465     register int i; /* Used for loops not involving calls to RMATCH() */
466 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
467 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
468 nigel 77
469 nigel 93 BOOL minimize, possessive; /* Quantifier options */
470 ph10 403 int condcode;
471 nigel 93
472 nigel 77 /* When recursion is not being used, all "local" variables that have to be
473     preserved over calls to RMATCH() are part of a "frame" which is obtained from
474     heap storage. Set up the top-level frame here; others are obtained from the
475     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
476    
477     #ifdef NO_RECURSE
478     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
479     frame->Xprevframe = NULL; /* Marks the top level */
480    
481     /* Copy in the original argument variables */
482    
483     frame->Xeptr = eptr;
484     frame->Xecode = ecode;
485 ph10 168 frame->Xmstart = mstart;
486 ph10 501 frame->Xmarkptr = markptr;
487 nigel 77 frame->Xoffset_top = offset_top;
488     frame->Xims = ims;
489     frame->Xeptrb = eptrb;
490     frame->Xflags = flags;
491 nigel 87 frame->Xrdepth = rdepth;
492 nigel 77
493     /* This is where control jumps back to in order to effect "recursion" */
494    
495     HEAP_RECURSE:
496    
497     /* Macros make the argument variables come from the current frame */
498    
499     #define eptr frame->Xeptr
500     #define ecode frame->Xecode
501 ph10 168 #define mstart frame->Xmstart
502 ph10 501 #define markptr frame->Xmarkptr
503 nigel 77 #define offset_top frame->Xoffset_top
504     #define ims frame->Xims
505     #define eptrb frame->Xeptrb
506     #define flags frame->Xflags
507 nigel 87 #define rdepth frame->Xrdepth
508 nigel 77
509     /* Ditto for the local variables */
510    
511     #ifdef SUPPORT_UTF8
512     #define charptr frame->Xcharptr
513     #endif
514     #define callpat frame->Xcallpat
515 ph10 403 #define codelink frame->Xcodelink
516 nigel 77 #define data frame->Xdata
517     #define next frame->Xnext
518     #define pp frame->Xpp
519     #define prev frame->Xprev
520     #define saved_eptr frame->Xsaved_eptr
521    
522     #define new_recursive frame->Xnew_recursive
523    
524     #define cur_is_word frame->Xcur_is_word
525     #define condition frame->Xcondition
526     #define prev_is_word frame->Xprev_is_word
527    
528     #define original_ims frame->Xoriginal_ims
529    
530     #ifdef SUPPORT_UCP
531     #define prop_type frame->Xprop_type
532 nigel 87 #define prop_value frame->Xprop_value
533 nigel 77 #define prop_fail_result frame->Xprop_fail_result
534     #define prop_category frame->Xprop_category
535     #define prop_chartype frame->Xprop_chartype
536 nigel 87 #define prop_script frame->Xprop_script
537 ph10 115 #define oclength frame->Xoclength
538     #define occhars frame->Xocchars
539 nigel 77 #endif
540    
541     #define ctype frame->Xctype
542     #define fc frame->Xfc
543     #define fi frame->Xfi
544     #define length frame->Xlength
545     #define max frame->Xmax
546     #define min frame->Xmin
547     #define number frame->Xnumber
548     #define offset frame->Xoffset
549     #define op frame->Xop
550     #define save_capture_last frame->Xsave_capture_last
551     #define save_offset1 frame->Xsave_offset1
552     #define save_offset2 frame->Xsave_offset2
553     #define save_offset3 frame->Xsave_offset3
554     #define stacksave frame->Xstacksave
555    
556     #define newptrb frame->Xnewptrb
557    
558     /* When recursion is being used, local variables are allocated on the stack and
559     get preserved during recursion in the normal way. In this environment, fi and
560     i, and fc and c, can be the same variables. */
561    
562 nigel 93 #else /* NO_RECURSE not defined */
563 nigel 77 #define fi i
564     #define fc c
565    
566    
567 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
568     const uschar *charptr; /* in small blocks of the code. My normal */
569     #endif /* style of coding would have declared */
570     const uschar *callpat; /* them within each of those blocks. */
571     const uschar *data; /* However, in order to accommodate the */
572     const uschar *next; /* version of this code that uses an */
573     USPTR pp; /* external "stack" implemented on the */
574     const uschar *prev; /* heap, it is easier to declare them all */
575     USPTR saved_eptr; /* here, so the declarations can be cut */
576     /* out in a block. The only declarations */
577     recursion_info new_recursive; /* within blocks below are for variables */
578     /* that do not have to be preserved over */
579     BOOL cur_is_word; /* a recursive call to RMATCH(). */
580     BOOL condition;
581 nigel 77 BOOL prev_is_word;
582    
583     unsigned long int original_ims;
584    
585     #ifdef SUPPORT_UCP
586     int prop_type;
587 nigel 87 int prop_value;
588 nigel 77 int prop_fail_result;
589     int prop_category;
590     int prop_chartype;
591 nigel 87 int prop_script;
592 ph10 115 int oclength;
593     uschar occhars[8];
594 nigel 77 #endif
595    
596 ph10 399 int codelink;
597 nigel 77 int ctype;
598     int length;
599     int max;
600     int min;
601     int number;
602     int offset;
603     int op;
604     int save_capture_last;
605     int save_offset1, save_offset2, save_offset3;
606     int stacksave[REC_STACK_SAVE_MAX];
607    
608     eptrblock newptrb;
609 nigel 93 #endif /* NO_RECURSE */
610 nigel 77
611     /* These statements are here to stop the compiler complaining about uninitialized
612     variables. */
613    
614     #ifdef SUPPORT_UCP
615 nigel 87 prop_value = 0;
616 nigel 77 prop_fail_result = 0;
617     #endif
618    
619 nigel 93
620 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
621     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
622     used. Thanks to Ian Taylor for noticing this possibility and sending the
623     original patch. */
624    
625     TAIL_RECURSE:
626    
627 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
628     are specified by the macro RMATCH and RRETURN is used to return. When
629     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
630 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
631 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
632     complicated macro. It has to be used in one particular way. This shouldn't,
633     however, impact performance when true recursion is being used. */
634 nigel 77
635 ph10 164 #ifdef SUPPORT_UTF8
636     utf8 = md->utf8; /* Local copy of the flag */
637     #else
638     utf8 = FALSE;
639     #endif
640    
641 nigel 87 /* First check that we haven't called match() too many times, or that we
642     haven't exceeded the recursive call limit. */
643    
644 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
645 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
646 nigel 77
647     original_ims = ims; /* Save for resetting on ')' */
648 nigel 91
649 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
650     string, the match_cbegroup flag is set. When this is the case, add the current
651     subject pointer to the chain of such remembered pointers, to be checked when we
652     hit the closing ket, in order to break infinite loops that match no characters.
653 ph10 197 When match() is called in other circumstances, don't add to the chain. The
654     match_cbegroup flag must NOT be used with tail recursion, because the memory
655     block that is used is on the stack, so a new one may be required for each
656     match(). */
657 nigel 77
658 nigel 93 if ((flags & match_cbegroup) != 0)
659 nigel 77 {
660 ph10 197 newptrb.epb_saved_eptr = eptr;
661     newptrb.epb_prev = eptrb;
662     eptrb = &newptrb;
663 nigel 77 }
664    
665 nigel 93 /* Now start processing the opcodes. */
666 nigel 77
667     for (;;)
668     {
669 nigel 93 minimize = possessive = FALSE;
670 nigel 77 op = *ecode;
671 ph10 443
672 nigel 93 switch(op)
673     {
674 ph10 210 case OP_FAIL:
675 ph10 212 RRETURN(MATCH_NOMATCH);
676 ph10 211
677 ph10 210 case OP_PRUNE:
678     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
679     ims, eptrb, flags, RM51);
680     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
681 ph10 212 RRETURN(MATCH_PRUNE);
682 ph10 211
683 ph10 210 case OP_COMMIT:
684     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
685     ims, eptrb, flags, RM52);
686     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
687 ph10 212 RRETURN(MATCH_COMMIT);
688 ph10 211
689 ph10 210 case OP_SKIP:
690     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
691     ims, eptrb, flags, RM53);
692     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
693 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
694 ph10 212 RRETURN(MATCH_SKIP);
695 ph10 211
696 ph10 210 case OP_THEN:
697     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
698 ph10 212 ims, eptrb, flags, RM54);
699 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
700 ph10 212 RRETURN(MATCH_THEN);
701 ph10 211
702 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
703     the current subject position in the working slot at the top of the vector.
704     We mustn't change the current values of the data slot, because they may be
705     set from a previous iteration of this group, and be referred to by a
706     reference inside the group.
707 nigel 77
708 nigel 93 If the bracket fails to match, we need to restore this value and also the
709     values of the final offsets, in case they were set by a previous iteration
710     of the same bracket.
711 nigel 77
712 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
713     a non-capturing bracket. Don't worry about setting the flag for the error
714     case here; that is handled in the code for KET. */
715 nigel 77
716 nigel 93 case OP_CBRA:
717     case OP_SCBRA:
718     number = GET2(ecode, 1+LINK_SIZE);
719 nigel 77 offset = number << 1;
720    
721 ph10 475 #ifdef PCRE_DEBUG
722 nigel 93 printf("start bracket %d\n", number);
723     printf("subject=");
724 nigel 77 pchars(eptr, 16, TRUE, md);
725     printf("\n");
726     #endif
727    
728     if (offset < md->offset_max)
729     {
730     save_offset1 = md->offset_vector[offset];
731     save_offset2 = md->offset_vector[offset+1];
732     save_offset3 = md->offset_vector[md->offset_end - number];
733     save_capture_last = md->capture_last;
734    
735     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
736     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
737    
738 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
739 nigel 77 do
740     {
741 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
742     ims, eptrb, flags, RM1);
743 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
744 nigel 77 md->capture_last = save_capture_last;
745     ecode += GET(ecode, 1);
746     }
747     while (*ecode == OP_ALT);
748    
749     DPRINTF(("bracket %d failed\n", number));
750    
751     md->offset_vector[offset] = save_offset1;
752     md->offset_vector[offset+1] = save_offset2;
753     md->offset_vector[md->offset_end - number] = save_offset3;
754    
755     RRETURN(MATCH_NOMATCH);
756     }
757    
758 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
759     as a non-capturing bracket. */
760 nigel 77
761 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
762     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
763    
764 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
765 nigel 77
766 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
767     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
768    
769 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
770     final alternative within the brackets, we would return the result of a
771     recursive call to match() whatever happened. We can reduce stack usage by
772 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
773     is set.*/
774 nigel 77
775 nigel 93 case OP_BRA:
776     case OP_SBRA:
777     DPRINTF(("start non-capturing bracket\n"));
778     flags = (op >= OP_SBRA)? match_cbegroup : 0;
779 nigel 91 for (;;)
780 nigel 77 {
781 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
782 nigel 93 {
783 ph10 197 if (flags == 0) /* Not a possibly empty group */
784     {
785     ecode += _pcre_OP_lengths[*ecode];
786     DPRINTF(("bracket 0 tail recursion\n"));
787     goto TAIL_RECURSE;
788     }
789    
790     /* Possibly empty group; can't use tail recursion. */
791    
792     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
793     eptrb, flags, RM48);
794     RRETURN(rrc);
795 nigel 93 }
796 nigel 91
797     /* For non-final alternatives, continue the loop for a NOMATCH result;
798     otherwise return. */
799    
800 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
801     eptrb, flags, RM2);
802 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
803 nigel 77 ecode += GET(ecode, 1);
804     }
805 nigel 91 /* Control never reaches here. */
806 nigel 77
807     /* Conditional group: compilation checked that there are no more than
808     two branches. If the condition is false, skipping the first branch takes us
809     past the end if there is only one branch, but that's OK because that is
810 nigel 91 exactly what going to the ket would do. As there is only one branch to be
811     obeyed, we can use tail recursion to avoid using another stack frame. */
812 nigel 77
813     case OP_COND:
814 nigel 93 case OP_SCOND:
815 ph10 399 codelink= GET(ecode, 1);
816 ph10 406
817 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
818     inserted between OP_COND and an assertion condition. */
819 ph10 392
820 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
821     {
822     if (pcre_callout != NULL)
823     {
824     pcre_callout_block cb;
825     cb.version = 1; /* Version 1 of the callout block */
826     cb.callout_number = ecode[LINK_SIZE+2];
827     cb.offset_vector = md->offset_vector;
828     cb.subject = (PCRE_SPTR)md->start_subject;
829     cb.subject_length = md->end_subject - md->start_subject;
830     cb.start_match = mstart - md->start_subject;
831     cb.current_position = eptr - md->start_subject;
832     cb.pattern_position = GET(ecode, LINK_SIZE + 3);
833     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
834     cb.capture_top = offset_top/2;
835     cb.capture_last = md->capture_last;
836     cb.callout_data = md->callout_data;
837     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
838     if (rrc < 0) RRETURN(rrc);
839     }
840     ecode += _pcre_OP_lengths[OP_CALLOUT];
841     }
842 ph10 392
843 ph10 399 condcode = ecode[LINK_SIZE+1];
844 ph10 406
845 ph10 381 /* Now see what the actual condition is */
846 ph10 392
847 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
848 nigel 77 {
849 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
850     {
851 ph10 461 condition = FALSE;
852     ecode += GET(ecode, 1);
853     }
854 ph10 459 else
855 ph10 461 {
856 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
857     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
858 ph10 461
859 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
860     false, but the test was set up by name, scan the table to see if the
861     name refers to any other numbers, and test them. The condition is true
862     if any one is set. */
863 ph10 461
864 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
865     {
866     uschar *slotA = md->name_table;
867     for (i = 0; i < md->name_count; i++)
868 ph10 461 {
869     if (GET2(slotA, 0) == recno) break;
870 ph10 459 slotA += md->name_entry_size;
871     }
872 ph10 461
873 ph10 459 /* Found a name for the number - there can be only one; duplicate
874     names for different numbers are allowed, but not vice versa. First
875     scan down for duplicates. */
876 ph10 461
877 ph10 459 if (i < md->name_count)
878 ph10 461 {
879 ph10 459 uschar *slotB = slotA;
880     while (slotB > md->name_table)
881     {
882     slotB -= md->name_entry_size;
883     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
884     {
885     condition = GET2(slotB, 0) == md->recursive->group_num;
886 ph10 461 if (condition) break;
887     }
888 ph10 459 else break;
889 ph10 461 }
890    
891 ph10 459 /* Scan up for duplicates */
892 ph10 461
893 ph10 459 if (!condition)
894 ph10 461 {
895 ph10 459 slotB = slotA;
896     for (i++; i < md->name_count; i++)
897     {
898     slotB += md->name_entry_size;
899     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
900     {
901     condition = GET2(slotB, 0) == md->recursive->group_num;
902     if (condition) break;
903 ph10 461 }
904 ph10 459 else break;
905 ph10 461 }
906     }
907 ph10 459 }
908 ph10 461 }
909    
910 ph10 459 /* Choose branch according to the condition */
911 ph10 461
912 ph10 459 ecode += condition? 3 : GET(ecode, 1);
913     }
914 ph10 461 }
915 nigel 93
916 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
917 nigel 93 {
918 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
919 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
920 ph10 461
921 ph10 459 /* If the numbered capture is unset, but the reference was by name,
922 ph10 461 scan the table to see if the name refers to any other numbers, and test
923     them. The condition is true if any one is set. This is tediously similar
924     to the code above, but not close enough to try to amalgamate. */
925    
926 ph10 459 if (!condition && condcode == OP_NCREF)
927     {
928 ph10 461 int refno = offset >> 1;
929 ph10 459 uschar *slotA = md->name_table;
930 ph10 461
931 ph10 459 for (i = 0; i < md->name_count; i++)
932 ph10 461 {
933     if (GET2(slotA, 0) == refno) break;
934 ph10 459 slotA += md->name_entry_size;
935     }
936 ph10 461
937     /* Found a name for the number - there can be only one; duplicate names
938     for different numbers are allowed, but not vice versa. First scan down
939 ph10 459 for duplicates. */
940 ph10 461
941 ph10 459 if (i < md->name_count)
942 ph10 461 {
943 ph10 459 uschar *slotB = slotA;
944     while (slotB > md->name_table)
945     {
946     slotB -= md->name_entry_size;
947     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
948     {
949     offset = GET2(slotB, 0) << 1;
950 ph10 461 condition = offset < offset_top &&
951 ph10 459 md->offset_vector[offset] >= 0;
952 ph10 461 if (condition) break;
953     }
954 ph10 459 else break;
955 ph10 461 }
956    
957 ph10 459 /* Scan up for duplicates */
958 ph10 461
959 ph10 459 if (!condition)
960 ph10 461 {
961 ph10 459 slotB = slotA;
962     for (i++; i < md->name_count; i++)
963     {
964     slotB += md->name_entry_size;
965     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
966     {
967     offset = GET2(slotB, 0) << 1;
968 ph10 461 condition = offset < offset_top &&
969 ph10 459 md->offset_vector[offset] >= 0;
970 ph10 461 if (condition) break;
971     }
972 ph10 459 else break;
973 ph10 461 }
974     }
975 ph10 459 }
976 ph10 461 }
977    
978 ph10 459 /* Choose branch according to the condition */
979    
980 nigel 93 ecode += condition? 3 : GET(ecode, 1);
981 nigel 77 }
982    
983 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
984 nigel 93 {
985     condition = FALSE;
986     ecode += GET(ecode, 1);
987     }
988    
989 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
990 nigel 93 the final argument match_condassert causes it to stop at the end of an
991     assertion. */
992 nigel 77
993     else
994     {
995 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
996     match_condassert, RM3);
997 nigel 77 if (rrc == MATCH_MATCH)
998     {
999 nigel 93 condition = TRUE;
1000     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1001 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1002     }
1003 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1004 nigel 77 {
1005     RRETURN(rrc); /* Need braces because of following else */
1006     }
1007 nigel 93 else
1008     {
1009     condition = FALSE;
1010 ph10 399 ecode += codelink;
1011 nigel 93 }
1012     }
1013 nigel 91
1014 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1015 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1016     match_cbegroup is required for an unlimited repeat of a possibly empty
1017     group. If the second alternative doesn't exist, we can just plough on. */
1018 nigel 91
1019 nigel 93 if (condition || *ecode == OP_ALT)
1020     {
1021 nigel 91 ecode += 1 + LINK_SIZE;
1022 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1023     {
1024     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1025     RRETURN(rrc);
1026     }
1027     else /* Group must match something */
1028     {
1029     flags = 0;
1030     goto TAIL_RECURSE;
1031     }
1032 nigel 77 }
1033 ph10 395 else /* Condition false & no alternative */
1034 nigel 93 {
1035     ecode += 1 + LINK_SIZE;
1036     }
1037     break;
1038 nigel 77
1039 ph10 461
1040 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1041     to close any currently open capturing brackets. */
1042 ph10 461
1043 ph10 447 case OP_CLOSE:
1044 ph10 461 number = GET2(ecode, 1);
1045 ph10 447 offset = number << 1;
1046 ph10 461
1047 ph10 475 #ifdef PCRE_DEBUG
1048 ph10 447 printf("end bracket %d at *ACCEPT", number);
1049     printf("\n");
1050     #endif
1051 nigel 77
1052 ph10 447 md->capture_last = number;
1053     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1054     {
1055     md->offset_vector[offset] =
1056     md->offset_vector[md->offset_end - number];
1057     md->offset_vector[offset+1] = eptr - md->start_subject;
1058     if (offset_top <= offset) offset_top = offset + 2;
1059     }
1060     ecode += 3;
1061 ph10 461 break;
1062 ph10 447
1063    
1064 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1065     recursion, we should restore the offsets appropriately and continue from
1066     after the call. */
1067 nigel 77
1068 ph10 210 case OP_ACCEPT:
1069 nigel 77 case OP_END:
1070     if (md->recursive != NULL && md->recursive->group_num == 0)
1071     {
1072     recursion_info *rec = md->recursive;
1073 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1074 nigel 77 md->recursive = rec->prevrec;
1075     memmove(md->offset_vector, rec->offset_save,
1076     rec->saved_max * sizeof(int));
1077 ph10 461 offset_top = rec->save_offset_top;
1078 nigel 77 ims = original_ims;
1079     ecode = rec->after_call;
1080     break;
1081     }
1082    
1083 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1084     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1085     the subject. In both cases, backtracking will then try other alternatives,
1086     if any. */
1087 ph10 443
1088 ph10 442 if (eptr == mstart &&
1089     (md->notempty ||
1090 ph10 443 (md->notempty_atstart &&
1091 ph10 442 mstart == md->start_subject + md->start_offset)))
1092 ph10 443 RRETURN(MATCH_NOMATCH);
1093    
1094 ph10 442 /* Otherwise, we have a match. */
1095 nigel 77
1096 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1097     md->end_offset_top = offset_top; /* and how many extracts were taken */
1098 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1099 nigel 77 RRETURN(MATCH_MATCH);
1100    
1101     /* Change option settings */
1102    
1103     case OP_OPT:
1104     ims = ecode[1];
1105     ecode += 2;
1106     DPRINTF(("ims set to %02lx\n", ims));
1107     break;
1108    
1109     /* Assertion brackets. Check the alternative branches in turn - the
1110     matching won't pass the KET for an assertion. If any one branch matches,
1111     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1112     start of each branch to move the current point backwards, so the code at
1113     this level is identical to the lookahead case. */
1114    
1115     case OP_ASSERT:
1116     case OP_ASSERTBACK:
1117     do
1118     {
1119 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1120     RM4);
1121 ph10 501 if (rrc == MATCH_MATCH)
1122 ph10 500 {
1123     mstart = md->start_match_ptr; /* In case \K reset it */
1124     break;
1125 ph10 501 }
1126 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1127 nigel 77 ecode += GET(ecode, 1);
1128     }
1129     while (*ecode == OP_ALT);
1130     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1131    
1132     /* If checking an assertion for a condition, return MATCH_MATCH. */
1133    
1134     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1135    
1136     /* Continue from after the assertion, updating the offsets high water
1137     mark, since extracts may have been taken during the assertion. */
1138    
1139     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1140     ecode += 1 + LINK_SIZE;
1141     offset_top = md->end_offset_top;
1142     continue;
1143    
1144 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1145 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1146 ph10 473 branches. */
1147 nigel 77
1148     case OP_ASSERT_NOT:
1149     case OP_ASSERTBACK_NOT:
1150     do
1151     {
1152 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1153     RM5);
1154 nigel 77 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
1155 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1156     {
1157     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1158 ph10 482 break;
1159     }
1160 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1161 nigel 77 ecode += GET(ecode,1);
1162     }
1163     while (*ecode == OP_ALT);
1164    
1165     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1166    
1167     ecode += 1 + LINK_SIZE;
1168     continue;
1169    
1170     /* Move the subject pointer back. This occurs only at the start of
1171     each branch of a lookbehind assertion. If we are too close to the start to
1172     move back, this match function fails. When working with UTF-8 we move
1173     back a number of characters, not bytes. */
1174    
1175     case OP_REVERSE:
1176     #ifdef SUPPORT_UTF8
1177     if (utf8)
1178     {
1179 nigel 93 i = GET(ecode, 1);
1180     while (i-- > 0)
1181 nigel 77 {
1182     eptr--;
1183     if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1184 ph10 207 BACKCHAR(eptr);
1185 nigel 77 }
1186     }
1187     else
1188     #endif
1189    
1190     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1191    
1192     {
1193 nigel 93 eptr -= GET(ecode, 1);
1194 nigel 77 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1195     }
1196    
1197 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1198 nigel 77
1199 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1200 nigel 77 ecode += 1 + LINK_SIZE;
1201     break;
1202    
1203     /* The callout item calls an external function, if one is provided, passing
1204     details of the match so far. This is mainly for debugging, though the
1205     function is able to force a failure. */
1206    
1207     case OP_CALLOUT:
1208     if (pcre_callout != NULL)
1209     {
1210     pcre_callout_block cb;
1211     cb.version = 1; /* Version 1 of the callout block */
1212     cb.callout_number = ecode[1];
1213     cb.offset_vector = md->offset_vector;
1214 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1215 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
1216 ph10 168 cb.start_match = mstart - md->start_subject;
1217 nigel 77 cb.current_position = eptr - md->start_subject;
1218     cb.pattern_position = GET(ecode, 2);
1219     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1220     cb.capture_top = offset_top/2;
1221     cb.capture_last = md->capture_last;
1222     cb.callout_data = md->callout_data;
1223     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1224     if (rrc < 0) RRETURN(rrc);
1225     }
1226     ecode += 2 + 2*LINK_SIZE;
1227     break;
1228    
1229     /* Recursion either matches the current regex, or some subexpression. The
1230     offset data is the offset to the starting bracket from the start of the
1231     whole pattern. (This is so that it works from duplicated subpatterns.)
1232    
1233     If there are any capturing brackets started but not finished, we have to
1234     save their starting points and reinstate them after the recursion. However,
1235     we don't know how many such there are (offset_top records the completed
1236     total) so we just have to save all the potential data. There may be up to
1237     65535 such values, which is too large to put on the stack, but using malloc
1238     for small numbers seems expensive. As a compromise, the stack is used when
1239     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1240     is used. A problem is what to do if the malloc fails ... there is no way of
1241     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
1242     values on the stack, and accept that the rest may be wrong.
1243    
1244     There are also other values that have to be saved. We use a chained
1245     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1246     for the original version of this logic. */
1247    
1248     case OP_RECURSE:
1249     {
1250     callpat = md->start_code + GET(ecode, 1);
1251 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1252     GET2(callpat, 1 + LINK_SIZE);
1253 nigel 77
1254     /* Add to "recursing stack" */
1255    
1256     new_recursive.prevrec = md->recursive;
1257     md->recursive = &new_recursive;
1258    
1259     /* Find where to continue from afterwards */
1260    
1261     ecode += 1 + LINK_SIZE;
1262     new_recursive.after_call = ecode;
1263    
1264     /* Now save the offset data. */
1265    
1266     new_recursive.saved_max = md->offset_end;
1267     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1268     new_recursive.offset_save = stacksave;
1269     else
1270     {
1271     new_recursive.offset_save =
1272     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1273     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1274     }
1275    
1276     memcpy(new_recursive.offset_save, md->offset_vector,
1277     new_recursive.saved_max * sizeof(int));
1278 ph10 461 new_recursive.save_offset_top = offset_top;
1279 nigel 77
1280     /* OK, now we can do the recursion. For each top-level alternative we
1281     restore the offset and recursion data. */
1282    
1283     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1284 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1285 nigel 77 do
1286     {
1287 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1288     md, ims, eptrb, flags, RM6);
1289 nigel 77 if (rrc == MATCH_MATCH)
1290     {
1291 nigel 87 DPRINTF(("Recursion matched\n"));
1292 nigel 77 md->recursive = new_recursive.prevrec;
1293     if (new_recursive.offset_save != stacksave)
1294     (pcre_free)(new_recursive.offset_save);
1295     RRETURN(MATCH_MATCH);
1296     }
1297 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1298 nigel 87 {
1299     DPRINTF(("Recursion gave error %d\n", rrc));
1300 ph10 400 if (new_recursive.offset_save != stacksave)
1301     (pcre_free)(new_recursive.offset_save);
1302 nigel 87 RRETURN(rrc);
1303     }
1304 nigel 77
1305     md->recursive = &new_recursive;
1306     memcpy(md->offset_vector, new_recursive.offset_save,
1307     new_recursive.saved_max * sizeof(int));
1308     callpat += GET(callpat, 1);
1309     }
1310     while (*callpat == OP_ALT);
1311    
1312     DPRINTF(("Recursion didn't match\n"));
1313     md->recursive = new_recursive.prevrec;
1314     if (new_recursive.offset_save != stacksave)
1315     (pcre_free)(new_recursive.offset_save);
1316     RRETURN(MATCH_NOMATCH);
1317     }
1318     /* Control never reaches here */
1319    
1320     /* "Once" brackets are like assertion brackets except that after a match,
1321     the point in the subject string is not moved back. Thus there can never be
1322     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1323     Check the alternative branches in turn - the matching won't pass the KET
1324     for this kind of subpattern. If any one branch matches, we carry on as at
1325 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1326     the start-of-match value in case it was changed by \K. */
1327 nigel 77
1328     case OP_ONCE:
1329 nigel 91 prev = ecode;
1330     saved_eptr = eptr;
1331    
1332     do
1333 nigel 77 {
1334 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1335 ph10 501 if (rrc == MATCH_MATCH)
1336 ph10 500 {
1337     mstart = md->start_match_ptr;
1338     break;
1339 ph10 501 }
1340 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1341 nigel 91 ecode += GET(ecode,1);
1342     }
1343     while (*ecode == OP_ALT);
1344 nigel 77
1345 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1346 nigel 77
1347 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1348 nigel 77
1349 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1350     mark, since extracts may have been taken. */
1351 nigel 77
1352 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1353 nigel 77
1354 nigel 91 offset_top = md->end_offset_top;
1355     eptr = md->end_match_ptr;
1356 nigel 77
1357 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1358     happens for a repeating ket if no characters were matched in the group.
1359     This is the forcible breaking of infinite loops as implemented in Perl
1360     5.005. If there is an options reset, it will get obeyed in the normal
1361     course of events. */
1362 nigel 77
1363 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1364     {
1365     ecode += 1+LINK_SIZE;
1366     break;
1367     }
1368 nigel 77
1369 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1370     preceding bracket, in the appropriate order. The second "call" of match()
1371     uses tail recursion, to avoid using another stack frame. We need to reset
1372     any options that changed within the bracket before re-running it, so
1373     check the next opcode. */
1374 nigel 77
1375 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1376     {
1377     ims = (ims & ~PCRE_IMS) | ecode[4];
1378     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1379     }
1380 nigel 77
1381 nigel 91 if (*ecode == OP_KETRMIN)
1382     {
1383 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1384 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1385     ecode = prev;
1386 ph10 197 flags = 0;
1387 nigel 91 goto TAIL_RECURSE;
1388 nigel 77 }
1389 nigel 91 else /* OP_KETRMAX */
1390     {
1391 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1392 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1393     ecode += 1 + LINK_SIZE;
1394 ph10 197 flags = 0;
1395 nigel 91 goto TAIL_RECURSE;
1396     }
1397     /* Control never gets here */
1398 nigel 77
1399     /* An alternation is the end of a branch; scan along to find the end of the
1400     bracketed group and go to there. */
1401    
1402     case OP_ALT:
1403     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1404     break;
1405    
1406 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1407     indicating that it may occur zero times. It may repeat infinitely, or not
1408     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1409     with fixed upper repeat limits are compiled as a number of copies, with the
1410     optional ones preceded by BRAZERO or BRAMINZERO. */
1411 nigel 77
1412     case OP_BRAZERO:
1413     {
1414     next = ecode+1;
1415 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1416 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1417     do next += GET(next,1); while (*next == OP_ALT);
1418 nigel 93 ecode = next + 1 + LINK_SIZE;
1419 nigel 77 }
1420     break;
1421    
1422     case OP_BRAMINZERO:
1423     {
1424     next = ecode+1;
1425 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1426 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1427 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1428     ecode++;
1429     }
1430     break;
1431    
1432 ph10 335 case OP_SKIPZERO:
1433     {
1434     next = ecode+1;
1435     do next += GET(next,1); while (*next == OP_ALT);
1436     ecode = next + 1 + LINK_SIZE;
1437     }
1438     break;
1439    
1440 nigel 93 /* End of a group, repeated or non-repeating. */
1441 nigel 77
1442     case OP_KET:
1443     case OP_KETRMIN:
1444     case OP_KETRMAX:
1445 nigel 91 prev = ecode - GET(ecode, 1);
1446 nigel 77
1447 nigel 93 /* If this was a group that remembered the subject start, in order to break
1448     infinite repeats of empty string matches, retrieve the subject start from
1449     the chain. Otherwise, set it NULL. */
1450 nigel 77
1451 nigel 93 if (*prev >= OP_SBRA)
1452     {
1453     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1454     eptrb = eptrb->epb_prev; /* Backup to previous group */
1455     }
1456     else saved_eptr = NULL;
1457 nigel 77
1458 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1459     matching and return MATCH_MATCH, but record the current high water mark for
1460     use by positive assertions. We also need to record the match start in case
1461     it was changed by \K. */
1462 nigel 93
1463 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1464     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1465     *prev == OP_ONCE)
1466     {
1467     md->end_match_ptr = eptr; /* For ONCE */
1468     md->end_offset_top = offset_top;
1469 ph10 500 md->start_match_ptr = mstart;
1470 nigel 91 RRETURN(MATCH_MATCH);
1471     }
1472 nigel 77
1473 nigel 93 /* For capturing groups we have to check the group number back at the start
1474     and if necessary complete handling an extraction by setting the offsets and
1475     bumping the high water mark. Note that whole-pattern recursion is coded as
1476     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1477     when the OP_END is reached. Other recursion is handled here. */
1478 nigel 77
1479 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1480 nigel 91 {
1481 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1482 nigel 91 offset = number << 1;
1483 ph10 461
1484 ph10 475 #ifdef PCRE_DEBUG
1485 nigel 91 printf("end bracket %d", number);
1486     printf("\n");
1487 nigel 77 #endif
1488    
1489 nigel 93 md->capture_last = number;
1490     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1491 nigel 91 {
1492 nigel 93 md->offset_vector[offset] =
1493     md->offset_vector[md->offset_end - number];
1494     md->offset_vector[offset+1] = eptr - md->start_subject;
1495     if (offset_top <= offset) offset_top = offset + 2;
1496     }
1497 nigel 77
1498 nigel 93 /* Handle a recursively called group. Restore the offsets
1499     appropriately and continue from after the call. */
1500 nigel 77
1501 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1502     {
1503     recursion_info *rec = md->recursive;
1504     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1505     md->recursive = rec->prevrec;
1506     memcpy(md->offset_vector, rec->offset_save,
1507     rec->saved_max * sizeof(int));
1508 ph10 461 offset_top = rec->save_offset_top;
1509 nigel 93 ecode = rec->after_call;
1510     ims = original_ims;
1511     break;
1512 nigel 77 }
1513 nigel 91 }
1514 nigel 77
1515 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1516     flags, in case they got changed during the group. */
1517 nigel 77
1518 nigel 91 ims = original_ims;
1519     DPRINTF(("ims reset to %02lx\n", ims));
1520 nigel 77
1521 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1522     happens for a repeating ket if no characters were matched in the group.
1523     This is the forcible breaking of infinite loops as implemented in Perl
1524     5.005. If there is an options reset, it will get obeyed in the normal
1525     course of events. */
1526 nigel 77
1527 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1528     {
1529     ecode += 1 + LINK_SIZE;
1530     break;
1531     }
1532 nigel 77
1533 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1534     preceding bracket, in the appropriate order. In the second case, we can use
1535 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1536     unlimited repeat of a group that can match an empty string. */
1537 nigel 77
1538 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1539    
1540 nigel 91 if (*ecode == OP_KETRMIN)
1541     {
1542 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1543 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1544 ph10 197 if (flags != 0) /* Could match an empty string */
1545     {
1546     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1547     RRETURN(rrc);
1548     }
1549 nigel 91 ecode = prev;
1550     goto TAIL_RECURSE;
1551 nigel 77 }
1552 nigel 91 else /* OP_KETRMAX */
1553     {
1554 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1555 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1556     ecode += 1 + LINK_SIZE;
1557 ph10 197 flags = 0;
1558 nigel 91 goto TAIL_RECURSE;
1559     }
1560     /* Control never gets here */
1561 nigel 77
1562     /* Start of subject unless notbol, or after internal newline if multiline */
1563    
1564     case OP_CIRC:
1565     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1566     if ((ims & PCRE_MULTILINE) != 0)
1567     {
1568 nigel 91 if (eptr != md->start_subject &&
1569 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1570 nigel 77 RRETURN(MATCH_NOMATCH);
1571     ecode++;
1572     break;
1573     }
1574     /* ... else fall through */
1575    
1576     /* Start of subject assertion */
1577    
1578     case OP_SOD:
1579     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1580     ecode++;
1581     break;
1582    
1583     /* Start of match assertion */
1584    
1585     case OP_SOM:
1586     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1587     ecode++;
1588     break;
1589 ph10 172
1590 ph10 168 /* Reset the start of match point */
1591 ph10 172
1592 ph10 168 case OP_SET_SOM:
1593     mstart = eptr;
1594 ph10 172 ecode++;
1595     break;
1596 nigel 77
1597     /* Assert before internal newline if multiline, or before a terminating
1598     newline unless endonly is set, else end of subject unless noteol is set. */
1599    
1600     case OP_DOLL:
1601     if ((ims & PCRE_MULTILINE) != 0)
1602     {
1603     if (eptr < md->end_subject)
1604 nigel 91 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1605 nigel 77 else
1606     { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1607     ecode++;
1608     break;
1609     }
1610     else
1611     {
1612     if (md->noteol) RRETURN(MATCH_NOMATCH);
1613     if (!md->endonly)
1614     {
1615 nigel 91 if (eptr != md->end_subject &&
1616 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1617 nigel 77 RRETURN(MATCH_NOMATCH);
1618     ecode++;
1619     break;
1620     }
1621     }
1622 nigel 91 /* ... else fall through for endonly */
1623 nigel 77
1624     /* End of subject assertion (\z) */
1625    
1626     case OP_EOD:
1627     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1628     ecode++;
1629     break;
1630    
1631     /* End of subject or ending \n assertion (\Z) */
1632    
1633     case OP_EODN:
1634 nigel 91 if (eptr != md->end_subject &&
1635 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1636 nigel 91 RRETURN(MATCH_NOMATCH);
1637 nigel 77 ecode++;
1638     break;
1639    
1640     /* Word boundary assertions */
1641    
1642     case OP_NOT_WORD_BOUNDARY:
1643     case OP_WORD_BOUNDARY:
1644     {
1645    
1646     /* Find out if the previous and current characters are "word" characters.
1647     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1648 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1649 ph10 435 partial matching. */
1650 nigel 77
1651     #ifdef SUPPORT_UTF8
1652     if (utf8)
1653     {
1654     if (eptr == md->start_subject) prev_is_word = FALSE; else
1655     {
1656 ph10 409 USPTR lastptr = eptr - 1;
1657 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1658 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1659 nigel 77 GETCHAR(c, lastptr);
1660     prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1661     }
1662 ph10 443 if (eptr >= md->end_subject)
1663 nigel 77 {
1664 ph10 443 SCHECK_PARTIAL();
1665     cur_is_word = FALSE;
1666 ph10 428 }
1667     else
1668     {
1669 nigel 77 GETCHAR(c, eptr);
1670     cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1671     }
1672     }
1673     else
1674     #endif
1675    
1676 ph10 428 /* Not in UTF-8 mode */
1677 nigel 77
1678     {
1679 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1680     {
1681 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1682 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1683     }
1684 ph10 443 if (eptr >= md->end_subject)
1685 ph10 428 {
1686 ph10 443 SCHECK_PARTIAL();
1687     cur_is_word = FALSE;
1688 ph10 428 }
1689     else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1690 nigel 77 }
1691    
1692     /* Now see if the situation is what we want */
1693    
1694     if ((*ecode++ == OP_WORD_BOUNDARY)?
1695     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1696     RRETURN(MATCH_NOMATCH);
1697     }
1698     break;
1699    
1700     /* Match a single character type; inline for speed */
1701    
1702     case OP_ANY:
1703 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1704 ph10 345 /* Fall through */
1705    
1706 ph10 341 case OP_ALLANY:
1707 ph10 443 if (eptr++ >= md->end_subject)
1708 ph10 428 {
1709 ph10 443 SCHECK_PARTIAL();
1710 ph10 428 RRETURN(MATCH_NOMATCH);
1711 ph10 443 }
1712 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1713 nigel 77 ecode++;
1714     break;
1715    
1716     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1717     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1718    
1719     case OP_ANYBYTE:
1720 ph10 443 if (eptr++ >= md->end_subject)
1721 ph10 428 {
1722 ph10 443 SCHECK_PARTIAL();
1723 ph10 428 RRETURN(MATCH_NOMATCH);
1724 ph10 443 }
1725 nigel 77 ecode++;
1726     break;
1727    
1728     case OP_NOT_DIGIT:
1729 ph10 443 if (eptr >= md->end_subject)
1730 ph10 428 {
1731 ph10 443 SCHECK_PARTIAL();
1732 ph10 428 RRETURN(MATCH_NOMATCH);
1733 ph10 443 }
1734 nigel 77 GETCHARINCTEST(c, eptr);
1735     if (
1736     #ifdef SUPPORT_UTF8
1737     c < 256 &&
1738     #endif
1739     (md->ctypes[c] & ctype_digit) != 0
1740     )
1741     RRETURN(MATCH_NOMATCH);
1742     ecode++;
1743     break;
1744    
1745     case OP_DIGIT:
1746 ph10 443 if (eptr >= md->end_subject)
1747 ph10 428 {
1748 ph10 443 SCHECK_PARTIAL();
1749 ph10 428 RRETURN(MATCH_NOMATCH);
1750 ph10 443 }
1751 nigel 77 GETCHARINCTEST(c, eptr);
1752     if (
1753     #ifdef SUPPORT_UTF8
1754     c >= 256 ||
1755     #endif
1756     (md->ctypes[c] & ctype_digit) == 0
1757     )
1758     RRETURN(MATCH_NOMATCH);
1759     ecode++;
1760     break;
1761    
1762     case OP_NOT_WHITESPACE:
1763 ph10 443 if (eptr >= md->end_subject)
1764 ph10 428 {
1765 ph10 443 SCHECK_PARTIAL();
1766 ph10 428 RRETURN(MATCH_NOMATCH);
1767 ph10 443 }
1768 nigel 77 GETCHARINCTEST(c, eptr);
1769     if (
1770     #ifdef SUPPORT_UTF8
1771     c < 256 &&
1772     #endif
1773     (md->ctypes[c] & ctype_space) != 0
1774     )
1775     RRETURN(MATCH_NOMATCH);
1776     ecode++;
1777     break;
1778    
1779     case OP_WHITESPACE:
1780 ph10 443 if (eptr >= md->end_subject)
1781 ph10 428 {
1782 ph10 443 SCHECK_PARTIAL();
1783 ph10 428 RRETURN(MATCH_NOMATCH);
1784 ph10 443 }
1785 nigel 77 GETCHARINCTEST(c, eptr);
1786     if (
1787     #ifdef SUPPORT_UTF8
1788     c >= 256 ||
1789     #endif
1790     (md->ctypes[c] & ctype_space) == 0
1791     )
1792     RRETURN(MATCH_NOMATCH);
1793     ecode++;
1794     break;
1795    
1796     case OP_NOT_WORDCHAR:
1797 ph10 443 if (eptr >= md->end_subject)
1798 ph10 428 {
1799 ph10 443 SCHECK_PARTIAL();
1800 ph10 428 RRETURN(MATCH_NOMATCH);
1801 ph10 443 }
1802 nigel 77 GETCHARINCTEST(c, eptr);
1803     if (
1804     #ifdef SUPPORT_UTF8
1805     c < 256 &&
1806     #endif
1807     (md->ctypes[c] & ctype_word) != 0
1808     )
1809     RRETURN(MATCH_NOMATCH);
1810     ecode++;
1811     break;
1812    
1813     case OP_WORDCHAR:
1814 ph10 443 if (eptr >= md->end_subject)
1815 ph10 428 {
1816 ph10 443 SCHECK_PARTIAL();
1817 ph10 428 RRETURN(MATCH_NOMATCH);
1818 ph10 443 }
1819 nigel 77 GETCHARINCTEST(c, eptr);
1820     if (
1821     #ifdef SUPPORT_UTF8
1822     c >= 256 ||
1823     #endif
1824     (md->ctypes[c] & ctype_word) == 0
1825     )
1826     RRETURN(MATCH_NOMATCH);
1827     ecode++;
1828     break;
1829    
1830 nigel 93 case OP_ANYNL:
1831 ph10 443 if (eptr >= md->end_subject)
1832 ph10 428 {
1833 ph10 443 SCHECK_PARTIAL();
1834 ph10 428 RRETURN(MATCH_NOMATCH);
1835 ph10 443 }
1836 nigel 93 GETCHARINCTEST(c, eptr);
1837     switch(c)
1838     {
1839     default: RRETURN(MATCH_NOMATCH);
1840     case 0x000d:
1841     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1842     break;
1843 ph10 231
1844 nigel 93 case 0x000a:
1845 ph10 231 break;
1846    
1847 nigel 93 case 0x000b:
1848     case 0x000c:
1849     case 0x0085:
1850     case 0x2028:
1851     case 0x2029:
1852 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1853 nigel 93 break;
1854     }
1855     ecode++;
1856     break;
1857    
1858 ph10 178 case OP_NOT_HSPACE:
1859 ph10 443 if (eptr >= md->end_subject)
1860 ph10 428 {
1861 ph10 443 SCHECK_PARTIAL();
1862 ph10 428 RRETURN(MATCH_NOMATCH);
1863 ph10 443 }
1864 ph10 178 GETCHARINCTEST(c, eptr);
1865     switch(c)
1866     {
1867     default: break;
1868     case 0x09: /* HT */
1869     case 0x20: /* SPACE */
1870     case 0xa0: /* NBSP */
1871     case 0x1680: /* OGHAM SPACE MARK */
1872     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1873     case 0x2000: /* EN QUAD */
1874     case 0x2001: /* EM QUAD */
1875     case 0x2002: /* EN SPACE */
1876     case 0x2003: /* EM SPACE */
1877     case 0x2004: /* THREE-PER-EM SPACE */
1878     case 0x2005: /* FOUR-PER-EM SPACE */
1879     case 0x2006: /* SIX-PER-EM SPACE */
1880     case 0x2007: /* FIGURE SPACE */
1881     case 0x2008: /* PUNCTUATION SPACE */
1882     case 0x2009: /* THIN SPACE */
1883     case 0x200A: /* HAIR SPACE */
1884     case 0x202f: /* NARROW NO-BREAK SPACE */
1885     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1886     case 0x3000: /* IDEOGRAPHIC SPACE */
1887     RRETURN(MATCH_NOMATCH);
1888     }
1889     ecode++;
1890     break;
1891    
1892     case OP_HSPACE:
1893 ph10 443 if (eptr >= md->end_subject)
1894 ph10 428 {
1895 ph10 443 SCHECK_PARTIAL();
1896 ph10 428 RRETURN(MATCH_NOMATCH);
1897 ph10 443 }
1898 ph10 178 GETCHARINCTEST(c, eptr);
1899     switch(c)
1900     {
1901     default: RRETURN(MATCH_NOMATCH);
1902     case 0x09: /* HT */
1903     case 0x20: /* SPACE */
1904     case 0xa0: /* NBSP */
1905     case 0x1680: /* OGHAM SPACE MARK */
1906     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1907     case 0x2000: /* EN QUAD */
1908     case 0x2001: /* EM QUAD */
1909     case 0x2002: /* EN SPACE */
1910     case 0x2003: /* EM SPACE */
1911     case 0x2004: /* THREE-PER-EM SPACE */
1912     case 0x2005: /* FOUR-PER-EM SPACE */
1913     case 0x2006: /* SIX-PER-EM SPACE */
1914     case 0x2007: /* FIGURE SPACE */
1915     case 0x2008: /* PUNCTUATION SPACE */
1916     case 0x2009: /* THIN SPACE */
1917     case 0x200A: /* HAIR SPACE */
1918     case 0x202f: /* NARROW NO-BREAK SPACE */
1919     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1920     case 0x3000: /* IDEOGRAPHIC SPACE */
1921     break;
1922     }
1923     ecode++;
1924     break;
1925    
1926     case OP_NOT_VSPACE:
1927 ph10 443 if (eptr >= md->end_subject)
1928 ph10 428 {
1929 ph10 443 SCHECK_PARTIAL();
1930 ph10 428 RRETURN(MATCH_NOMATCH);
1931 ph10 443 }
1932 ph10 178 GETCHARINCTEST(c, eptr);
1933     switch(c)
1934     {
1935     default: break;
1936     case 0x0a: /* LF */
1937     case 0x0b: /* VT */
1938     case 0x0c: /* FF */
1939     case 0x0d: /* CR */
1940     case 0x85: /* NEL */
1941     case 0x2028: /* LINE SEPARATOR */
1942     case 0x2029: /* PARAGRAPH SEPARATOR */
1943     RRETURN(MATCH_NOMATCH);
1944     }
1945     ecode++;
1946     break;
1947    
1948     case OP_VSPACE:
1949 ph10 443 if (eptr >= md->end_subject)
1950 ph10 428 {
1951 ph10 443 SCHECK_PARTIAL();
1952 ph10 428 RRETURN(MATCH_NOMATCH);
1953 ph10 443 }
1954 ph10 178 GETCHARINCTEST(c, eptr);
1955     switch(c)
1956     {
1957     default: RRETURN(MATCH_NOMATCH);
1958     case 0x0a: /* LF */
1959     case 0x0b: /* VT */
1960     case 0x0c: /* FF */
1961     case 0x0d: /* CR */
1962     case 0x85: /* NEL */
1963     case 0x2028: /* LINE SEPARATOR */
1964     case 0x2029: /* PARAGRAPH SEPARATOR */
1965     break;
1966     }
1967     ecode++;
1968     break;
1969    
1970 nigel 77 #ifdef SUPPORT_UCP
1971     /* Check the next character by Unicode property. We will get here only
1972     if the support is in the binary; otherwise a compile-time error occurs. */
1973    
1974     case OP_PROP:
1975     case OP_NOTPROP:
1976 ph10 443 if (eptr >= md->end_subject)
1977 ph10 428 {
1978 ph10 443 SCHECK_PARTIAL();
1979 ph10 428 RRETURN(MATCH_NOMATCH);
1980 ph10 443 }
1981 nigel 77 GETCHARINCTEST(c, eptr);
1982     {
1983 ph10 384 const ucd_record *prop = GET_UCD(c);
1984 nigel 77
1985 nigel 87 switch(ecode[1])
1986     {
1987     case PT_ANY:
1988     if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1989     break;
1990 nigel 77
1991 nigel 87 case PT_LAMP:
1992 ph10 349 if ((prop->chartype == ucp_Lu ||
1993     prop->chartype == ucp_Ll ||
1994     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1995 nigel 77 RRETURN(MATCH_NOMATCH);
1996 nigel 87 break;
1997    
1998     case PT_GC:
1999 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2000 nigel 77 RRETURN(MATCH_NOMATCH);
2001 nigel 87 break;
2002    
2003     case PT_PC:
2004 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2005 nigel 87 RRETURN(MATCH_NOMATCH);
2006     break;
2007    
2008     case PT_SC:
2009 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2010 nigel 87 RRETURN(MATCH_NOMATCH);
2011     break;
2012    
2013     default:
2014     RRETURN(PCRE_ERROR_INTERNAL);
2015 nigel 77 }
2016 nigel 87
2017     ecode += 3;
2018 nigel 77 }
2019     break;
2020    
2021     /* Match an extended Unicode sequence. We will get here only if the support
2022     is in the binary; otherwise a compile-time error occurs. */
2023    
2024     case OP_EXTUNI:
2025 ph10 443 if (eptr >= md->end_subject)
2026 ph10 428 {
2027 ph10 443 SCHECK_PARTIAL();
2028 ph10 428 RRETURN(MATCH_NOMATCH);
2029 ph10 443 }
2030 nigel 77 GETCHARINCTEST(c, eptr);
2031     {
2032 ph10 349 int category = UCD_CATEGORY(c);
2033 nigel 77 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
2034     while (eptr < md->end_subject)
2035     {
2036     int len = 1;
2037     if (!utf8) c = *eptr; else
2038     {
2039     GETCHARLEN(c, eptr, len);
2040     }
2041 ph10 349 category = UCD_CATEGORY(c);
2042 nigel 77 if (category != ucp_M) break;
2043     eptr += len;
2044     }
2045     }
2046     ecode++;
2047     break;
2048     #endif
2049    
2050    
2051     /* Match a back reference, possibly repeatedly. Look past the end of the
2052     item to see if there is repeat information following. The code is similar
2053     to that for character classes, but repeated for efficiency. Then obey
2054     similar code to character type repeats - written out again for speed.
2055     However, if the referenced string is the empty string, always treat
2056     it as matched, any number of times (otherwise there could be infinite
2057     loops). */
2058    
2059     case OP_REF:
2060     {
2061     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2062 ph10 345 ecode += 3;
2063    
2064 ph10 336 /* If the reference is unset, there are two possibilities:
2065 ph10 345
2066 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
2067     than the amount of subject left; this ensures that every attempt at a
2068     match fails. We can't just fail here, because of the possibility of
2069     quantifiers with zero minima.
2070 ph10 345
2071     (b) If the JavaScript compatibility flag is set, set the length to zero
2072     so that the back reference matches an empty string.
2073    
2074     Otherwise, set the length to the length of what was matched by the
2075 ph10 336 referenced subpattern. */
2076 ph10 345
2077 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
2078 ph10 345 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2079 ph10 336 else
2080     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2081 nigel 77
2082     /* Set up for repetition, or handle the non-repeated case */
2083    
2084     switch (*ecode)
2085     {
2086     case OP_CRSTAR:
2087     case OP_CRMINSTAR:
2088     case OP_CRPLUS:
2089     case OP_CRMINPLUS:
2090     case OP_CRQUERY:
2091     case OP_CRMINQUERY:
2092     c = *ecode++ - OP_CRSTAR;
2093     minimize = (c & 1) != 0;
2094     min = rep_min[c]; /* Pick up values from tables; */
2095     max = rep_max[c]; /* zero for max => infinity */
2096     if (max == 0) max = INT_MAX;
2097     break;
2098    
2099     case OP_CRRANGE:
2100     case OP_CRMINRANGE:
2101     minimize = (*ecode == OP_CRMINRANGE);
2102     min = GET2(ecode, 1);
2103     max = GET2(ecode, 3);
2104     if (max == 0) max = INT_MAX;
2105     ecode += 5;
2106     break;
2107    
2108     default: /* No repeat follows */
2109 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
2110 ph10 428 {
2111 ph10 443 CHECK_PARTIAL();
2112 ph10 428 RRETURN(MATCH_NOMATCH);
2113 ph10 443 }
2114 nigel 77 eptr += length;
2115     continue; /* With the main loop */
2116     }
2117    
2118     /* If the length of the reference is zero, just continue with the
2119     main loop. */
2120 ph10 443
2121 nigel 77 if (length == 0) continue;
2122    
2123     /* First, ensure the minimum number of matches are present. We get back
2124     the length of the reference string explicitly rather than passing the
2125     address of eptr, so that eptr can be a register variable. */
2126    
2127     for (i = 1; i <= min; i++)
2128     {
2129 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
2130 ph10 426 {
2131 ph10 427 CHECK_PARTIAL();
2132 ph10 426 RRETURN(MATCH_NOMATCH);
2133 ph10 427 }
2134 nigel 77 eptr += length;
2135     }
2136    
2137     /* If min = max, continue at the same level without recursion.
2138     They are not both allowed to be zero. */
2139    
2140     if (min == max) continue;
2141    
2142     /* If minimizing, keep trying and advancing the pointer */
2143    
2144     if (minimize)
2145     {
2146     for (fi = min;; fi++)
2147     {
2148 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2149 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2150 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2151     if (!match_ref(offset, eptr, length, md, ims))
2152 ph10 426 {
2153 ph10 427 CHECK_PARTIAL();
2154 nigel 77 RRETURN(MATCH_NOMATCH);
2155 ph10 427 }
2156 nigel 77 eptr += length;
2157     }
2158     /* Control never gets here */
2159     }
2160    
2161     /* If maximizing, find the longest string and work backwards */
2162    
2163     else
2164     {
2165     pp = eptr;
2166     for (i = min; i < max; i++)
2167     {
2168 ph10 463 if (!match_ref(offset, eptr, length, md, ims))
2169 ph10 462 {
2170 ph10 463 CHECK_PARTIAL();
2171 ph10 462 break;
2172 ph10 463 }
2173 nigel 77 eptr += length;
2174     }
2175     while (eptr >= pp)
2176     {
2177 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2178 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2179     eptr -= length;
2180     }
2181     RRETURN(MATCH_NOMATCH);
2182     }
2183     }
2184     /* Control never gets here */
2185    
2186     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2187     used when all the characters in the class have values in the range 0-255,
2188     and either the matching is caseful, or the characters are in the range
2189     0-127 when UTF-8 processing is enabled. The only difference between
2190     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2191     encountered.
2192    
2193     First, look past the end of the item to see if there is repeat information
2194     following. Then obey similar code to character type repeats - written out
2195     again for speed. */
2196    
2197     case OP_NCLASS:
2198     case OP_CLASS:
2199     {
2200     data = ecode + 1; /* Save for matching */
2201     ecode += 33; /* Advance past the item */
2202    
2203     switch (*ecode)
2204     {
2205     case OP_CRSTAR:
2206     case OP_CRMINSTAR:
2207     case OP_CRPLUS:
2208     case OP_CRMINPLUS:
2209     case OP_CRQUERY:
2210     case OP_CRMINQUERY:
2211     c = *ecode++ - OP_CRSTAR;
2212     minimize = (c & 1) != 0;
2213     min = rep_min[c]; /* Pick up values from tables; */
2214     max = rep_max[c]; /* zero for max => infinity */
2215     if (max == 0) max = INT_MAX;
2216     break;
2217    
2218     case OP_CRRANGE:
2219     case OP_CRMINRANGE:
2220     minimize = (*ecode == OP_CRMINRANGE);
2221     min = GET2(ecode, 1);
2222     max = GET2(ecode, 3);
2223     if (max == 0) max = INT_MAX;
2224     ecode += 5;
2225     break;
2226    
2227     default: /* No repeat follows */
2228     min = max = 1;
2229     break;
2230     }
2231    
2232     /* First, ensure the minimum number of matches are present. */
2233    
2234     #ifdef SUPPORT_UTF8
2235     /* UTF-8 mode */
2236     if (utf8)
2237     {
2238     for (i = 1; i <= min; i++)
2239     {
2240 ph10 427 if (eptr >= md->end_subject)
2241 ph10 426 {
2242 ph10 428 SCHECK_PARTIAL();
2243 ph10 426 RRETURN(MATCH_NOMATCH);
2244 ph10 427 }
2245 nigel 77 GETCHARINC(c, eptr);
2246     if (c > 255)
2247     {
2248     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2249     }
2250     else
2251     {
2252     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2253     }
2254     }
2255     }
2256     else
2257     #endif
2258     /* Not UTF-8 mode */
2259     {
2260     for (i = 1; i <= min; i++)
2261     {
2262 ph10 427 if (eptr >= md->end_subject)
2263 ph10 426 {
2264 ph10 428 SCHECK_PARTIAL();
2265 ph10 426 RRETURN(MATCH_NOMATCH);
2266 ph10 427 }
2267 nigel 77 c = *eptr++;
2268     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2269     }
2270     }
2271    
2272     /* If max == min we can continue with the main loop without the
2273     need to recurse. */
2274    
2275     if (min == max) continue;
2276    
2277     /* If minimizing, keep testing the rest of the expression and advancing
2278     the pointer while it matches the class. */
2279    
2280     if (minimize)
2281     {
2282     #ifdef SUPPORT_UTF8
2283     /* UTF-8 mode */
2284     if (utf8)
2285     {
2286     for (fi = min;; fi++)
2287     {
2288 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2289 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2290 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2291 ph10 427 if (eptr >= md->end_subject)
2292 ph10 426 {
2293 ph10 427 SCHECK_PARTIAL();
2294 ph10 426 RRETURN(MATCH_NOMATCH);
2295 ph10 427 }
2296 nigel 77 GETCHARINC(c, eptr);
2297     if (c > 255)
2298     {
2299     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2300     }
2301     else
2302     {
2303     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2304     }
2305     }
2306     }
2307     else
2308     #endif
2309     /* Not UTF-8 mode */
2310     {
2311     for (fi = min;; fi++)
2312     {
2313 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2314 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2315 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2316 ph10 427 if (eptr >= md->end_subject)
2317 ph10 426 {
2318 ph10 427 SCHECK_PARTIAL();
2319 ph10 426 RRETURN(MATCH_NOMATCH);
2320 ph10 427 }
2321 nigel 77 c = *eptr++;
2322     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2323     }
2324     }
2325     /* Control never gets here */
2326     }
2327    
2328     /* If maximizing, find the longest possible run, then work backwards. */
2329    
2330     else
2331     {
2332     pp = eptr;
2333    
2334     #ifdef SUPPORT_UTF8
2335     /* UTF-8 mode */
2336     if (utf8)
2337     {
2338     for (i = min; i < max; i++)
2339     {
2340     int len = 1;
2341 ph10 463 if (eptr >= md->end_subject)
2342 ph10 462 {
2343 ph10 463 SCHECK_PARTIAL();
2344 ph10 462 break;
2345 ph10 463 }
2346 nigel 77 GETCHARLEN(c, eptr, len);
2347     if (c > 255)
2348     {
2349     if (op == OP_CLASS) break;
2350     }
2351     else
2352     {
2353     if ((data[c/8] & (1 << (c&7))) == 0) break;
2354     }
2355     eptr += len;
2356     }
2357     for (;;)
2358     {
2359 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2360 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2361     if (eptr-- == pp) break; /* Stop if tried at original pos */
2362     BACKCHAR(eptr);
2363     }
2364     }
2365     else
2366     #endif
2367     /* Not UTF-8 mode */
2368     {
2369     for (i = min; i < max; i++)
2370     {
2371 ph10 463 if (eptr >= md->end_subject)
2372 ph10 462 {
2373 ph10 463 SCHECK_PARTIAL();
2374 ph10 462 break;
2375 ph10 463 }
2376 nigel 77 c = *eptr;
2377     if ((data[c/8] & (1 << (c&7))) == 0) break;
2378     eptr++;
2379     }
2380     while (eptr >= pp)
2381     {
2382 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2383 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2384 nigel 77 eptr--;
2385     }
2386     }
2387    
2388     RRETURN(MATCH_NOMATCH);
2389     }
2390     }
2391     /* Control never gets here */
2392    
2393    
2394     /* Match an extended character class. This opcode is encountered only
2395 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2396     mode, because Unicode properties are supported in non-UTF-8 mode. */
2397 nigel 77
2398     #ifdef SUPPORT_UTF8
2399     case OP_XCLASS:
2400     {
2401     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2402     ecode += GET(ecode, 1); /* Advance past the item */
2403    
2404     switch (*ecode)
2405     {
2406     case OP_CRSTAR:
2407     case OP_CRMINSTAR:
2408     case OP_CRPLUS:
2409     case OP_CRMINPLUS:
2410     case OP_CRQUERY:
2411     case OP_CRMINQUERY:
2412     c = *ecode++ - OP_CRSTAR;
2413     minimize = (c & 1) != 0;
2414     min = rep_min[c]; /* Pick up values from tables; */
2415     max = rep_max[c]; /* zero for max => infinity */
2416     if (max == 0) max = INT_MAX;
2417     break;
2418    
2419     case OP_CRRANGE:
2420     case OP_CRMINRANGE:
2421     minimize = (*ecode == OP_CRMINRANGE);
2422     min = GET2(ecode, 1);
2423     max = GET2(ecode, 3);
2424     if (max == 0) max = INT_MAX;
2425     ecode += 5;
2426     break;
2427    
2428     default: /* No repeat follows */
2429     min = max = 1;
2430     break;
2431     }
2432    
2433     /* First, ensure the minimum number of matches are present. */
2434    
2435     for (i = 1; i <= min; i++)
2436     {
2437 ph10 427 if (eptr >= md->end_subject)
2438 ph10 426 {
2439     SCHECK_PARTIAL();
2440     RRETURN(MATCH_NOMATCH);
2441 ph10 427 }
2442 ph10 384 GETCHARINCTEST(c, eptr);
2443 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2444     }
2445    
2446     /* If max == min we can continue with the main loop without the
2447     need to recurse. */
2448    
2449     if (min == max) continue;
2450    
2451     /* If minimizing, keep testing the rest of the expression and advancing
2452     the pointer while it matches the class. */
2453    
2454     if (minimize)
2455     {
2456     for (fi = min;; fi++)
2457     {
2458 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2459 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2460 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2461 ph10 427 if (eptr >= md->end_subject)
2462 ph10 426 {
2463 ph10 427 SCHECK_PARTIAL();
2464 ph10 426 RRETURN(MATCH_NOMATCH);
2465 ph10 427 }
2466 ph10 384 GETCHARINCTEST(c, eptr);
2467 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2468     }
2469     /* Control never gets here */
2470     }
2471    
2472     /* If maximizing, find the longest possible run, then work backwards. */
2473    
2474     else
2475     {
2476     pp = eptr;
2477     for (i = min; i < max; i++)
2478     {
2479     int len = 1;
2480 ph10 463 if (eptr >= md->end_subject)
2481 ph10 462 {
2482 ph10 463 SCHECK_PARTIAL();
2483 ph10 462 break;
2484 ph10 463 }
2485 ph10 384 GETCHARLENTEST(c, eptr, len);
2486 nigel 77 if (!_pcre_xclass(c, data)) break;
2487     eptr += len;
2488     }
2489     for(;;)
2490     {
2491 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2492 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2493     if (eptr-- == pp) break; /* Stop if tried at original pos */
2494 ph10 214 if (utf8) BACKCHAR(eptr);
2495 nigel 77 }
2496     RRETURN(MATCH_NOMATCH);
2497     }
2498    
2499     /* Control never gets here */
2500     }
2501     #endif /* End of XCLASS */
2502    
2503     /* Match a single character, casefully */
2504    
2505     case OP_CHAR:
2506     #ifdef SUPPORT_UTF8
2507     if (utf8)
2508     {
2509     length = 1;
2510     ecode++;
2511     GETCHARLEN(fc, ecode, length);
2512 ph10 443 if (length > md->end_subject - eptr)
2513 ph10 428 {
2514     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2515     RRETURN(MATCH_NOMATCH);
2516 ph10 443 }
2517 nigel 77 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2518     }
2519     else
2520     #endif
2521    
2522     /* Non-UTF-8 mode */
2523     {
2524 ph10 443 if (md->end_subject - eptr < 1)
2525 ph10 428 {
2526     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2527     RRETURN(MATCH_NOMATCH);
2528 ph10 443 }
2529 nigel 77 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2530     ecode += 2;
2531     }
2532     break;
2533    
2534     /* Match a single character, caselessly */
2535    
2536     case OP_CHARNC:
2537     #ifdef SUPPORT_UTF8
2538     if (utf8)
2539     {
2540     length = 1;
2541     ecode++;
2542     GETCHARLEN(fc, ecode, length);
2543    
2544 ph10 443 if (length > md->end_subject - eptr)
2545 ph10 428 {
2546     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2547     RRETURN(MATCH_NOMATCH);
2548 ph10 443 }
2549 nigel 77
2550     /* If the pattern character's value is < 128, we have only one byte, and
2551     can use the fast lookup table. */
2552    
2553     if (fc < 128)
2554     {
2555     if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2556     }
2557    
2558     /* Otherwise we must pick up the subject character */
2559    
2560     else
2561     {
2562 nigel 93 unsigned int dc;
2563 nigel 77 GETCHARINC(dc, eptr);
2564     ecode += length;
2565    
2566     /* If we have Unicode property support, we can use it to test the other
2567 nigel 87 case of the character, if there is one. */
2568 nigel 77
2569     if (fc != dc)
2570     {
2571     #ifdef SUPPORT_UCP
2572 ph10 349 if (dc != UCD_OTHERCASE(fc))
2573 nigel 77 #endif
2574     RRETURN(MATCH_NOMATCH);
2575     }
2576     }
2577     }
2578     else
2579     #endif /* SUPPORT_UTF8 */
2580    
2581     /* Non-UTF-8 mode */
2582     {
2583 ph10 443 if (md->end_subject - eptr < 1)
2584 ph10 428 {
2585 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2586 ph10 428 RRETURN(MATCH_NOMATCH);
2587 ph10 443 }
2588 nigel 77 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2589     ecode += 2;
2590     }
2591     break;
2592    
2593 nigel 93 /* Match a single character repeatedly. */
2594 nigel 77
2595     case OP_EXACT:
2596     min = max = GET2(ecode, 1);
2597     ecode += 3;
2598     goto REPEATCHAR;
2599    
2600 nigel 93 case OP_POSUPTO:
2601     possessive = TRUE;
2602     /* Fall through */
2603    
2604 nigel 77 case OP_UPTO:
2605     case OP_MINUPTO:
2606     min = 0;
2607     max = GET2(ecode, 1);
2608     minimize = *ecode == OP_MINUPTO;
2609     ecode += 3;
2610     goto REPEATCHAR;
2611    
2612 nigel 93 case OP_POSSTAR:
2613     possessive = TRUE;
2614     min = 0;
2615     max = INT_MAX;
2616     ecode++;
2617     goto REPEATCHAR;
2618    
2619     case OP_POSPLUS:
2620     possessive = TRUE;
2621     min = 1;
2622     max = INT_MAX;
2623     ecode++;
2624     goto REPEATCHAR;
2625    
2626     case OP_POSQUERY:
2627     possessive = TRUE;
2628     min = 0;
2629     max = 1;
2630     ecode++;
2631     goto REPEATCHAR;
2632    
2633 nigel 77 case OP_STAR:
2634     case OP_MINSTAR:
2635     case OP_PLUS:
2636     case OP_MINPLUS:
2637     case OP_QUERY:
2638     case OP_MINQUERY:
2639     c = *ecode++ - OP_STAR;
2640     minimize = (c & 1) != 0;
2641 ph10 443
2642 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2643     max = rep_max[c]; /* zero for max => infinity */
2644     if (max == 0) max = INT_MAX;
2645    
2646 ph10 426 /* Common code for all repeated single-character matches. */
2647 nigel 77
2648     REPEATCHAR:
2649     #ifdef SUPPORT_UTF8
2650     if (utf8)
2651     {
2652     length = 1;
2653     charptr = ecode;
2654     GETCHARLEN(fc, ecode, length);
2655     ecode += length;
2656    
2657     /* Handle multibyte character matching specially here. There is
2658     support for caseless matching if UCP support is present. */
2659    
2660     if (length > 1)
2661     {
2662     #ifdef SUPPORT_UCP
2663 nigel 93 unsigned int othercase;
2664 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2665 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2666 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2667 ph10 115 else oclength = 0;
2668 nigel 77 #endif /* SUPPORT_UCP */
2669    
2670     for (i = 1; i <= min; i++)
2671     {
2672 ph10 426 if (eptr <= md->end_subject - length &&
2673     memcmp(eptr, charptr, length) == 0) eptr += length;
2674 ph10 123 #ifdef SUPPORT_UCP
2675 ph10 426 else if (oclength > 0 &&
2676     eptr <= md->end_subject - oclength &&
2677     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2678     #endif /* SUPPORT_UCP */
2679 nigel 77 else
2680     {
2681 ph10 426 CHECK_PARTIAL();
2682     RRETURN(MATCH_NOMATCH);
2683 nigel 77 }
2684     }
2685    
2686     if (min == max) continue;
2687    
2688     if (minimize)
2689     {
2690     for (fi = min;; fi++)
2691     {
2692 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2693 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2694 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2695 ph10 426 if (eptr <= md->end_subject - length &&
2696     memcmp(eptr, charptr, length) == 0) eptr += length;
2697 ph10 123 #ifdef SUPPORT_UCP
2698 ph10 426 else if (oclength > 0 &&
2699     eptr <= md->end_subject - oclength &&
2700     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2701     #endif /* SUPPORT_UCP */
2702 nigel 77 else
2703     {
2704 ph10 426 CHECK_PARTIAL();
2705     RRETURN(MATCH_NOMATCH);
2706 nigel 77 }
2707     }
2708     /* Control never gets here */
2709     }
2710 nigel 93
2711     else /* Maximize */
2712 nigel 77 {
2713     pp = eptr;
2714     for (i = min; i < max; i++)
2715     {
2716 ph10 426 if (eptr <= md->end_subject - length &&
2717     memcmp(eptr, charptr, length) == 0) eptr += length;
2718 ph10 123 #ifdef SUPPORT_UCP
2719 ph10 426 else if (oclength > 0 &&
2720     eptr <= md->end_subject - oclength &&
2721     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2722     #endif /* SUPPORT_UCP */
2723 ph10 463 else
2724 ph10 462 {
2725 ph10 463 CHECK_PARTIAL();
2726 ph10 462 break;
2727 ph10 463 }
2728 nigel 77 }
2729 nigel 93
2730     if (possessive) continue;
2731 ph10 427
2732 ph10 120 for(;;)
2733 ph10 426 {
2734     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2735     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2736     if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2737 ph10 115 #ifdef SUPPORT_UCP
2738 ph10 426 eptr--;
2739     BACKCHAR(eptr);
2740 ph10 123 #else /* without SUPPORT_UCP */
2741 ph10 426 eptr -= length;
2742 ph10 123 #endif /* SUPPORT_UCP */
2743 ph10 426 }
2744 nigel 77 }
2745     /* Control never gets here */
2746     }
2747    
2748     /* If the length of a UTF-8 character is 1, we fall through here, and
2749     obey the code as for non-UTF-8 characters below, though in this case the
2750     value of fc will always be < 128. */
2751     }
2752     else
2753     #endif /* SUPPORT_UTF8 */
2754    
2755     /* When not in UTF-8 mode, load a single-byte character. */
2756    
2757 ph10 426 fc = *ecode++;
2758 ph10 443
2759 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2760     may not be in UTF-8 mode. The code is duplicated for the caseless and
2761     caseful cases, for speed, since matching characters is likely to be quite
2762     common. First, ensure the minimum number of matches are present. If min =
2763     max, continue at the same level without recursing. Otherwise, if
2764     minimizing, keep trying the rest of the expression and advancing one
2765     matching character if failing, up to the maximum. Alternatively, if
2766     maximizing, find the maximum number of characters and work backwards. */
2767    
2768     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2769     max, eptr));
2770    
2771     if ((ims & PCRE_CASELESS) != 0)
2772     {
2773     fc = md->lcc[fc];
2774     for (i = 1; i <= min; i++)
2775 ph10 426 {
2776     if (eptr >= md->end_subject)
2777     {
2778     SCHECK_PARTIAL();
2779     RRETURN(MATCH_NOMATCH);
2780     }
2781 nigel 77 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2782 ph10 426 }
2783 nigel 77 if (min == max) continue;
2784     if (minimize)
2785     {
2786     for (fi = min;; fi++)
2787     {
2788 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2789 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2790 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2791 ph10 426 if (eptr >= md->end_subject)
2792     {
2793 ph10 427 SCHECK_PARTIAL();
2794 ph10 426 RRETURN(MATCH_NOMATCH);
2795     }
2796     if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2797 nigel 77 }
2798     /* Control never gets here */
2799     }
2800 nigel 93 else /* Maximize */
2801 nigel 77 {
2802     pp = eptr;
2803     for (i = min; i < max; i++)
2804     {
2805 ph10 463 if (eptr >= md->end_subject)
2806 ph10 462 {
2807     SCHECK_PARTIAL();
2808     break;
2809 ph10 463 }
2810 ph10 462 if (fc != md->lcc[*eptr]) break;
2811 nigel 77 eptr++;
2812     }
2813 ph10 427
2814 nigel 93 if (possessive) continue;
2815 ph10 427
2816 nigel 77 while (eptr >= pp)
2817     {
2818 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2819 nigel 77 eptr--;
2820     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2821     }
2822     RRETURN(MATCH_NOMATCH);
2823     }
2824     /* Control never gets here */
2825     }
2826    
2827     /* Caseful comparisons (fc is a single byte here; multibyte was handled above) */
2828    
2829     else
2830     {
2831 ph10 427 for (i = 1; i <= min; i++)
2832 ph10 426 {
2833     if (eptr >= md->end_subject)
2834     {
2835     SCHECK_PARTIAL();
2836     RRETURN(MATCH_NOMATCH);
2837     }
2838     if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2839 ph10 427 }
2840 ph10 443
2841 nigel 77 if (min == max) continue;
2842 ph10 443
2843 nigel 77 if (minimize)
2844     {
2845     for (fi = min;; fi++)
2846     {
2847 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2848 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2849 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2850 ph10 426 if (eptr >= md->end_subject)
2851 ph10 427 {
2852 ph10 426 SCHECK_PARTIAL();
2853     RRETURN(MATCH_NOMATCH);
2854 ph10 427 }
2855 ph10 426 if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2856 nigel 77 }
2857     /* Control never gets here */
2858     }
2859 nigel 93 else /* Maximize */
2860 nigel 77 {
2861     pp = eptr;
2862     for (i = min; i < max; i++)
2863     {
2864 ph10 463 if (eptr >= md->end_subject)
2865 ph10 462 {
2866 ph10 463 SCHECK_PARTIAL();
2867 ph10 462 break;
2868 ph10 463 }
2869 ph10 462 if (fc != *eptr) break;
2870 nigel 77 eptr++;
2871     }
2872 nigel 93 if (possessive) continue;
2873 ph10 443
2874 nigel 77 while (eptr >= pp)
2875     {
2876 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2877 nigel 77 eptr--;
2878     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2879     }
2880     RRETURN(MATCH_NOMATCH);
2881     }
2882     }
2883     /* Control never gets here */
2884    
2885     /* Match a negated single one-byte character. The character we are
2886     checking can be multibyte. */
2887    
2888     case OP_NOT:
2889 ph10 443 if (eptr >= md->end_subject)
2890 ph10 428 {
2891 ph10 443 SCHECK_PARTIAL();
2892 ph10 428 RRETURN(MATCH_NOMATCH);
2893 ph10 443 }
2894 nigel 77 ecode++;
2895     GETCHARINCTEST(c, eptr);
2896     if ((ims & PCRE_CASELESS) != 0)
2897     {
2898     #ifdef SUPPORT_UTF8
2899     if (c < 256)
2900     #endif
2901     c = md->lcc[c];
2902     if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2903     }
2904     else
2905     {
2906     if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2907     }
2908     break;
2909    
2910     /* Match a negated single one-byte character repeatedly. This is almost a
2911     repeat of the code for a repeated single character, but I haven't found a
2912     nice way of commoning these up that doesn't require a test of the
2913     positive/negative option for each character match. Maybe that wouldn't add
2914     very much to the time taken, but character matching *is* what this is all
2915     about... */
2916    
2917     case OP_NOTEXACT:
2918     min = max = GET2(ecode, 1);
2919     ecode += 3;
2920     goto REPEATNOTCHAR;
2921    
2922     case OP_NOTUPTO:
2923     case OP_NOTMINUPTO:
2924     min = 0;
2925     max = GET2(ecode, 1);
2926     minimize = *ecode == OP_NOTMINUPTO;
2927     ecode += 3;
2928     goto REPEATNOTCHAR;
2929    
2930 nigel 93 case OP_NOTPOSSTAR:
2931     possessive = TRUE;
2932     min = 0;
2933     max = INT_MAX;
2934     ecode++;
2935     goto REPEATNOTCHAR;
2936    
2937     case OP_NOTPOSPLUS:
2938     possessive = TRUE;
2939     min = 1;
2940     max = INT_MAX;
2941     ecode++;
2942     goto REPEATNOTCHAR;
2943    
2944     case OP_NOTPOSQUERY:
2945     possessive = TRUE;
2946     min = 0;
2947     max = 1;
2948     ecode++;
2949     goto REPEATNOTCHAR;
2950    
2951     case OP_NOTPOSUPTO:
2952     possessive = TRUE;
2953     min = 0;
2954     max = GET2(ecode, 1);
2955     ecode += 3;
2956     goto REPEATNOTCHAR;
2957    
2958 nigel 77 case OP_NOTSTAR:
2959     case OP_NOTMINSTAR:
2960     case OP_NOTPLUS:
2961     case OP_NOTMINPLUS:
2962     case OP_NOTQUERY:
2963     case OP_NOTMINQUERY:
2964     c = *ecode++ - OP_NOTSTAR;
2965     minimize = (c & 1) != 0;
2966     min = rep_min[c]; /* Pick up values from tables; */
2967     max = rep_max[c]; /* zero for max => infinity */
2968     if (max == 0) max = INT_MAX;
2969    
2970 ph10 426 /* Common code for all repeated negated single-byte matches. */
2971 nigel 77
2972     REPEATNOTCHAR:
2973     fc = *ecode++;
2974    
2975     /* The code is duplicated for the caseless and caseful cases, for speed,
2976     since matching characters is likely to be quite common. First, ensure the
2977     minimum number of matches are present. If min = max, continue at the same
2978     level without recursing. Otherwise, if minimizing, keep trying the rest of
2979     the expression and advancing one matching character if failing, up to the
2980     maximum. Alternatively, if maximizing, find the maximum number of
2981     characters and work backwards. */
2982    
2983     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2984     max, eptr));
2985    
2986     if ((ims & PCRE_CASELESS) != 0)
2987     {
2988     fc = md->lcc[fc];
2989    
2990     #ifdef SUPPORT_UTF8
2991     /* UTF-8 mode */
2992     if (utf8)
2993     {
2994 nigel 93 register unsigned int d;
2995 nigel 77 for (i = 1; i <= min; i++)
2996     {
2997 ph10 426 if (eptr >= md->end_subject)
2998     {
2999     SCHECK_PARTIAL();
3000 ph10 427 RRETURN(MATCH_NOMATCH);
3001     }
3002 nigel 77 GETCHARINC(d, eptr);
3003     if (d < 256) d = md->lcc[d];
3004     if (fc == d) RRETURN(MATCH_NOMATCH);
3005     }
3006     }
3007     else
3008     #endif
3009    
3010     /* Not UTF-8 mode */
3011     {
3012     for (i = 1; i <= min; i++)
3013 ph10 426 {
3014     if (eptr >= md->end_subject)
3015     {
3016     SCHECK_PARTIAL();
3017 ph10 427 RRETURN(MATCH_NOMATCH);
3018     }
3019 nigel 77 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3020 ph10 427 }
3021 nigel 77 }
3022    
3023     if (min == max) continue;
3024    
3025     if (minimize)
3026     {
3027     #ifdef SUPPORT_UTF8
3028     /* UTF-8 mode */
3029     if (utf8)
3030     {
3031 nigel 93 register unsigned int d;
3032 nigel 77 for (fi = min;; fi++)
3033     {
3034 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3035 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3036 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3037 ph10 427 if (eptr >= md->end_subject)
3038 ph10 426 {
3039 ph10 427 SCHECK_PARTIAL();
3040 ph10 426 RRETURN(MATCH_NOMATCH);
3041 ph10 427 }
3042 nigel 77 GETCHARINC(d, eptr);
3043     if (d < 256) d = md->lcc[d];
3044 ph10 366 if (fc == d) RRETURN(MATCH_NOMATCH);
3045 nigel 77 }
3046     }
3047     else
3048     #endif
3049     /* Not UTF-8 mode */
3050     {
3051     for (fi = min;; fi++)
3052     {
3053 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3054 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3055 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3056 ph10 426 if (eptr >= md->end_subject)
3057     {
3058     SCHECK_PARTIAL();
3059     RRETURN(MATCH_NOMATCH);
3060     }
3061     if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3062 nigel 77 }
3063     }
3064     /* Control never gets here */
3065     }
3066    
3067     /* Maximize case */
3068    
3069     else
3070     {
3071     pp = eptr;
3072    
3073     #ifdef SUPPORT_UTF8
3074     /* UTF-8 mode */
3075     if (utf8)
3076     {
3077 nigel 93 register unsigned int d;
3078 nigel 77 for (i = min; i < max; i++)
3079     {
3080     int len = 1;
3081 ph10 463 if (eptr >= md->end_subject)
3082 ph10 462 {
3083 ph10 463 SCHECK_PARTIAL();
3084 ph10 462 break;
3085 ph10 463 }
3086 nigel 77 GETCHARLEN(d, eptr, len);
3087     if (d < 256) d = md->lcc[d];
3088     if (fc == d) break;
3089     eptr += len;
3090     }
3091 nigel 93 if (possessive) continue;
3092     for(;;)
3093 nigel 77 {
3094 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3095 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3096     if (eptr-- == pp) break; /* Stop if tried at original pos */
3097     BACKCHAR(eptr);
3098     }
3099     }
3100     else
3101     #endif
3102     /* Not UTF-8 mode */
3103     {
3104     for (i = min; i < max; i++)
3105     {
3106 ph10 463 if (eptr >= md->end_subject)
3107 ph10 462 {
3108     SCHECK_PARTIAL();
3109     break;
3110 ph10 463 }
3111 ph10 462 if (fc == md->lcc[*eptr]) break;
3112 nigel 77 eptr++;
3113     }
3114 nigel 93 if (possessive) continue;
3115 nigel 77 while (eptr >= pp)
3116     {
3117 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3118 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3119     eptr--;
3120     }
3121     }
3122    
3123     RRETURN(MATCH_NOMATCH);
3124     }
3125     /* Control never gets here */
3126     }
3127    
3128     /* Caseful comparisons */
3129    
3130     else
3131     {
3132     #ifdef SUPPORT_UTF8
3133     /* UTF-8 mode */
3134     if (utf8)
3135     {
3136 nigel 93 register unsigned int d;
3137 nigel 77 for (i = 1; i <= min; i++)
3138     {
3139 ph10 426 if (eptr >= md->end_subject)
3140     {
3141     SCHECK_PARTIAL();
3142 ph10 427 RRETURN(MATCH_NOMATCH);
3143     }
3144 nigel 77 GETCHARINC(d, eptr);
3145     if (fc == d) RRETURN(MATCH_NOMATCH);
3146     }
3147     }
3148     else
3149     #endif
3150     /* Not UTF-8 mode */
3151     {
3152     for (i = 1; i <= min; i++)
3153 ph10 426 {
3154     if (eptr >= md->end_subject)
3155     {
3156     SCHECK_PARTIAL();
3157 ph10 427 RRETURN(MATCH_NOMATCH);
3158     }
3159 nigel 77 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3160 ph10 427 }
3161 nigel 77 }
3162    
3163     if (min == max) continue;
3164    
3165     if (minimize)
3166     {
3167     #ifdef SUPPORT_UTF8
3168     /* UTF-8 mode */
3169     if (utf8)
3170     {
3171 nigel 93 register unsigned int d;
3172 nigel 77 for (fi = min;; fi++)
3173     {
3174 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3175 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3176 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3177 ph10 427 if (eptr >= md->end_subject)
3178 ph10 426 {
3179 ph10 427 SCHECK_PARTIAL();
3180 ph10 426 RRETURN(MATCH_NOMATCH);
3181 ph10 427 }
3182 nigel 77 GETCHARINC(d, eptr);
3183 ph10 371 if (fc == d) RRETURN(MATCH_NOMATCH);
3184 nigel 77 }
3185     }
3186     else
3187     #endif
3188     /* Not UTF-8 mode */
3189     {
3190     for (fi = min;; fi++)
3191     {
3192 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3193 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3194 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3195 ph10 426 if (eptr >= md->end_subject)
3196     {
3197     SCHECK_PARTIAL();
3198     RRETURN(MATCH_NOMATCH);
3199 ph10 427 }
3200 ph10 426 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3201 nigel 77 }
3202     }
3203     /* Control never gets here */
3204     }
3205    
3206     /* Maximize case */
3207    
3208     else
3209     {
3210     pp = eptr;
3211    
3212     #ifdef SUPPORT_UTF8
3213     /* UTF-8 mode */
3214     if (utf8)
3215     {
3216 nigel 93 register unsigned int d;
3217 nigel 77 for (i = min; i < max; i++)
3218     {
3219     int len = 1;
3220 ph10 463 if (eptr >= md->end_subject)
3221 ph10 462 {
3222 ph10 463 SCHECK_PARTIAL();
3223 ph10 462 break;
3224 ph10 463 }
3225 nigel 77 GETCHARLEN(d, eptr, len);
3226     if (fc == d) break;
3227     eptr += len;
3228     }
3229 nigel 93 if (possessive) continue;
3230 nigel 77 for(;;)
3231     {
3232 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3233 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3234     if (eptr-- == pp) break; /* Stop if tried at original pos */
3235     BACKCHAR(eptr);
3236     }
3237     }
3238     else
3239     #endif
3240     /* Not UTF-8 mode */
3241     {
3242     for (i = min; i < max; i++)
3243     {
3244 ph10 463 if (eptr >= md->end_subject)
3245 ph10 462 {
3246 ph10 463 SCHECK_PARTIAL();
3247 ph10 462 break;
3248 ph10 463 }
3249 ph10 462 if (fc == *eptr) break;
3250 nigel 77 eptr++;
3251     }
3252 nigel 93 if (possessive) continue;
3253 nigel 77 while (eptr >= pp)
3254     {
3255 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3256 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3257     eptr--;
3258     }
3259     }
3260    
3261     RRETURN(MATCH_NOMATCH);
3262     }
3263     }
3264     /* Control never gets here */
3265    
3266     /* Match a single character type repeatedly; several different opcodes
3267     share code. This is very similar to the code for single characters, but we
3268     repeat it in the interests of efficiency. */
3269    
3270     case OP_TYPEEXACT:
3271     min = max = GET2(ecode, 1);
3272     minimize = TRUE;
3273     ecode += 3;
3274     goto REPEATTYPE;
3275    
3276     case OP_TYPEUPTO:
3277     case OP_TYPEMINUPTO:
3278     min = 0;
3279     max = GET2(ecode, 1);
3280     minimize = *ecode == OP_TYPEMINUPTO;
3281     ecode += 3;
3282     goto REPEATTYPE;
3283    
3284 nigel 93 case OP_TYPEPOSSTAR:
3285     possessive = TRUE;
3286     min = 0;
3287     max = INT_MAX;
3288     ecode++;
3289     goto REPEATTYPE;
3290    
3291     case OP_TYPEPOSPLUS:
3292     possessive = TRUE;
3293     min = 1;
3294     max = INT_MAX;
3295     ecode++;
3296     goto REPEATTYPE;
3297    
3298     case OP_TYPEPOSQUERY:
3299     possessive = TRUE;
3300     min = 0;
3301     max = 1;
3302     ecode++;
3303     goto REPEATTYPE;
3304    
3305     case OP_TYPEPOSUPTO:
3306     possessive = TRUE;
3307     min = 0;
3308     max = GET2(ecode, 1);
3309     ecode += 3;
3310     goto REPEATTYPE;
3311    
3312 nigel 77 case OP_TYPESTAR:
3313     case OP_TYPEMINSTAR:
3314     case OP_TYPEPLUS:
3315     case OP_TYPEMINPLUS:
3316     case OP_TYPEQUERY:
3317     case OP_TYPEMINQUERY:
3318     c = *ecode++ - OP_TYPESTAR;
3319     minimize = (c & 1) != 0;
3320     min = rep_min[c]; /* Pick up values from tables; */
3321     max = rep_max[c]; /* zero for max => infinity */
3322     if (max == 0) max = INT_MAX;
3323    
3324     /* Common code for all repeated single character type matches. Note that
3325     in UTF-8 mode, '.' matches a character of any length, as can several other
3326     types; OP_DIGIT, OP_WHITESPACE and OP_WORDCHAR match only one-byte characters. */
3327    
3328     REPEATTYPE:
3329     ctype = *ecode++; /* Code for the character type */
3330    
3331     #ifdef SUPPORT_UCP
3332     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3333     {
3334     prop_fail_result = ctype == OP_NOTPROP;
3335     prop_type = *ecode++;
3336 nigel 87 prop_value = *ecode++;
3337 nigel 77 }
3338     else prop_type = -1;
3339     #endif
3340    
3341     /* First, ensure the minimum number of matches are present. Use inline
3342     code for maximizing the speed, and do the type test once at the start
3343 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3344 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3345     and single-bytes. */
3346    
3347     if (min > 0)
3348     {
3349     #ifdef SUPPORT_UCP
3350 nigel 87 if (prop_type >= 0)
3351 nigel 77 {
3352 nigel 87 switch(prop_type)
3353 nigel 77 {
3354 nigel 87 case PT_ANY:
3355     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3356     for (i = 1; i <= min; i++)
3357     {
3358 ph10 427 if (eptr >= md->end_subject)
3359 ph10 426 {
3360 ph10 427 SCHECK_PARTIAL();
3361 ph10 426 RRETURN(MATCH_NOMATCH);
3362 ph10 427 }
3363 ph10 184 GETCHARINCTEST(c, eptr);
3364 nigel 87 }
3365     break;
3366    
3367     case PT_LAMP:
3368     for (i = 1; i <= min; i++)
3369     {
3370 ph10 427 if (eptr >= md->end_subject)
3371 ph10 426 {
3372 ph10 427 SCHECK_PARTIAL();
3373 ph10 426 RRETURN(MATCH_NOMATCH);
3374 ph10 427 }
3375 ph10 184 GETCHARINCTEST(c, eptr);
3376 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3377 nigel 87 if ((prop_chartype == ucp_Lu ||
3378     prop_chartype == ucp_Ll ||
3379     prop_chartype == ucp_Lt) == prop_fail_result)
3380     RRETURN(MATCH_NOMATCH);
3381     }
3382     break;
3383    
3384     case PT_GC:
3385     for (i = 1; i <= min; i++)
3386     {
3387 ph10 427 if (eptr >= md->end_subject)
3388 ph10 426 {
3389 ph10 427 SCHECK_PARTIAL();
3390 ph10 426 RRETURN(MATCH_NOMATCH);
3391 ph10 427 }
3392 ph10 184 GETCHARINCTEST(c, eptr);
3393 ph10 349 prop_category = UCD_CATEGORY(c);
3394 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3395     RRETURN(MATCH_NOMATCH);
3396     }
3397     break;
3398    
3399     case PT_PC:
3400     for (i = 1; i <= min; i++)
3401     {
3402 ph10 427 if (eptr >= md->end_subject)
3403 ph10 426 {
3404 ph10 427 SCHECK_PARTIAL();
3405 ph10 426 RRETURN(MATCH_NOMATCH);
3406 ph10 427 }
3407 ph10 184 GETCHARINCTEST(c, eptr);
3408 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3409 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3410     RRETURN(MATCH_NOMATCH);
3411     }
3412     break;
3413    
3414     case PT_SC:
3415     for (i = 1; i <= min; i++)
3416     {
3417 ph10 427 if (eptr >= md->end_subject)
3418 ph10 426 {
3419 ph10 427 SCHECK_PARTIAL();
3420 ph10 426 RRETURN(MATCH_NOMATCH);
3421 ph10 427 }
3422 ph10 184 GETCHARINCTEST(c, eptr);
3423 ph10 349 prop_script = UCD_SCRIPT(c);
3424 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3425     RRETURN(MATCH_NOMATCH);
3426     }
3427     break;
3428    
3429     default:
3430     RRETURN(PCRE_ERROR_INTERNAL);
3431 nigel 77 }
3432     }
3433    
3434     /* Match extended Unicode sequences. We will get here only if the
3435     support is in the binary; otherwise a compile-time error occurs. */
3436    
3437     else if (ctype == OP_EXTUNI)
3438     {
3439     for (i = 1; i <= min; i++)
3440     {
3441 ph10 427 if (eptr >= md->end_subject)
3442 ph10 426 {
3443 ph10 427 SCHECK_PARTIAL();
3444 ph10 426 RRETURN(MATCH_NOMATCH);
3445 ph10 427 }
3446 nigel 77 GETCHARINCTEST(c, eptr);
3447 ph10 349 prop_category = UCD_CATEGORY(c);
3448 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3449     while (eptr < md->end_subject)
3450     {
3451     int len = 1;
3452 ph10 426 if (!utf8) c = *eptr;
3453     else { GETCHARLEN(c, eptr, len); }
3454 ph10 349 prop_category = UCD_CATEGORY(c);
3455 nigel 77 if (prop_category != ucp_M) break;
3456     eptr += len;
3457     }
3458     }
3459     }
3460    
3461     else
3462     #endif /* SUPPORT_UCP */
3463    
3464     /* Handle all other cases when the coding is UTF-8 */
3465    
3466     #ifdef SUPPORT_UTF8
3467     if (utf8) switch(ctype)
3468     {
3469     case OP_ANY:
3470     for (i = 1; i <= min; i++)
3471     {
3472 ph10 426 if (eptr >= md->end_subject)
3473     {
3474 ph10 427 SCHECK_PARTIAL();
3475 nigel 77 RRETURN(MATCH_NOMATCH);
3476 ph10 427 }
3477 ph10 426 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3478 nigel 91 eptr++;
3479 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3480     }
3481     break;
3482    
3483 ph10 341 case OP_ALLANY:
3484     for (i = 1; i <= min; i++)
3485     {
3486 ph10 427 if (eptr >= md->end_subject)
3487 ph10 426 {
3488     SCHECK_PARTIAL();
3489     RRETURN(MATCH_NOMATCH);
3490 ph10 427 }
3491 ph10 341 eptr++;
3492     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3493     }
3494     break;
3495    
3496 nigel 77 case OP_ANYBYTE:
3497 ph10 427 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3498 nigel 77 eptr += min;
3499     break;
3500    
3501 nigel 93 case OP_ANYNL:
3502     for (i = 1; i <= min; i++)
3503     {
3504 ph10 427 if (eptr >= md->end_subject)
3505 ph10 426 {
3506     SCHECK_PARTIAL();
3507     RRETURN(MATCH_NOMATCH);
3508 ph10 427 }
3509 nigel 93 GETCHARINC(c, eptr);
3510     switch(c)
3511     {
3512     default: RRETURN(MATCH_NOMATCH);
3513     case 0x000d:
3514     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3515     break;
3516 ph10 231
3517 nigel 93 case 0x000a:
3518 ph10 231 break;
3519    
3520 nigel 93 case 0x000b:
3521     case 0x000c:
3522     case 0x0085:
3523     case 0x2028:
3524     case 0x2029:
3525 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3526 nigel 93 break;
3527     }
3528     }
3529     break;
3530    
3531 ph10 178 case OP_NOT_HSPACE:
3532     for (i = 1; i <= min; i++)
3533     {
3534 ph10 427 if (eptr >= md->end_subject)
3535 ph10 426 {
3536     SCHECK_PARTIAL();
3537     RRETURN(MATCH_NOMATCH);
3538 ph10 427 }
3539 ph10 178 GETCHARINC(c, eptr);
3540     switch(c)
3541     {
3542     default: break;
3543     case 0x09: /* HT */
3544     case 0x20: /* SPACE */
3545     case 0xa0: /* NBSP */
3546     case 0x1680: /* OGHAM SPACE MARK */
3547     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3548     case 0x2000: /* EN QUAD */
3549     case 0x2001: /* EM QUAD */
3550     case 0x2002: /* EN SPACE */
3551     case 0x2003: /* EM SPACE */
3552     case 0x2004: /* THREE-PER-EM SPACE */
3553     case 0x2005: /* FOUR-PER-EM SPACE */
3554     case 0x2006: /* SIX-PER-EM SPACE */
3555     case 0x2007: /* FIGURE SPACE */
3556     case 0x2008: /* PUNCTUATION SPACE */
3557     case 0x2009: /* THIN SPACE */
3558     case 0x200A: /* HAIR SPACE */
3559     case 0x202f: /* NARROW NO-BREAK SPACE */
3560     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3561     case 0x3000: /* IDEOGRAPHIC SPACE */
3562     RRETURN(MATCH_NOMATCH);
3563     }
3564     }
3565     break;
3566 ph10 182
3567 ph10 178 case OP_HSPACE:
3568     for (i = 1; i <= min; i++)
3569     {
3570 ph10 427 if (eptr >= md->end_subject)
3571 ph10 426 {
3572 ph10 427 SCHECK_PARTIAL();
3573 ph10 426 RRETURN(MATCH_NOMATCH);
3574 ph10 427 }
3575 ph10 178 GETCHARINC(c, eptr);
3576     switch(c)
3577     {
3578     default: RRETURN(MATCH_NOMATCH);
3579     case 0x09: /* HT */
3580     case 0x20: /* SPACE */
3581     case 0xa0: /* NBSP */
3582     case 0x1680: /* OGHAM SPACE MARK */
3583     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3584     case 0x2000: /* EN QUAD */
3585     case 0x2001: /* EM QUAD */
3586     case 0x2002: /* EN SPACE */
3587     case 0x2003: /* EM SPACE */
3588     case 0x2004: /* THREE-PER-EM SPACE */
3589     case 0x2005: /* FOUR-PER-EM SPACE */
3590     case 0x2006: /* SIX-PER-EM SPACE */
3591     case 0x2007: /* FIGURE SPACE */
3592     case 0x2008: /* PUNCTUATION SPACE */
3593     case 0x2009: /* THIN SPACE */
3594     case 0x200A: /* HAIR SPACE */
3595     case 0x202f: /* NARROW NO-BREAK SPACE */
3596     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3597     case 0x3000: /* IDEOGRAPHIC SPACE */
3598     break;
3599     }
3600     }
3601     break;
3602 ph10 182
3603 ph10 178 case OP_NOT_VSPACE:
3604     for (i = 1; i <= min; i++)
3605     {
3606 ph10 427 if (eptr >= md->end_subject)
3607 ph10 426 {
3608 ph10 427 SCHECK_PARTIAL();
3609 ph10 426 RRETURN(MATCH_NOMATCH);
3610 ph10 427 }
3611 ph10 178 GETCHARINC(c, eptr);
3612     switch(c)
3613     {
3614     default: break;
3615     case 0x0a: /* LF */
3616     case 0x0b: /* VT */
3617     case 0x0c: /* FF */
3618     case 0x0d: /* CR */
3619     case 0x85: /* NEL */
3620     case 0x2028: /* LINE SEPARATOR */
3621     case 0x2029: /* PARAGRAPH SEPARATOR */
3622     RRETURN(MATCH_NOMATCH);
3623     }
3624     }
3625     break;
3626 ph10 182
3627 ph10 178 case OP_VSPACE:
3628     for (i = 1; i <= min; i++)
3629     {
3630 ph10 427 if (eptr >= md->end_subject)
3631 ph10 426 {
3632 ph10 427 SCHECK_PARTIAL();
3633 ph10 426 RRETURN(MATCH_NOMATCH);
3634 ph10 427 }
3635 ph10 178 GETCHARINC(c, eptr);
3636     switch(c)
3637     {
3638     default: RRETURN(MATCH_NOMATCH);
3639     case 0x0a: /* LF */
3640     case 0x0b: /* VT */
3641     case 0x0c: /* FF */
3642     case 0x0d: /* CR */
3643     case 0x85: /* NEL */
3644     case 0x2028: /* LINE SEPARATOR */
3645     case 0x2029: /* PARAGRAPH SEPARATOR */
3646 ph10 182 break;
3647 ph10 178 }
3648     }
3649     break;
3650    
3651 nigel 77 case OP_NOT_DIGIT:
3652     for (i = 1; i <= min; i++)
3653     {
3654 ph10 427 if (eptr >= md->end_subject)
3655 ph10 426 {
3656 ph10 427 SCHECK_PARTIAL();
3657 ph10 426 RRETURN(MATCH_NOMATCH);
3658 ph10 427 }
3659 nigel 77 GETCHARINC(c, eptr);
3660     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3661     RRETURN(MATCH_NOMATCH);
3662     }
3663     break;
3664    
3665     case OP_DIGIT:
3666     for (i = 1; i <= min; i++)
3667     {
3668 ph10 427 if (eptr >= md->end_subject)
3669 ph10 426 {
3670 ph10 427 SCHECK_PARTIAL();
3671 nigel 77 RRETURN(MATCH_NOMATCH);
3672 ph10 427 }
3673 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3674     RRETURN(MATCH_NOMATCH);
3675 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3676     }
3677     break;
3678    
3679     case OP_NOT_WHITESPACE:
3680     for (i = 1; i <= min; i++)
3681     {
3682 ph10 427 if (eptr >= md->end_subject)
3683 ph10 426 {
3684 ph10 427 SCHECK_PARTIAL();
3685 nigel 77 RRETURN(MATCH_NOMATCH);
3686 ph10 427 }
3687 ph10 426 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3688     RRETURN(MATCH_NOMATCH);
3689 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3690 nigel 77 }
3691     break;
3692    
3693     case OP_WHITESPACE:
3694     for (i = 1; i <= min; i++)
3695     {
3696 ph10 427 if (eptr >= md->end_subject)
3697 ph10 426 {
3698 ph10 427 SCHECK_PARTIAL();
3699 nigel 77 RRETURN(MATCH_NOMATCH);
3700 ph10 427 }
3701 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3702     RRETURN(MATCH_NOMATCH);
3703 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3704     }
3705     break;
3706    
3707     case OP_NOT_WORDCHAR:
3708     for (i = 1; i <= min; i++)
3709     {
3710 ph10 482 if (eptr >= md->end_subject)
3711     {
3712     SCHECK_PARTIAL();
3713 nigel 77 RRETURN(MATCH_NOMATCH);
3714 ph10 482 }
3715     if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3716     RRETURN(MATCH_NOMATCH);
3717 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3718 nigel 77 }
3719     break;
3720    
3721     case OP_WORDCHAR:
3722     for (i = 1; i <= min; i++)
3723     {
3724 ph10 427 if (eptr >= md->end_subject)
3725 ph10 426 {
3726 ph10 427 SCHECK_PARTIAL();
3727 nigel 77 RRETURN(MATCH_NOMATCH);
3728 ph10 427 }
3729 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3730     RRETURN(MATCH_NOMATCH);
3731 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3732     }
3733     break;
3734    
3735     default:
3736     RRETURN(PCRE_ERROR_INTERNAL);
3737     } /* End switch(ctype) */
3738    
3739     else
3740     #endif /* SUPPORT_UTF8 */
3741    
3742     /* Code for the non-UTF-8 case for minimum matching of operators other
3743 ph10 426 than OP_PROP and OP_NOTPROP. */
3744 nigel 77
3745     switch(ctype)
3746     {
3747     case OP_ANY:
3748 ph10 342 for (i = 1; i <= min; i++)
3749 nigel 77 {
3750 ph10 427 if (eptr >= md->end_subject)
3751 ph10 426 {
3752 ph10 427 SCHECK_PARTIAL();
3753 ph10 426 RRETURN(MATCH_NOMATCH);
3754 ph10 427 }
3755 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3756     eptr++;
3757 nigel 77 }
3758     break;
3759    
3760 ph10 341 case OP_ALLANY:
3761 ph10 443 if (eptr > md->end_subject - min)
3762 ph10 428 {
3763 ph10 443 SCHECK_PARTIAL();
3764 ph10 428 RRETURN(MATCH_NOMATCH);
3765 ph10 443 }
3766 ph10 341 eptr += min;
3767     break;
3768    
3769 nigel 77 case OP_ANYBYTE:
3770 ph10 443 if (eptr > md->end_subject - min)
3771 ph10 428 {
3772 ph10 443 SCHECK_PARTIAL();
3773 ph10 428 RRETURN(MATCH_NOMATCH);
3774 ph10 443 }
3775 nigel 77 eptr += min;
3776     break;
3777    
3778 nigel 93 case OP_ANYNL:
3779     for (i = 1; i <= min; i++)
3780     {
3781 ph10 427 if (eptr >= md->end_subject)
3782 ph10 426 {
3783 ph10 427 SCHECK_PARTIAL();
3784 ph10 426 RRETURN(MATCH_NOMATCH);
3785 ph10 427 }
3786 nigel 93 switch(*eptr++)
3787     {
3788     default: RRETURN(MATCH_NOMATCH);
3789     case 0x000d:
3790     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3791     break;
3792     case 0x000a:
3793 ph10 231 break;
3794    
3795 nigel 93 case 0x000b:
3796     case 0x000c:
3797     case 0x0085:
3798 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3799 nigel 93 break;
3800     }
3801     }
3802     break;
3803    
3804 ph10 178 case OP_NOT_HSPACE:
3805     for (i = 1; i <= min; i++)
3806     {
3807 ph10 427 if (eptr >= md->end_subject)
3808 ph10 426 {
3809 ph10 427 SCHECK_PARTIAL();
3810 ph10 426 RRETURN(MATCH_NOMATCH);
3811 ph10 427 }
3812 ph10 178 switch(*eptr++)
3813     {
3814     default: break;
3815     case 0x09: /* HT */
3816     case 0x20: /* SPACE */
3817     case 0xa0: /* NBSP */
3818     RRETURN(MATCH_NOMATCH);
3819     }
3820     }
3821     break;
3822    
3823     case OP_HSPACE:
3824     for (i = 1; i <= min; i++)
3825     {
3826 ph10 427 if (eptr >= md->end_subject)
3827 ph10 426 {
3828 ph10 427 SCHECK_PARTIAL();
3829 ph10 426 RRETURN(MATCH_NOMATCH);
3830 ph10 427 }
3831 ph10 178 switch(*eptr++)
3832     {
3833     default: RRETURN(MATCH_NOMATCH);
3834     case 0x09: /* HT */
3835     case 0x20: /* SPACE */
3836     case 0xa0: /* NBSP */
3837 ph10 182 break;
3838 ph10 178 }
3839     }
3840     break;
3841    
3842     case OP_NOT_VSPACE:
3843     for (i = 1; i <= min; i++)
3844     {
3845 ph10 427 if (eptr >= md->end_subject)
3846 ph10 426 {
3847 ph10 427 SCHECK_PARTIAL();
3848 ph10 426 RRETURN(MATCH_NOMATCH);
3849 ph10 427 }
3850 ph10 178 switch(*eptr++)
3851     {
3852     default: break;
3853     case 0x0a: /* LF */
3854     case 0x0b: /* VT */
3855     case 0x0c: /* FF */
3856     case 0x0d: /* CR */
3857     case 0x85: /* NEL */
3858     RRETURN(MATCH_NOMATCH);
3859     }
3860     }
3861     break;
3862    
3863     case OP_VSPACE:
3864     for (i = 1; i <= min; i++)
3865     {
3866 ph10 427 if (eptr >= md->end_subject)
3867 ph10 426 {
3868 ph10 427 SCHECK_PARTIAL();
3869 ph10 426 RRETURN(MATCH_NOMATCH);
3870 ph10 427 }
3871 ph10 178 switch(*eptr++)
3872     {
3873     default: RRETURN(MATCH_NOMATCH);
3874     case 0x0a: /* LF */
3875     case 0x0b: /* VT */
3876     case 0x0c: /* FF */
3877     case 0x0d: /* CR */
3878     case 0x85: /* NEL */
3879 ph10 182 break;
3880 ph10 178 }
3881     }
3882     break;
3883    
3884 nigel 77 case OP_NOT_DIGIT:
3885     for (i = 1; i <= min; i++)
3886 ph10 427 {
3887     if (eptr >= md->end_subject)
3888 ph10 426 {
3889 ph10 427 SCHECK_PARTIAL();
3890 ph10 426 RRETURN(MATCH_NOMATCH);
3891 ph10 427 }
3892 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3893 ph10 427 }
3894 nigel 77 break;
3895    
3896     case OP_DIGIT:
3897     for (i = 1; i <= min; i++)
3898 ph10 427 {
3899     if (eptr >= md->end_subject)
3900 ph10 426 {
3901 ph10 427 SCHECK_PARTIAL();
3902 ph10 426 RRETURN(MATCH_NOMATCH);
3903 ph10 427 }
3904 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3905 ph10 427 }
3906 nigel 77 break;
3907    
3908     case OP_NOT_WHITESPACE:
3909     for (i = 1; i <= min; i++)
3910 ph10 427 {
3911