/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 597 - (hide annotations) (download)
Mon May 2 17:08:52 2011 UTC (2 years ago) by ph10
File MIME type: text/plain
File size: 187796 byte(s)
Complete incomplete fix for UTF-8 caseless references of different lengths.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74 ph10 511 #define MATCH_ACCEPT (-999)
75     #define MATCH_COMMIT (-998)
76     #define MATCH_PRUNE (-997)
77     #define MATCH_SKIP (-996)
78     #define MATCH_SKIP_ARG (-995)
79     #define MATCH_THEN (-994)
80 ph10 210
81 ph10 510 /* This is a convenience macro for code that occurs many times. */
82    
83     #define MRRETURN(ra) \
84     { \
85     md->mark = markptr; \
86     RRETURN(ra); \
87     }
88    
89 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
90     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91     because the offset vector is always a multiple of 3 long. */
92    
93     #define REC_STACK_SAVE_MAX 30
94    
/* Lower and upper repeat counts for the common repeats. The two tables are
indexed in parallel; in rep_max an entry of 0 stands for "no upper limit". */

static const char rep_min[] = {
  0, 0,      /* entries 0 and 1 */
  1, 1,      /* entries 2 and 3 */
  0, 0       /* entries 4 and 5 */
};

static const char rep_max[] = {
  0, 0,      /* entries 0 and 1: unlimited */
  0, 0,      /* entries 2 and 3: unlimited */
  1, 1       /* entries 4 and 5 */
};
99    
100    
101    
102 ph10 475 #ifdef PCRE_DEBUG
103 nigel 77 /*************************************************
104     * Debugging function to print chars *
105     *************************************************/
106    
107     /* Print a sequence of chars in printable format, stopping at the end of the
108     subject if the requested.
109    
110     Arguments:
111     p points to characters
112     length number to print
113     is_subject TRUE if printing from within md->start_subject
114     md pointer to matching data block, if is_subject is TRUE
115    
116     Returns: nothing
117     */
118    
119     static void
120     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121     {
122 nigel 93 unsigned int c;
123 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124     while (length-- > 0)
125     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126     }
127     #endif
128    
129    
130    
131     /*************************************************
132     * Match a back-reference *
133     *************************************************/
134    
135 ph10 595 /* Normally, if a back reference hasn't been set, the length that is passed is
136     negative, so the match always fails. However, in JavaScript compatibility mode,
137     the length passed is zero. Note that in caseless UTF-8 mode, the number of
138     subject bytes matched may be different to the number of reference bytes.
139 nigel 77
140     Arguments:
141     offset index into the offset vector
142 ph10 595 eptr pointer into the subject
143     length length of reference to be matched (number of bytes)
144 nigel 77 md points to match data block
145     ims the ims flags
146    
147 ph10 595 Returns: < 0 if not matched, otherwise the number of subject bytes matched
148 nigel 77 */
149    
150 ph10 595 static int
151 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
152 nigel 77 unsigned long int ims)
153     {
154 ph10 595 USPTR eptr_start = eptr;
155     register USPTR p = md->start_subject + md->offset_vector[offset];
156 nigel 77
157 ph10 475 #ifdef PCRE_DEBUG
158 nigel 77 if (eptr >= md->end_subject)
159     printf("matching subject <null>");
160     else
161     {
162     printf("matching subject ");
163     pchars(eptr, length, TRUE, md);
164     }
165     printf(" against backref ");
166     pchars(p, length, FALSE, md);
167     printf("\n");
168     #endif
169    
170 ph10 595 /* Always fail if reference not set (and not JavaScript compatible). */
171 nigel 77
172 ph10 595 if (length < 0) return -1;
173 nigel 77
174 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175     properly if Unicode properties are supported. Otherwise, we can check only
176     ASCII characters. */
177 nigel 77
178     if ((ims & PCRE_CASELESS) != 0)
179     {
180 ph10 354 #ifdef SUPPORT_UTF8
181     #ifdef SUPPORT_UCP
182     if (md->utf8)
183     {
184 ph10 595 /* Match characters up to the end of the reference. NOTE: the number of
185     bytes matched may differ, because there are some characters whose upper and
186     lower case versions code as different numbers of bytes. For example, U+023A
187     (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188     a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189     the latter. It is important, therefore, to check the length along the
190     reference, not along the subject (earlier code did this wrong). */
191    
192     USPTR endptr = p + length;
193     while (p < endptr)
194 ph10 354 {
195 ph10 358 int c, d;
196 ph10 597 if (eptr >= md->end_subject) return -1;
197 ph10 354 GETCHARINC(c, eptr);
198     GETCHARINC(d, p);
199 ph10 595 if (c != d && c != UCD_OTHERCASE(d)) return -1;
200 ph10 358 }
201     }
202 ph10 354 else
203     #endif
204     #endif
205    
206     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207     is no UCP support. */
208 ph10 597 {
209     if (eptr + length > md->end_subject) return -1;
210     while (length-- > 0)
211     { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212     }
213 nigel 77 }
214 ph10 358
215 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
216     are in UTF-8 mode. */
217 ph10 358
218 nigel 77 else
219 ph10 597 {
220     if (eptr + length > md->end_subject) return -1;
221     while (length-- > 0) if (*p++ != *eptr++) return -1;
222     }
223 nigel 77
224 ph10 595 return eptr - eptr_start;
225 nigel 77 }
226    
227    
228    
229     /***************************************************************************
230     ****************************************************************************
231     RECURSION IN THE match() FUNCTION
232    
233 nigel 87 The match() function is highly recursive, though not every recursive call
234     increases the recursive depth. Nevertheless, some regular expressions can cause
235     it to recurse to a great depth. I was writing for Unix, so I just let it call
236     itself recursively. This uses the stack for saving everything that has to be
237     saved for a recursive call. On Unix, the stack can be large, and this works
238     fine.
239 nigel 77
240 nigel 87 It turns out that on some non-Unix-like systems there are problems with
241     programs that use a lot of stack. (This despite the fact that every last chip
242     has oodles of memory these days, and techniques for extending the stack have
243     been known for decades.) So....
244 nigel 77
245     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
246     calls by keeping local variables that need to be preserved in blocks of memory
247 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
248 nigel 77 achieve this so that the actual code doesn't look very different to what it
249     always used to.
250 ph10 164
251 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
252 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
253     Switzer, the use of longjmp() has been abolished, at the cost of having to
254     provide a unique number for each call to RMATCH. There is no way of generating
255     a sequence of numbers at compile time in C. I have given them names, to make
256     them stand out more clearly.
257    
258     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
259     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
260 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
261     don't have indeterminate values; this has meant that the frame size can be
262 ph10 164 reduced because the result can be "passed back" by straight setting of the
263     variable instead of being passed in the frame.
264 nigel 77 ****************************************************************************
265     ***************************************************************************/
266    
267 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
268     below must be updated in sync. */
269 nigel 77
/* Numbering starts at 1; every following name takes the next integer, so the
last one, RM62, has the value 62. */

enum {
  RM1 = 1,
  RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,
  RM9,  RM10, RM11, RM12, RM13, RM14, RM15, RM16,
  RM17, RM18, RM19, RM20, RM21, RM22, RM23, RM24,
  RM25, RM26, RM27, RM28, RM29, RM30, RM31, RM32,
  RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48,
  RM49, RM50, RM51, RM52, RM53, RM54, RM55, RM56,
  RM57, RM58, RM59, RM60, RM61, RM62
};
277 ph10 164
278 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
279 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
280 ph10 501 actually used in this definition. */
281 nigel 77
282     #ifndef NO_RECURSE
283     #define REGISTER register
284 ph10 164
285 ph10 475 #ifdef PCRE_DEBUG
286 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
287 nigel 87 { \
288     printf("match() called in line %d\n", __LINE__); \
289 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
290 nigel 87 printf("to line %d\n", __LINE__); \
291     }
292     #define RRETURN(ra) \
293     { \
294     printf("match() returned %d from line %d ", ra, __LINE__); \
295     return ra; \
296     }
297     #else
298 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
299 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
300 nigel 77 #define RRETURN(ra) return ra
301 nigel 87 #endif
302    
303 nigel 77 #else
304    
305    
306 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
307     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
308     argument of match(), which never changes. */
309 nigel 77
310     #define REGISTER
311    
312 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
313 nigel 77 {\
314 ph10 563 heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
315 ph10 534 if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316 ph10 164 frame->Xwhere = rw; \
317     newframe->Xeptr = ra;\
318     newframe->Xecode = rb;\
319 ph10 168 newframe->Xmstart = mstart;\
320 ph10 501 newframe->Xmarkptr = markptr;\
321 ph10 164 newframe->Xoffset_top = rc;\
322     newframe->Xims = re;\
323     newframe->Xeptrb = rf;\
324     newframe->Xflags = rg;\
325     newframe->Xrdepth = frame->Xrdepth + 1;\
326     newframe->Xprevframe = frame;\
327     frame = newframe;\
328     DPRINTF(("restarting from line %d\n", __LINE__));\
329     goto HEAP_RECURSE;\
330     L_##rw:\
331     DPRINTF(("jumped back to line %d\n", __LINE__));\
332 nigel 77 }
333    
334     #define RRETURN(ra)\
335     {\
336 ph10 527 heapframe *oldframe = frame;\
337     frame = oldframe->Xprevframe;\
338     (pcre_stack_free)(oldframe);\
339 nigel 77 if (frame != NULL)\
340     {\
341 ph10 164 rrc = ra;\
342     goto HEAP_RETURN;\
343 nigel 77 }\
344     return ra;\
345     }
346    
347    
348     /* Structure for remembering the local variables in a private frame */
349    
350     typedef struct heapframe {
351     struct heapframe *Xprevframe;
352    
353     /* Function arguments that may change */
354    
355 ph10 409 USPTR Xeptr;
356 nigel 77 const uschar *Xecode;
357 ph10 409 USPTR Xmstart;
358 ph10 501 USPTR Xmarkptr;
359 nigel 77 int Xoffset_top;
360     long int Xims;
361     eptrblock *Xeptrb;
362     int Xflags;
363 nigel 91 unsigned int Xrdepth;
364 nigel 77
365     /* Function local variables */
366    
367 ph10 409 USPTR Xcallpat;
368 ph10 406 #ifdef SUPPORT_UTF8
369 ph10 409 USPTR Xcharptr;
370 ph10 406 #endif
371 ph10 409 USPTR Xdata;
372     USPTR Xnext;
373     USPTR Xpp;
374     USPTR Xprev;
375     USPTR Xsaved_eptr;
376 nigel 77
377     recursion_info Xnew_recursive;
378    
379     BOOL Xcur_is_word;
380     BOOL Xcondition;
381     BOOL Xprev_is_word;
382    
383     unsigned long int Xoriginal_ims;
384    
385     #ifdef SUPPORT_UCP
386     int Xprop_type;
387 nigel 87 int Xprop_value;
388 nigel 77 int Xprop_fail_result;
389     int Xprop_category;
390     int Xprop_chartype;
391 nigel 87 int Xprop_script;
392 ph10 123 int Xoclength;
393     uschar Xocchars[8];
394 nigel 77 #endif
395    
396 ph10 403 int Xcodelink;
397 nigel 77 int Xctype;
398 nigel 93 unsigned int Xfc;
399 nigel 77 int Xfi;
400     int Xlength;
401     int Xmax;
402     int Xmin;
403     int Xnumber;
404     int Xoffset;
405     int Xop;
406     int Xsave_capture_last;
407     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
408     int Xstacksave[REC_STACK_SAVE_MAX];
409    
410     eptrblock Xnewptrb;
411    
412 ph10 164 /* Where to jump back to */
413 nigel 77
414 ph10 164 int Xwhere;
415 ph10 165
416 nigel 77 } heapframe;
417    
418     #endif
419    
420    
421     /***************************************************************************
422     ***************************************************************************/
423    
424    
425    
426     /*************************************************
427     * Match from current position *
428     *************************************************/
429    
430 nigel 93 /* This function is called recursively in many circumstances. Whenever it
431 nigel 77 returns a negative (error) response, the outer incarnation must also return the
432 ph10 426 same response. */
433 nigel 77
434 ph10 426 /* These macros pack up tests that are used for partial matching, and which
435     appear several times in the code. We set the "hit end" flag if the pointer is
436     at the end of the subject and also past the start of the subject (i.e.
437 ph10 427 something has been matched). For hard partial matching, we then return
438     immediately. The second one is used when we already know we are past the end of
439     the subject. */
440 ph10 426
441     #define CHECK_PARTIAL()\
442 ph10 553 if (md->partial != 0 && eptr >= md->end_subject && \
443     eptr > md->start_used_ptr) \
444     { \
445     md->hitend = TRUE; \
446     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
447 ph10 427 }
448 ph10 426
449     #define SCHECK_PARTIAL()\
450 ph10 553 if (md->partial != 0 && eptr > md->start_used_ptr) \
451     { \
452     md->hitend = TRUE; \
453     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
454 ph10 427 }
455 ph10 426
456 ph10 427
457 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
458     the md structure (e.g. utf8, end_subject) into individual variables to improve
459 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
460     made performance worse.
461    
462     Arguments:
463 nigel 93 eptr pointer to current character in subject
464     ecode pointer to current position in compiled code
465 ph10 168 mstart pointer to the current match start position (can be modified
466 ph10 172 by encountering \K)
467 ph10 501 markptr pointer to the most recent MARK name, or NULL
468 nigel 77 offset_top current top pointer
469     md pointer to "static" info for the match
470     ims current /i, /m, and /s options
471     eptrb pointer to chain of blocks containing eptr at start of
472     brackets - for testing for empty matches
473     flags can contain
474     match_condassert - this is an assertion condition
475 nigel 93 match_cbegroup - this is the start of an unlimited repeat
476     group that can match an empty string
477 nigel 87 rdepth the recursion depth
478 nigel 77
479     Returns: MATCH_MATCH if matched ) these values are >= 0
480     MATCH_NOMATCH if failed to match )
481 ph10 510 a negative MATCH_xxx value for PRUNE, SKIP, etc
482 nigel 77 a negative PCRE_ERROR_xxx value if aborted by an error condition
483 nigel 87 (e.g. stopped by repeated call or recursion limit)
484 nigel 77 */
485    
486     static int
487 ph10 510 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
488     const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
489 ph10 501 eptrblock *eptrb, int flags, unsigned int rdepth)
490 nigel 77 {
491     /* These variables do not need to be preserved over recursion in this function,
492 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
493     "register" because they are used a lot in loops. */
494 nigel 77
495 nigel 91 register int rrc; /* Returns from recursive calls */
496     register int i; /* Used for loops not involving calls to RMATCH() */
497 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
498 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
499 nigel 77
500 nigel 93 BOOL minimize, possessive; /* Quantifier options */
501 ph10 403 int condcode;
502 nigel 93
503 nigel 77 /* When recursion is not being used, all "local" variables that have to be
504     preserved over calls to RMATCH() are part of a "frame" which is obtained from
505     heap storage. Set up the top-level frame here; others are obtained from the
506     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
507    
508     #ifdef NO_RECURSE
509 ph10 563 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
510 ph10 531 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
511 nigel 77 frame->Xprevframe = NULL; /* Marks the top level */
512    
513     /* Copy in the original argument variables */
514    
515     frame->Xeptr = eptr;
516     frame->Xecode = ecode;
517 ph10 168 frame->Xmstart = mstart;
518 ph10 501 frame->Xmarkptr = markptr;
519 nigel 77 frame->Xoffset_top = offset_top;
520     frame->Xims = ims;
521     frame->Xeptrb = eptrb;
522     frame->Xflags = flags;
523 nigel 87 frame->Xrdepth = rdepth;
524 nigel 77
525     /* This is where control jumps back to to effect "recursion" */
526    
527     HEAP_RECURSE:
528    
529     /* Macros make the argument variables come from the current frame */
530    
531     #define eptr frame->Xeptr
532     #define ecode frame->Xecode
533 ph10 168 #define mstart frame->Xmstart
534 ph10 501 #define markptr frame->Xmarkptr
535 nigel 77 #define offset_top frame->Xoffset_top
536     #define ims frame->Xims
537     #define eptrb frame->Xeptrb
538     #define flags frame->Xflags
539 nigel 87 #define rdepth frame->Xrdepth
540 nigel 77
541     /* Ditto for the local variables */
542    
543     #ifdef SUPPORT_UTF8
544     #define charptr frame->Xcharptr
545     #endif
546     #define callpat frame->Xcallpat
547 ph10 403 #define codelink frame->Xcodelink
548 nigel 77 #define data frame->Xdata
549     #define next frame->Xnext
550     #define pp frame->Xpp
551     #define prev frame->Xprev
552     #define saved_eptr frame->Xsaved_eptr
553    
554     #define new_recursive frame->Xnew_recursive
555    
556     #define cur_is_word frame->Xcur_is_word
557     #define condition frame->Xcondition
558     #define prev_is_word frame->Xprev_is_word
559    
560     #define original_ims frame->Xoriginal_ims
561    
562     #ifdef SUPPORT_UCP
563     #define prop_type frame->Xprop_type
564 nigel 87 #define prop_value frame->Xprop_value
565 nigel 77 #define prop_fail_result frame->Xprop_fail_result
566     #define prop_category frame->Xprop_category
567     #define prop_chartype frame->Xprop_chartype
568 nigel 87 #define prop_script frame->Xprop_script
569 ph10 115 #define oclength frame->Xoclength
570     #define occhars frame->Xocchars
571 nigel 77 #endif
572    
573     #define ctype frame->Xctype
574     #define fc frame->Xfc
575     #define fi frame->Xfi
576     #define length frame->Xlength
577     #define max frame->Xmax
578     #define min frame->Xmin
579     #define number frame->Xnumber
580     #define offset frame->Xoffset
581     #define op frame->Xop
582     #define save_capture_last frame->Xsave_capture_last
583     #define save_offset1 frame->Xsave_offset1
584     #define save_offset2 frame->Xsave_offset2
585     #define save_offset3 frame->Xsave_offset3
586     #define stacksave frame->Xstacksave
587    
588     #define newptrb frame->Xnewptrb
589    
590     /* When recursion is being used, local variables are allocated on the stack and
591     get preserved during recursion in the normal way. In this environment, fi and
592     i, and fc and c, can be the same variables. */
593    
594 nigel 93 #else /* NO_RECURSE not defined */
595 nigel 77 #define fi i
596     #define fc c
597    
598    
599 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
600     const uschar *charptr; /* in small blocks of the code. My normal */
601     #endif /* style of coding would have declared */
602     const uschar *callpat; /* them within each of those blocks. */
603     const uschar *data; /* However, in order to accommodate the */
604     const uschar *next; /* version of this code that uses an */
605     USPTR pp; /* external "stack" implemented on the */
606     const uschar *prev; /* heap, it is easier to declare them all */
607     USPTR saved_eptr; /* here, so the declarations can be cut */
608     /* out in a block. The only declarations */
609     recursion_info new_recursive; /* within blocks below are for variables */
610     /* that do not have to be preserved over */
611     BOOL cur_is_word; /* a recursive call to RMATCH(). */
612     BOOL condition;
613 nigel 77 BOOL prev_is_word;
614    
615     unsigned long int original_ims;
616    
617     #ifdef SUPPORT_UCP
618     int prop_type;
619 nigel 87 int prop_value;
620 nigel 77 int prop_fail_result;
621     int prop_category;
622     int prop_chartype;
623 nigel 87 int prop_script;
624 ph10 115 int oclength;
625     uschar occhars[8];
626 nigel 77 #endif
627    
628 ph10 399 int codelink;
629 nigel 77 int ctype;
630     int length;
631     int max;
632     int min;
633     int number;
634     int offset;
635     int op;
636     int save_capture_last;
637     int save_offset1, save_offset2, save_offset3;
638     int stacksave[REC_STACK_SAVE_MAX];
639    
640     eptrblock newptrb;
641 nigel 93 #endif /* NO_RECURSE */
642 nigel 77
643     /* These statements are here to stop the compiler complaining about uninitialized
644     variables. */
645    
646     #ifdef SUPPORT_UCP
647 nigel 87 prop_value = 0;
648 nigel 77 prop_fail_result = 0;
649     #endif
650    
651 nigel 93
652 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
653     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
654     used. Thanks to Ian Taylor for noticing this possibility and sending the
655     original patch. */
656    
657     TAIL_RECURSE:
658    
659 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
660     are specified by the macro RMATCH and RRETURN is used to return. When
661     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
662 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
663 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
664     complicated macro. It has to be used in one particular way. This shouldn't,
665     however, impact performance when true recursion is being used. */
666 nigel 77
667 ph10 164 #ifdef SUPPORT_UTF8
668     utf8 = md->utf8; /* Local copy of the flag */
669     #else
670     utf8 = FALSE;
671     #endif
672    
673 nigel 87 /* First check that we haven't called match() too many times, or that we
674     haven't exceeded the recursive call limit. */
675    
676 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
677 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
678 nigel 77
679     original_ims = ims; /* Save for resetting on ')' */
680 nigel 91
681 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
682     string, the match_cbegroup flag is set. When this is the case, add the current
683     subject pointer to the chain of such remembered pointers, to be checked when we
684     hit the closing ket, in order to break infinite loops that match no characters.
685 ph10 197 When match() is called in other circumstances, don't add to the chain. The
686     match_cbegroup flag must NOT be used with tail recursion, because the memory
687     block that is used is on the stack, so a new one may be required for each
688     match(). */
689 nigel 77
690 nigel 93 if ((flags & match_cbegroup) != 0)
691 nigel 77 {
692 ph10 197 newptrb.epb_saved_eptr = eptr;
693     newptrb.epb_prev = eptrb;
694     eptrb = &newptrb;
695 nigel 77 }
696    
697 nigel 93 /* Now start processing the opcodes. */
698 nigel 77
699     for (;;)
700     {
701 nigel 93 minimize = possessive = FALSE;
702 nigel 77 op = *ecode;
703 ph10 443
704 nigel 93 switch(op)
705     {
706 ph10 510 case OP_MARK:
707     markptr = ecode + 2;
708     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
709 ph10 512 ims, eptrb, flags, RM55);
710    
711     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
712     argument, and we must check whether that argument matches this MARK's
713     argument. It is passed back in md->start_match_ptr (an overloading of that
714     variable). If it does match, we reset that variable to the current subject
715     position and return MATCH_SKIP. Otherwise, pass back the return code
716 ph10 510 unaltered. */
717 ph10 512
718     if (rrc == MATCH_SKIP_ARG &&
719 ph10 510 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
720     {
721     md->start_match_ptr = eptr;
722     RRETURN(MATCH_SKIP);
723     }
724    
725 ph10 512 if (md->mark == NULL) md->mark = markptr;
726 ph10 510 RRETURN(rrc);
727    
728 ph10 210 case OP_FAIL:
729 ph10 510 MRRETURN(MATCH_NOMATCH);
730 ph10 211
731 ph10 551 /* COMMIT overrides PRUNE, SKIP, and THEN */
732 ph10 553
733 ph10 510 case OP_COMMIT:
734     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
735     ims, eptrb, flags, RM52);
736 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
737 ph10 553 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
738     rrc != MATCH_THEN)
739 ph10 551 RRETURN(rrc);
740 ph10 510 MRRETURN(MATCH_COMMIT);
741    
742 ph10 551 /* PRUNE overrides THEN */
743 ph10 553
744 ph10 210 case OP_PRUNE:
745     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
746     ims, eptrb, flags, RM51);
747 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748 ph10 510 MRRETURN(MATCH_PRUNE);
749 ph10 211
750 ph10 510 case OP_PRUNE_ARG:
751     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752 ph10 512 ims, eptrb, flags, RM56);
753 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
754 ph10 510 md->mark = ecode + 2;
755     RRETURN(MATCH_PRUNE);
756 ph10 211
757 ph10 551 /* SKIP overrides PRUNE and THEN */
758 ph10 553
759 ph10 210 case OP_SKIP:
760     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
761     ims, eptrb, flags, RM53);
762 ph10 553 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
763 ph10 551 RRETURN(rrc);
764 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
765 ph10 510 MRRETURN(MATCH_SKIP);
766 ph10 211
767 ph10 510 case OP_SKIP_ARG:
768     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
769 ph10 512 ims, eptrb, flags, RM57);
770 ph10 553 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
771 ph10 551 RRETURN(rrc);
772 ph10 512
773     /* Pass back the current skip name by overloading md->start_match_ptr and
774     returning the special MATCH_SKIP_ARG return code. This will either be
775     caught by a matching MARK, or get to the top, where it is treated the same
776 ph10 510 as PRUNE. */
777 ph10 512
778 ph10 510 md->start_match_ptr = ecode + 2;
779 ph10 512 RRETURN(MATCH_SKIP_ARG);
780 ph10 553
781 ph10 550 /* For THEN (and THEN_ARG) we pass back the address of the bracket or
782 ph10 553 the alt that is at the start of the current branch. This makes it possible
783     to skip back past alternatives that precede the THEN within the current
784     branch. */
785 ph10 512
786 ph10 210 case OP_THEN:
787     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
788 ph10 212 ims, eptrb, flags, RM54);
789 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
790 ph10 550 md->start_match_ptr = ecode - GET(ecode, 1);
791 ph10 510 MRRETURN(MATCH_THEN);
792    
793     case OP_THEN_ARG:
794 ph10 553 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
795 ph10 550 offset_top, md, ims, eptrb, flags, RM58);
796 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
797 ph10 550 md->start_match_ptr = ecode - GET(ecode, 1);
798     md->mark = ecode + LINK_SIZE + 2;
799 ph10 212 RRETURN(MATCH_THEN);
800 ph10 211
801 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
802     the current subject position in the working slot at the top of the vector.
803     We mustn't change the current values of the data slot, because they may be
804     set from a previous iteration of this group, and be referred to by a
805     reference inside the group.
806 nigel 77
807 nigel 93 If the bracket fails to match, we need to restore this value and also the
808     values of the final offsets, in case they were set by a previous iteration
809     of the same bracket.
810 nigel 77
811 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
812     a non-capturing bracket. Don't worry about setting the flag for the error
813     case here; that is handled in the code for KET. */
814 nigel 77
815 nigel 93 case OP_CBRA:
816     case OP_SCBRA:
817     number = GET2(ecode, 1+LINK_SIZE);
818 nigel 77 offset = number << 1;
819    
820 ph10 475 #ifdef PCRE_DEBUG
821 nigel 93 printf("start bracket %d\n", number);
822     printf("subject=");
823 nigel 77 pchars(eptr, 16, TRUE, md);
824     printf("\n");
825     #endif
826    
827     if (offset < md->offset_max)
828     {
829     save_offset1 = md->offset_vector[offset];
830     save_offset2 = md->offset_vector[offset+1];
831     save_offset3 = md->offset_vector[md->offset_end - number];
832     save_capture_last = md->capture_last;
833    
834     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
835 ph10 531 md->offset_vector[md->offset_end - number] =
836 ph10 530 (int)(eptr - md->start_subject);
837 nigel 77
838 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
839 nigel 77 do
840     {
841 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
842     ims, eptrb, flags, RM1);
843 ph10 550 if (rrc != MATCH_NOMATCH &&
844     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
845     RRETURN(rrc);
846 nigel 77 md->capture_last = save_capture_last;
847     ecode += GET(ecode, 1);
848     }
849     while (*ecode == OP_ALT);
850    
851     DPRINTF(("bracket %d failed\n", number));
852    
853     md->offset_vector[offset] = save_offset1;
854     md->offset_vector[offset+1] = save_offset2;
855     md->offset_vector[md->offset_end - number] = save_offset3;
856    
857 ph10 510 if (rrc != MATCH_THEN) md->mark = markptr;
858 nigel 77 RRETURN(MATCH_NOMATCH);
859     }
860    
861 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
862     as a non-capturing bracket. */
863 nigel 77
864 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
865     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
866    
867 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
868 nigel 77
869 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
870     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
871    
872 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
873     final alternative within the brackets, we would return the result of a
874     recursive call to match() whatever happened. We can reduce stack usage by
875 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
876     is set.*/
877 nigel 77
878 nigel 93 case OP_BRA:
879     case OP_SBRA:
880     DPRINTF(("start non-capturing bracket\n"));
881     flags = (op >= OP_SBRA)? match_cbegroup : 0;
882 nigel 91 for (;;)
883 nigel 77 {
884 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
885 nigel 93 {
886 ph10 197 if (flags == 0) /* Not a possibly empty group */
887     {
888     ecode += _pcre_OP_lengths[*ecode];
889     DPRINTF(("bracket 0 tail recursion\n"));
890     goto TAIL_RECURSE;
891     }
892    
893     /* Possibly empty group; can't use tail recursion. */
894    
895     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
896     eptrb, flags, RM48);
897 ph10 512 if (rrc == MATCH_NOMATCH) md->mark = markptr;
898     RRETURN(rrc);
899 nigel 93 }
900 nigel 91
901     /* For non-final alternatives, continue the loop for a NOMATCH result;
902     otherwise return. */
903    
904 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
905     eptrb, flags, RM2);
906 ph10 550 if (rrc != MATCH_NOMATCH &&
907     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
908     RRETURN(rrc);
909 nigel 77 ecode += GET(ecode, 1);
910     }
911 nigel 91 /* Control never reaches here. */
912 nigel 77
913     /* Conditional group: compilation checked that there are no more than
914     two branches. If the condition is false, skipping the first branch takes us
915     past the end if there is only one branch, but that's OK because that is
916 nigel 91 exactly what going to the ket would do. As there is only one branch to be
917     obeyed, we can use tail recursion to avoid using another stack frame. */
918 nigel 77
919     case OP_COND:
920 nigel 93 case OP_SCOND:
921 ph10 399 codelink= GET(ecode, 1);
922 ph10 406
923 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
924     inserted between OP_COND and an assertion condition. */
925 ph10 392
926 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
927     {
928     if (pcre_callout != NULL)
929     {
930     pcre_callout_block cb;
931     cb.version = 1; /* Version 1 of the callout block */
932     cb.callout_number = ecode[LINK_SIZE+2];
933     cb.offset_vector = md->offset_vector;
934     cb.subject = (PCRE_SPTR)md->start_subject;
935 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
936     cb.start_match = (int)(mstart - md->start_subject);
937     cb.current_position = (int)(eptr - md->start_subject);
938 ph10 381 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
939     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
940     cb.capture_top = offset_top/2;
941     cb.capture_last = md->capture_last;
942     cb.callout_data = md->callout_data;
943 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
944 ph10 381 if (rrc < 0) RRETURN(rrc);
945     }
946     ecode += _pcre_OP_lengths[OP_CALLOUT];
947     }
948 ph10 392
949 ph10 399 condcode = ecode[LINK_SIZE+1];
950 ph10 406
951 ph10 381 /* Now see what the actual condition is */
952 ph10 392
953 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
954 nigel 77 {
955 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
956     {
957 ph10 461 condition = FALSE;
958     ecode += GET(ecode, 1);
959     }
960 ph10 459 else
961 ph10 461 {
962 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
963     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
964 ph10 461
965 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
966     false, but the test was set up by name, scan the table to see if the
967     name refers to any other numbers, and test them. The condition is true
968     if any one is set. */
969 ph10 461
970 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
971     {
972     uschar *slotA = md->name_table;
973     for (i = 0; i < md->name_count; i++)
974 ph10 461 {
975     if (GET2(slotA, 0) == recno) break;
976 ph10 459 slotA += md->name_entry_size;
977     }
978 ph10 461
979 ph10 459 /* Found a name for the number - there can be only one; duplicate
980     names for different numbers are allowed, but not vice versa. First
981     scan down for duplicates. */
982 ph10 461
983 ph10 459 if (i < md->name_count)
984 ph10 461 {
985 ph10 459 uschar *slotB = slotA;
986     while (slotB > md->name_table)
987     {
988     slotB -= md->name_entry_size;
989     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
990     {
991     condition = GET2(slotB, 0) == md->recursive->group_num;
992 ph10 461 if (condition) break;
993     }
994 ph10 459 else break;
995 ph10 461 }
996    
997 ph10 459 /* Scan up for duplicates */
998 ph10 461
999 ph10 459 if (!condition)
1000 ph10 461 {
1001 ph10 459 slotB = slotA;
1002     for (i++; i < md->name_count; i++)
1003     {
1004     slotB += md->name_entry_size;
1005     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1006     {
1007     condition = GET2(slotB, 0) == md->recursive->group_num;
1008     if (condition) break;
1009 ph10 461 }
1010 ph10 459 else break;
1011 ph10 461 }
1012     }
1013 ph10 459 }
1014 ph10 461 }
1015    
1016 ph10 459 /* Choose branch according to the condition */
1017 ph10 461
1018 ph10 459 ecode += condition? 3 : GET(ecode, 1);
1019     }
1020 ph10 461 }
1021 nigel 93
1022 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1023 nigel 93 {
1024 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1025 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1026 ph10 461
1027 ph10 459 /* If the numbered capture is unset, but the reference was by name,
1028 ph10 461 scan the table to see if the name refers to any other numbers, and test
1029     them. The condition is true if any one is set. This is tediously similar
1030     to the code above, but not close enough to try to amalgamate. */
1031    
1032 ph10 459 if (!condition && condcode == OP_NCREF)
1033     {
1034 ph10 461 int refno = offset >> 1;
1035 ph10 459 uschar *slotA = md->name_table;
1036 ph10 461
1037 ph10 459 for (i = 0; i < md->name_count; i++)
1038 ph10 461 {
1039     if (GET2(slotA, 0) == refno) break;
1040 ph10 459 slotA += md->name_entry_size;
1041     }
1042 ph10 461
1043     /* Found a name for the number - there can be only one; duplicate names
1044     for different numbers are allowed, but not vice versa. First scan down
1045 ph10 459 for duplicates. */
1046 ph10 461
1047 ph10 459 if (i < md->name_count)
1048 ph10 461 {
1049 ph10 459 uschar *slotB = slotA;
1050     while (slotB > md->name_table)
1051     {
1052     slotB -= md->name_entry_size;
1053     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1054     {
1055     offset = GET2(slotB, 0) << 1;
1056 ph10 461 condition = offset < offset_top &&
1057 ph10 459 md->offset_vector[offset] >= 0;
1058 ph10 461 if (condition) break;
1059     }
1060 ph10 459 else break;
1061 ph10 461 }
1062    
1063 ph10 459 /* Scan up for duplicates */
1064 ph10 461
1065 ph10 459 if (!condition)
1066 ph10 461 {
1067 ph10 459 slotB = slotA;
1068     for (i++; i < md->name_count; i++)
1069     {
1070     slotB += md->name_entry_size;
1071     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1072     {
1073     offset = GET2(slotB, 0) << 1;
1074 ph10 461 condition = offset < offset_top &&
1075 ph10 459 md->offset_vector[offset] >= 0;
1076 ph10 461 if (condition) break;
1077     }
1078 ph10 459 else break;
1079 ph10 461 }
1080     }
1081 ph10 459 }
1082 ph10 461 }
1083    
1084 ph10 459 /* Choose branch according to the condition */
1085    
1086 nigel 93 ecode += condition? 3 : GET(ecode, 1);
1087 nigel 77 }
1088    
1089 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
1090 nigel 93 {
1091     condition = FALSE;
1092     ecode += GET(ecode, 1);
1093     }
1094    
1095 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
1096 nigel 93 the final argument match_condassert causes it to stop at the end of an
1097     assertion. */
1098 nigel 77
1099     else
1100     {
1101 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1102     match_condassert, RM3);
1103 nigel 77 if (rrc == MATCH_MATCH)
1104     {
1105 nigel 93 condition = TRUE;
1106     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1107 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1108     }
1109 ph10 550 else if (rrc != MATCH_NOMATCH &&
1110     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1111 nigel 77 {
1112     RRETURN(rrc); /* Need braces because of following else */
1113     }
1114 nigel 93 else
1115     {
1116     condition = FALSE;
1117 ph10 399 ecode += codelink;
1118 nigel 93 }
1119     }
1120 nigel 91
1121 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1122 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1123     match_cbegroup is required for an unlimited repeat of a possibly empty
1124     group. If the second alternative doesn't exist, we can just plough on. */
1125 nigel 91
1126 nigel 93 if (condition || *ecode == OP_ALT)
1127     {
1128 nigel 91 ecode += 1 + LINK_SIZE;
1129 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1130     {
1131     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1132     RRETURN(rrc);
1133     }
1134     else /* Group must match something */
1135     {
1136     flags = 0;
1137     goto TAIL_RECURSE;
1138     }
1139 nigel 77 }
1140 ph10 395 else /* Condition false & no alternative */
1141 nigel 93 {
1142     ecode += 1 + LINK_SIZE;
1143     }
1144     break;
1145 nigel 77
1146 ph10 461
1147 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1148     to close any currently open capturing brackets. */
1149 ph10 461
1150 ph10 447 case OP_CLOSE:
1151 ph10 461 number = GET2(ecode, 1);
1152 ph10 447 offset = number << 1;
1153 ph10 461
1154 ph10 475 #ifdef PCRE_DEBUG
1155 ph10 447 printf("end bracket %d at *ACCEPT", number);
1156     printf("\n");
1157     #endif
1158 nigel 77
1159 ph10 447 md->capture_last = number;
1160     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1161     {
1162     md->offset_vector[offset] =
1163     md->offset_vector[md->offset_end - number];
1164 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1165 ph10 447 if (offset_top <= offset) offset_top = offset + 2;
1166     }
1167     ecode += 3;
1168 ph10 461 break;
1169 ph10 447
1170    
1171 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1172     recursion, we should restore the offsets appropriately and continue from
1173     after the call. */
1174 nigel 77
1175 ph10 210 case OP_ACCEPT:
1176 nigel 77 case OP_END:
1177     if (md->recursive != NULL && md->recursive->group_num == 0)
1178     {
1179     recursion_info *rec = md->recursive;
1180 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1181 nigel 77 md->recursive = rec->prevrec;
1182     memmove(md->offset_vector, rec->offset_save,
1183     rec->saved_max * sizeof(int));
1184 ph10 461 offset_top = rec->save_offset_top;
1185 nigel 77 ims = original_ims;
1186     ecode = rec->after_call;
1187     break;
1188     }
1189    
1190 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1191     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1192     the subject. In both cases, backtracking will then try other alternatives,
1193     if any. */
1194 ph10 443
1195 ph10 442 if (eptr == mstart &&
1196     (md->notempty ||
1197 ph10 443 (md->notempty_atstart &&
1198 ph10 442 mstart == md->start_subject + md->start_offset)))
1199 ph10 510 MRRETURN(MATCH_NOMATCH);
1200 ph10 443
1201 ph10 442 /* Otherwise, we have a match. */
1202 nigel 77
1203 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1204     md->end_offset_top = offset_top; /* and how many extracts were taken */
1205 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1206 nigel 77
1207 ph10 512 /* For some reason, the macros don't work properly if an expression is
1208     given as the argument to MRRETURN when the heap is in use. */
1209    
1210     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1211     MRRETURN(rrc);
1212    
1213 nigel 77 /* Change option settings */
1214    
1215     case OP_OPT:
1216     ims = ecode[1];
1217     ecode += 2;
1218     DPRINTF(("ims set to %02lx\n", ims));
1219     break;
1220    
1221     /* Assertion brackets. Check the alternative branches in turn - the
1222     matching won't pass the KET for an assertion. If any one branch matches,
1223     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1224     start of each branch to move the current point backwards, so the code at
1225     this level is identical to the lookahead case. */
1226    
1227     case OP_ASSERT:
1228     case OP_ASSERTBACK:
1229     do
1230     {
1231 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1232     RM4);
1233 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1234 ph10 500 {
1235     mstart = md->start_match_ptr; /* In case \K reset it */
1236     break;
1237 ph10 501 }
1238 ph10 550 if (rrc != MATCH_NOMATCH &&
1239     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1240     RRETURN(rrc);
1241 nigel 77 ecode += GET(ecode, 1);
1242     }
1243     while (*ecode == OP_ALT);
1244 ph10 510 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1245 nigel 77
1246     /* If checking an assertion for a condition, return MATCH_MATCH. */
1247    
1248     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1249    
1250     /* Continue from after the assertion, updating the offsets high water
1251     mark, since extracts may have been taken during the assertion. */
1252    
1253     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1254     ecode += 1 + LINK_SIZE;
1255     offset_top = md->end_offset_top;
1256     continue;
1257    
1258 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1259 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1260 ph10 473 branches. */
1261 nigel 77
1262     case OP_ASSERT_NOT:
1263     case OP_ASSERTBACK_NOT:
1264     do
1265     {
1266 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1267     RM5);
1268 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1269 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1270     {
1271     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1272 ph10 482 break;
1273     }
1274 ph10 550 if (rrc != MATCH_NOMATCH &&
1275     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1276     RRETURN(rrc);
1277 nigel 77 ecode += GET(ecode,1);
1278     }
1279     while (*ecode == OP_ALT);
1280    
1281     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1282    
1283     ecode += 1 + LINK_SIZE;
1284     continue;
1285    
1286     /* Move the subject pointer back. This occurs only at the start of
1287     each branch of a lookbehind assertion. If we are too close to the start to
1288     move back, this match function fails. When working with UTF-8 we move
1289     back a number of characters, not bytes. */
1290    
1291     case OP_REVERSE:
1292     #ifdef SUPPORT_UTF8
1293     if (utf8)
1294     {
1295 nigel 93 i = GET(ecode, 1);
1296     while (i-- > 0)
1297 nigel 77 {
1298     eptr--;
1299 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1300 ph10 207 BACKCHAR(eptr);
1301 nigel 77 }
1302     }
1303     else
1304     #endif
1305    
1306     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1307    
1308     {
1309 nigel 93 eptr -= GET(ecode, 1);
1310 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1311 nigel 77 }
1312    
1313 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1314 nigel 77
1315 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1316 nigel 77 ecode += 1 + LINK_SIZE;
1317     break;
1318    
1319     /* The callout item calls an external function, if one is provided, passing
1320     details of the match so far. This is mainly for debugging, though the
1321     function is able to force a failure. */
1322    
1323     case OP_CALLOUT:
1324     if (pcre_callout != NULL)
1325     {
1326     pcre_callout_block cb;
1327     cb.version = 1; /* Version 1 of the callout block */
1328     cb.callout_number = ecode[1];
1329     cb.offset_vector = md->offset_vector;
1330 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1331 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
1332     cb.start_match = (int)(mstart - md->start_subject);
1333     cb.current_position = (int)(eptr - md->start_subject);
1334 nigel 77 cb.pattern_position = GET(ecode, 2);
1335     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1336     cb.capture_top = offset_top/2;
1337     cb.capture_last = md->capture_last;
1338     cb.callout_data = md->callout_data;
1339 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1340 nigel 77 if (rrc < 0) RRETURN(rrc);
1341     }
1342     ecode += 2 + 2*LINK_SIZE;
1343     break;
1344    
1345     /* Recursion either matches the current regex, or some subexpression. The
1346     offset data is the offset to the starting bracket from the start of the
1347     whole pattern. (This is so that it works from duplicated subpatterns.)
1348    
1349     If there are any capturing brackets started but not finished, we have to
1350     save their starting points and reinstate them after the recursion. However,
1351     we don't know how many such there are (offset_top records the completed
1352     total) so we just have to save all the potential data. There may be up to
1353     65535 such values, which is too large to put on the stack, but using malloc
1354     for small numbers seems expensive. As a compromise, the stack is used when
1355     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1356     is used. If the malloc fails, the offsets cannot be saved, so the
1357     recursion is not attempted and PCRE_ERROR_NOMEMORY is returned from this
1358     level of match().
1359    
1360     There are also other values that have to be saved. We use a chained
1361     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1362     for the original version of this logic. */
1363    
1364     case OP_RECURSE:
1365     {
1366     callpat = md->start_code + GET(ecode, 1);
1367 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1368     GET2(callpat, 1 + LINK_SIZE);
1369 nigel 77
1370     /* Add to "recursing stack" */
1371    
1372     new_recursive.prevrec = md->recursive;
1373     md->recursive = &new_recursive;
1374    
1375     /* Find where to continue from afterwards */
1376    
1377     ecode += 1 + LINK_SIZE;
1378     new_recursive.after_call = ecode;
1379    
1380     /* Now save the offset data. */
1381    
1382     new_recursive.saved_max = md->offset_end;
1383     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1384     new_recursive.offset_save = stacksave;
1385     else
1386     {
1387     new_recursive.offset_save =
1388     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1389     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1390     }
1391    
1392     memcpy(new_recursive.offset_save, md->offset_vector,
1393     new_recursive.saved_max * sizeof(int));
1394 ph10 461 new_recursive.save_offset_top = offset_top;
1395 nigel 77
1396     /* OK, now we can do the recursion. For each top-level alternative we
1397     restore the offset and recursion data. */
1398    
1399     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1400 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1401 nigel 77 do
1402     {
1403 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1404     md, ims, eptrb, flags, RM6);
1405 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1406 nigel 77 {
1407 nigel 87 DPRINTF(("Recursion matched\n"));
1408 nigel 77 md->recursive = new_recursive.prevrec;
1409     if (new_recursive.offset_save != stacksave)
1410     (pcre_free)(new_recursive.offset_save);
1411 ph10 510 MRRETURN(MATCH_MATCH);
1412 nigel 77 }
1413 ph10 550 else if (rrc != MATCH_NOMATCH &&
1414     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1415 nigel 87 {
1416     DPRINTF(("Recursion gave error %d\n", rrc));
1417 ph10 400 if (new_recursive.offset_save != stacksave)
1418     (pcre_free)(new_recursive.offset_save);
1419 nigel 87 RRETURN(rrc);
1420     }
1421 nigel 77
1422     md->recursive = &new_recursive;
1423     memcpy(md->offset_vector, new_recursive.offset_save,
1424     new_recursive.saved_max * sizeof(int));
1425     callpat += GET(callpat, 1);
1426     }
1427     while (*callpat == OP_ALT);
1428    
1429     DPRINTF(("Recursion didn't match\n"));
1430     md->recursive = new_recursive.prevrec;
1431     if (new_recursive.offset_save != stacksave)
1432     (pcre_free)(new_recursive.offset_save);
1433 ph10 510 MRRETURN(MATCH_NOMATCH);
1434 nigel 77 }
1435     /* Control never reaches here */
1436    
1437     /* "Once" brackets are like assertion brackets except that after a match,
1438     the point in the subject string is not moved back. Thus there can never be
1439     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1440     Check the alternative branches in turn - the matching won't pass the KET
1441     for this kind of subpattern. If any one branch matches, we carry on as at
1442 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1443     the start-of-match value in case it was changed by \K. */
1444 nigel 77
1445     case OP_ONCE:
1446 nigel 91 prev = ecode;
1447     saved_eptr = eptr;
1448    
1449     do
1450 nigel 77 {
1451 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1452 ph10 511 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1453 ph10 500 {
1454     mstart = md->start_match_ptr;
1455     break;
1456 ph10 501 }
1457 ph10 550 if (rrc != MATCH_NOMATCH &&
1458     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1459     RRETURN(rrc);
1460 nigel 91 ecode += GET(ecode,1);
1461     }
1462     while (*ecode == OP_ALT);
1463 nigel 77
1464 nigel 91 /* If hit the end of the group (which could be repeated), fail */
1465 nigel 77
1466 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1467 nigel 77
1468 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1469     mark, since extracts may have been taken. */
1470 nigel 77
1471 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1472 nigel 77
1473 nigel 91 offset_top = md->end_offset_top;
1474     eptr = md->end_match_ptr;
1475 nigel 77
1476 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1477     happens for a repeating ket if no characters were matched in the group.
1478     This is the forcible breaking of infinite loops as implemented in Perl
1479     5.005. If there is an options reset, it will get obeyed in the normal
1480     course of events. */
1481 nigel 77
1482 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1483     {
1484     ecode += 1+LINK_SIZE;
1485     break;
1486     }
1487 nigel 77
1488 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1489     preceding bracket, in the appropriate order. The second "call" of match()
1490     uses tail recursion, to avoid using another stack frame. We need to reset
1491     any options that changed within the bracket before re-running it, so
1492     check the next opcode. */
1493 nigel 77
1494 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1495     {
1496     ims = (ims & ~PCRE_IMS) | ecode[4];
1497     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1498     }
1499 nigel 77
1500 nigel 91 if (*ecode == OP_KETRMIN)
1501     {
1502 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1503 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1504     ecode = prev;
1505 ph10 197 flags = 0;
1506 nigel 91 goto TAIL_RECURSE;
1507 nigel 77 }
1508 nigel 91 else /* OP_KETRMAX */
1509     {
1510 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1511 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1512     ecode += 1 + LINK_SIZE;
1513 ph10 197 flags = 0;
1514 nigel 91 goto TAIL_RECURSE;
1515     }
1516     /* Control never gets here */
1517 nigel 77
1518     /* An alternation is the end of a branch; scan along to find the end of the
1519     bracketed group and go to there. */
1520    
1521     case OP_ALT:
1522     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1523     break;
1524    
1525 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1526     indicating that it may occur zero times. It may repeat infinitely, or not
1527     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1528     with fixed upper repeat limits are compiled as a number of copies, with the
1529     optional ones preceded by BRAZERO or BRAMINZERO. */
1530 nigel 77
1531     case OP_BRAZERO:
1532     {
1533     next = ecode+1;
1534 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1535 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1536     do next += GET(next,1); while (*next == OP_ALT);
1537 nigel 93 ecode = next + 1 + LINK_SIZE;
1538 nigel 77 }
1539     break;
1540    
1541     case OP_BRAMINZERO:
1542     {
1543     next = ecode+1;
1544 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1545 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1546 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1547     ecode++;
1548     }
1549     break;
1550    
1551 ph10 335 case OP_SKIPZERO:
1552     {
1553     next = ecode+1;
1554     do next += GET(next,1); while (*next == OP_ALT);
1555     ecode = next + 1 + LINK_SIZE;
1556     }
1557     break;
1558    
1559 nigel 93 /* End of a group, repeated or non-repeating. */
1560 nigel 77
1561     case OP_KET:
1562     case OP_KETRMIN:
1563     case OP_KETRMAX:
1564 nigel 91 prev = ecode - GET(ecode, 1);
1565 nigel 77
1566 nigel 93 /* If this was a group that remembered the subject start, in order to break
1567     infinite repeats of empty string matches, retrieve the subject start from
1568     the chain. Otherwise, set it NULL. */
1569 nigel 77
1570 nigel 93 if (*prev >= OP_SBRA)
1571     {
1572     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1573     eptrb = eptrb->epb_prev; /* Backup to previous group */
1574     }
1575     else saved_eptr = NULL;
1576 nigel 77
1577 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1578     matching and return MATCH_MATCH, but record the current high water mark for
1579     use by positive assertions. We also need to record the match start in case
1580     it was changed by \K. */
1581 nigel 93
1582 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1583     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1584     *prev == OP_ONCE)
1585     {
1586     md->end_match_ptr = eptr; /* For ONCE */
1587     md->end_offset_top = offset_top;
1588 ph10 500 md->start_match_ptr = mstart;
1589 ph10 510 MRRETURN(MATCH_MATCH);
1590 nigel 91 }
1591 nigel 77
1592 nigel 93 /* For capturing groups we have to check the group number back at the start
1593     and if necessary complete handling an extraction by setting the offsets and
1594     bumping the high water mark. Note that whole-pattern recursion is coded as
1595     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1596     when the OP_END is reached. Other recursion is handled here. */
1597 nigel 77
1598 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1599 nigel 91 {
1600 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1601 nigel 91 offset = number << 1;
1602 ph10 461
1603 ph10 475 #ifdef PCRE_DEBUG
1604 nigel 91 printf("end bracket %d", number);
1605     printf("\n");
1606 nigel 77 #endif
1607    
1608 nigel 93 md->capture_last = number;
1609     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1610 nigel 91 {
1611 nigel 93 md->offset_vector[offset] =
1612     md->offset_vector[md->offset_end - number];
1613 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1614 nigel 93 if (offset_top <= offset) offset_top = offset + 2;
1615     }
1616 nigel 77
1617 nigel 93 /* Handle a recursively called group. Restore the offsets
1618     appropriately and continue from after the call. */
1619 nigel 77
1620 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1621     {
1622     recursion_info *rec = md->recursive;
1623     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1624     md->recursive = rec->prevrec;
1625     memcpy(md->offset_vector, rec->offset_save,
1626     rec->saved_max * sizeof(int));
1627 ph10 461 offset_top = rec->save_offset_top;
1628 nigel 93 ecode = rec->after_call;
1629     ims = original_ims;
1630     break;
1631 nigel 77 }
1632 nigel 91 }
1633 nigel 77
1634 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1635     flags, in case they got changed during the group. */
1636 nigel 77
1637 nigel 91 ims = original_ims;
1638     DPRINTF(("ims reset to %02lx\n", ims));
1639 nigel 77
1640 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1641     happens for a repeating ket if no characters were matched in the group.
1642     This is the forcible breaking of infinite loops as implemented in Perl
1643     5.005. If there is an options reset, it will get obeyed in the normal
1644     course of events. */
1645 nigel 77
1646 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1647     {
1648     ecode += 1 + LINK_SIZE;
1649     break;
1650     }
1651 nigel 77
1652 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1653     preceding bracket, in the appropriate order. In the second case, we can use
1654 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1655     unlimited repeat of a group that can match an empty string. */
1656 nigel 77
1657 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1658    
1659 nigel 91 if (*ecode == OP_KETRMIN)
1660     {
1661 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1662 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1663 ph10 197 if (flags != 0) /* Could match an empty string */
1664     {
1665     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1666     RRETURN(rrc);
1667     }
1668 nigel 91 ecode = prev;
1669     goto TAIL_RECURSE;
1670 nigel 77 }
1671 nigel 91 else /* OP_KETRMAX */
1672     {
1673 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1674 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1675     ecode += 1 + LINK_SIZE;
1676 ph10 197 flags = 0;
1677 nigel 91 goto TAIL_RECURSE;
1678     }
1679     /* Control never gets here */
1680 nigel 77
1681     /* Start of subject unless notbol, or after internal newline if multiline */
1682    
1683     case OP_CIRC:
1684 ph10 510 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1685 nigel 77 if ((ims & PCRE_MULTILINE) != 0)
1686     {
1687 nigel 91 if (eptr != md->start_subject &&
1688 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1689 ph10 510 MRRETURN(MATCH_NOMATCH);
1690 nigel 77 ecode++;
1691     break;
1692     }
1693     /* ... else fall through */
1694    
1695     /* Start of subject assertion */
1696    
1697     case OP_SOD:
1698 ph10 510 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1699 nigel 77 ecode++;
1700     break;
1701    
1702     /* Start of match assertion */
1703    
1704     case OP_SOM:
1705 ph10 510 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1706 nigel 77 ecode++;
1707     break;
1708 ph10 172
1709 ph10 168 /* Reset the start of match point */
1710 ph10 172
1711 ph10 168 case OP_SET_SOM:
1712     mstart = eptr;
1713 ph10 172 ecode++;
1714     break;
1715 nigel 77
1716     /* Assert before internal newline if multiline, or before a terminating
1717     newline unless endonly is set, else end of subject unless noteol is set. */
1718    
1719     case OP_DOLL:
1720     if ((ims & PCRE_MULTILINE) != 0)
1721     {
1722     if (eptr < md->end_subject)
1723 ph10 510 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1724 nigel 77 else
1725 ph10 579 {
1726     if (md->noteol) MRRETURN(MATCH_NOMATCH);
1727 ph10 553 SCHECK_PARTIAL();
1728     }
1729 nigel 77 ecode++;
1730     break;
1731     }
1732 ph10 553 else /* Not multiline */
1733 nigel 77 {
1734 ph10 510 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1735 ph10 553 if (!md->endonly) goto ASSERT_NL_OR_EOS;
1736 nigel 77 }
1737 ph10 579
1738 nigel 91 /* ... else fall through for endonly */
1739 nigel 77
1740     /* End of subject assertion (\z) */
1741    
1742     case OP_EOD:
1743 ph10 510 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1744 ph10 553 SCHECK_PARTIAL();
1745 nigel 77 ecode++;
1746     break;
1747    
1748     /* End of subject or ending \n assertion (\Z) */
1749    
1750     case OP_EODN:
1751 ph10 553 ASSERT_NL_OR_EOS:
1752     if (eptr < md->end_subject &&
1753 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1754 ph10 510 MRRETURN(MATCH_NOMATCH);
1755 ph10 579
1756 ph10 553 /* Either at end of string or \n before end. */
1757 ph10 579
1758 ph10 553 SCHECK_PARTIAL();
1759 nigel 77 ecode++;
1760     break;
1761    
1762     /* Word boundary assertions */
1763    
1764     case OP_NOT_WORD_BOUNDARY:
1765     case OP_WORD_BOUNDARY:
1766     {
1767    
1768     /* Find out if the previous and current characters are "word" characters.
1769     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1770 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1771 ph10 435 partial matching. */
1772 nigel 77
1773     #ifdef SUPPORT_UTF8
1774     if (utf8)
1775     {
1776 ph10 518 /* Get status of previous character */
1777 ph10 527
1778 nigel 77 if (eptr == md->start_subject) prev_is_word = FALSE; else
1779     {
1780 ph10 409 USPTR lastptr = eptr - 1;
1781 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1782 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1783 nigel 77 GETCHAR(c, lastptr);
1784 ph10 527 #ifdef SUPPORT_UCP
1785 ph10 518 if (md->use_ucp)
1786     {
1787     if (c == '_') prev_is_word = TRUE; else
1788 ph10 527 {
1789 ph10 518 int cat = UCD_CATEGORY(c);
1790     prev_is_word = (cat == ucp_L || cat == ucp_N);
1791 ph10 527 }
1792     }
1793     else
1794     #endif
1795 nigel 77 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1796     }
1797 ph10 527
1798 ph10 518 /* Get status of next character */
1799 ph10 527
1800 ph10 443 if (eptr >= md->end_subject)
1801 nigel 77 {
1802 ph10 443 SCHECK_PARTIAL();
1803     cur_is_word = FALSE;
1804 ph10 428 }
1805     else
1806     {
1807 nigel 77 GETCHAR(c, eptr);
1808 ph10 527 #ifdef SUPPORT_UCP
1809 ph10 518 if (md->use_ucp)
1810     {
1811     if (c == '_') cur_is_word = TRUE; else
1812 ph10 527 {
1813 ph10 518 int cat = UCD_CATEGORY(c);
1814     cur_is_word = (cat == ucp_L || cat == ucp_N);
1815 ph10 527 }
1816     }
1817     else
1818     #endif
1819 nigel 77 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1820     }
1821     }
1822     else
1823     #endif
1824    
1825 ph10 527 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1826 ph10 518 consistency with the behaviour of \w we do use it in this case. */
1827 nigel 77
1828     {
1829 ph10 518 /* Get status of previous character */
1830 ph10 527
1831 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1832     {
1833 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1834 ph10 527 #ifdef SUPPORT_UCP
1835 ph10 518 if (md->use_ucp)
1836     {
1837 ph10 527 c = eptr[-1];
1838 ph10 518 if (c == '_') prev_is_word = TRUE; else
1839 ph10 527 {
1840 ph10 518 int cat = UCD_CATEGORY(c);
1841     prev_is_word = (cat == ucp_L || cat == ucp_N);
1842 ph10 527 }
1843     }
1844     else
1845     #endif
1846 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1847     }
1848 ph10 527
1849 ph10 518 /* Get status of next character */
1850 ph10 527
1851 ph10 443 if (eptr >= md->end_subject)
1852 ph10 428 {
1853 ph10 443 SCHECK_PARTIAL();
1854     cur_is_word = FALSE;
1855 ph10 428 }
1856 ph10 527 else
1857     #ifdef SUPPORT_UCP
1858 ph10 518 if (md->use_ucp)
1859     {
1860 ph10 527 c = *eptr;
1861 ph10 518 if (c == '_') cur_is_word = TRUE; else
1862 ph10 527 {
1863 ph10 518 int cat = UCD_CATEGORY(c);
1864     cur_is_word = (cat == ucp_L || cat == ucp_N);
1865 ph10 527 }
1866     }
1867     else
1868     #endif
1869 ph10 518 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1870 nigel 77 }
1871    
1872     /* Now see if the situation is what we want */
1873    
1874     if ((*ecode++ == OP_WORD_BOUNDARY)?
1875     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1876 ph10 510 MRRETURN(MATCH_NOMATCH);
1877 nigel 77 }
1878     break;
1879    
1880     /* Match a single character type; inline for speed */
1881    
1882     case OP_ANY:
1883 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1884 ph10 345 /* Fall through */
1885    
1886 ph10 341 case OP_ALLANY:
1887 ph10 443 if (eptr++ >= md->end_subject)
1888 ph10 428 {
1889 ph10 443 SCHECK_PARTIAL();
1890 ph10 510 MRRETURN(MATCH_NOMATCH);
1891 ph10 443 }
1892 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1893 nigel 77 ecode++;
1894     break;
1895    
1896     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1897     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1898    
1899     case OP_ANYBYTE:
1900 ph10 443 if (eptr++ >= md->end_subject)
1901 ph10 428 {
1902 ph10 443 SCHECK_PARTIAL();
1903 ph10 510 MRRETURN(MATCH_NOMATCH);
1904 ph10 443 }
1905 nigel 77 ecode++;
1906     break;
1907    
1908     case OP_NOT_DIGIT:
1909 ph10 443 if (eptr >= md->end_subject)
1910 ph10 428 {
1911 ph10 443 SCHECK_PARTIAL();
1912 ph10 510 MRRETURN(MATCH_NOMATCH);
1913 ph10 443 }
1914 nigel 77 GETCHARINCTEST(c, eptr);
1915     if (
1916     #ifdef SUPPORT_UTF8
1917     c < 256 &&
1918     #endif
1919     (md->ctypes[c] & ctype_digit) != 0
1920     )
1921 ph10 510 MRRETURN(MATCH_NOMATCH);
1922 nigel 77 ecode++;
1923     break;
1924    
1925     case OP_DIGIT:
1926 ph10 443 if (eptr >= md->end_subject)
1927 ph10 428 {
1928 ph10 443 SCHECK_PARTIAL();
1929 ph10 510 MRRETURN(MATCH_NOMATCH);
1930 ph10 443 }
1931 nigel 77 GETCHARINCTEST(c, eptr);
1932     if (
1933     #ifdef SUPPORT_UTF8
1934     c >= 256 ||
1935     #endif
1936     (md->ctypes[c] & ctype_digit) == 0
1937     )
1938 ph10 510 MRRETURN(MATCH_NOMATCH);
1939 nigel 77 ecode++;
1940     break;
1941    
1942     case OP_NOT_WHITESPACE:
1943 ph10 443 if (eptr >= md->end_subject)
1944 ph10 428 {
1945 ph10 443 SCHECK_PARTIAL();
1946 ph10 510 MRRETURN(MATCH_NOMATCH);
1947 ph10 443 }
1948 nigel 77 GETCHARINCTEST(c, eptr);
1949     if (
1950     #ifdef SUPPORT_UTF8
1951     c < 256 &&
1952     #endif
1953     (md->ctypes[c] & ctype_space) != 0
1954     )
1955 ph10 510 MRRETURN(MATCH_NOMATCH);
1956 nigel 77 ecode++;
1957     break;
1958    
1959     case OP_WHITESPACE:
1960 ph10 443 if (eptr >= md->end_subject)
1961 ph10 428 {
1962 ph10 443 SCHECK_PARTIAL();
1963 ph10 510 MRRETURN(MATCH_NOMATCH);
1964 ph10 443 }
1965 nigel 77 GETCHARINCTEST(c, eptr);
1966     if (
1967     #ifdef SUPPORT_UTF8
1968     c >= 256 ||
1969     #endif
1970     (md->ctypes[c] & ctype_space) == 0
1971     )
1972 ph10 510 MRRETURN(MATCH_NOMATCH);
1973 nigel 77 ecode++;
1974     break;
1975    
1976     case OP_NOT_WORDCHAR:
1977 ph10 443 if (eptr >= md->end_subject)
1978 ph10 428 {
1979 ph10 443 SCHECK_PARTIAL();
1980 ph10 510 MRRETURN(MATCH_NOMATCH);
1981 ph10 443 }
1982 nigel 77 GETCHARINCTEST(c, eptr);
1983     if (
1984     #ifdef SUPPORT_UTF8
1985     c < 256 &&
1986     #endif
1987     (md->ctypes[c] & ctype_word) != 0
1988     )
1989 ph10 510 MRRETURN(MATCH_NOMATCH);
1990 nigel 77 ecode++;
1991     break;
1992    
1993     case OP_WORDCHAR:
1994 ph10 443 if (eptr >= md->end_subject)
1995 ph10 428 {
1996 ph10 443 SCHECK_PARTIAL();
1997 ph10 510 MRRETURN(MATCH_NOMATCH);
1998 ph10 443 }
1999 nigel 77 GETCHARINCTEST(c, eptr);
2000     if (
2001     #ifdef SUPPORT_UTF8
2002     c >= 256 ||
2003     #endif
2004     (md->ctypes[c] & ctype_word) == 0
2005     )
2006 ph10 510 MRRETURN(MATCH_NOMATCH);
2007 nigel 77 ecode++;
2008     break;
2009    
2010 nigel 93 case OP_ANYNL:
2011 ph10 443 if (eptr >= md->end_subject)
2012 ph10 428 {
2013 ph10 443 SCHECK_PARTIAL();
2014 ph10 510 MRRETURN(MATCH_NOMATCH);
2015 ph10 443 }
2016 nigel 93 GETCHARINCTEST(c, eptr);
2017     switch(c)
2018     {
2019 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2020 nigel 93 case 0x000d:
2021     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2022     break;
2023 ph10 231
2024 nigel 93 case 0x000a:
2025 ph10 231 break;
2026    
2027 nigel 93 case 0x000b:
2028     case 0x000c:
2029     case 0x0085:
2030     case 0x2028:
2031     case 0x2029:
2032 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2033 nigel 93 break;
2034     }
2035     ecode++;
2036     break;
2037    
2038 ph10 178 case OP_NOT_HSPACE:
2039 ph10 443 if (eptr >= md->end_subject)
2040 ph10 428 {
2041 ph10 443 SCHECK_PARTIAL();
2042 ph10 510 MRRETURN(MATCH_NOMATCH);
2043 ph10 443 }
2044 ph10 178 GETCHARINCTEST(c, eptr);
2045     switch(c)
2046     {
2047     default: break;
2048     case 0x09: /* HT */
2049     case 0x20: /* SPACE */
2050     case 0xa0: /* NBSP */
2051     case 0x1680: /* OGHAM SPACE MARK */
2052     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2053     case 0x2000: /* EN QUAD */
2054     case 0x2001: /* EM QUAD */
2055     case 0x2002: /* EN SPACE */
2056     case 0x2003: /* EM SPACE */
2057     case 0x2004: /* THREE-PER-EM SPACE */
2058     case 0x2005: /* FOUR-PER-EM SPACE */
2059     case 0x2006: /* SIX-PER-EM SPACE */
2060     case 0x2007: /* FIGURE SPACE */
2061     case 0x2008: /* PUNCTUATION SPACE */
2062     case 0x2009: /* THIN SPACE */
2063     case 0x200A: /* HAIR SPACE */
2064     case 0x202f: /* NARROW NO-BREAK SPACE */
2065     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2066     case 0x3000: /* IDEOGRAPHIC SPACE */
2067 ph10 510 MRRETURN(MATCH_NOMATCH);
2068 ph10 178 }
2069     ecode++;
2070     break;
2071    
2072     case OP_HSPACE:
2073 ph10 443 if (eptr >= md->end_subject)
2074 ph10 428 {
2075 ph10 443 SCHECK_PARTIAL();
2076 ph10 510 MRRETURN(MATCH_NOMATCH);
2077 ph10 443 }
2078 ph10 178 GETCHARINCTEST(c, eptr);
2079     switch(c)
2080     {
2081 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2082 ph10 178 case 0x09: /* HT */
2083     case 0x20: /* SPACE */
2084     case 0xa0: /* NBSP */
2085     case 0x1680: /* OGHAM SPACE MARK */
2086     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2087     case 0x2000: /* EN QUAD */
2088     case 0x2001: /* EM QUAD */
2089     case 0x2002: /* EN SPACE */
2090     case 0x2003: /* EM SPACE */
2091     case 0x2004: /* THREE-PER-EM SPACE */
2092     case 0x2005: /* FOUR-PER-EM SPACE */
2093     case 0x2006: /* SIX-PER-EM SPACE */
2094     case 0x2007: /* FIGURE SPACE */
2095     case 0x2008: /* PUNCTUATION SPACE */
2096     case 0x2009: /* THIN SPACE */
2097     case 0x200A: /* HAIR SPACE */
2098     case 0x202f: /* NARROW NO-BREAK SPACE */
2099     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2100     case 0x3000: /* IDEOGRAPHIC SPACE */
2101     break;
2102     }
2103     ecode++;
2104     break;
2105    
2106     case OP_NOT_VSPACE:
2107 ph10 443 if (eptr >= md->end_subject)
2108 ph10 428 {
2109 ph10 443 SCHECK_PARTIAL();
2110 ph10 510 MRRETURN(MATCH_NOMATCH);
2111 ph10 443 }
2112 ph10 178 GETCHARINCTEST(c, eptr);
2113     switch(c)
2114     {
2115     default: break;
2116     case 0x0a: /* LF */
2117     case 0x0b: /* VT */
2118     case 0x0c: /* FF */
2119     case 0x0d: /* CR */
2120     case 0x85: /* NEL */
2121     case 0x2028: /* LINE SEPARATOR */
2122     case 0x2029: /* PARAGRAPH SEPARATOR */
2123 ph10 510 MRRETURN(MATCH_NOMATCH);
2124 ph10 178 }
2125     ecode++;
2126     break;
2127    
2128     case OP_VSPACE:
2129 ph10 443 if (eptr >= md->end_subject)
2130 ph10 428 {
2131 ph10 443 SCHECK_PARTIAL();
2132 ph10 510 MRRETURN(MATCH_NOMATCH);
2133 ph10 443 }
2134 ph10 178 GETCHARINCTEST(c, eptr);
2135     switch(c)
2136     {
2137 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2138 ph10 178 case 0x0a: /* LF */
2139     case 0x0b: /* VT */
2140     case 0x0c: /* FF */
2141     case 0x0d: /* CR */
2142     case 0x85: /* NEL */
2143     case 0x2028: /* LINE SEPARATOR */
2144     case 0x2029: /* PARAGRAPH SEPARATOR */
2145     break;
2146     }
2147     ecode++;
2148     break;
2149    
2150 nigel 77 #ifdef SUPPORT_UCP
2151     /* Check the next character by Unicode property. We will get here only
2152     if the support is in the binary; otherwise a compile-time error occurs. */
2153    
2154     case OP_PROP:
2155     case OP_NOTPROP:
2156 ph10 443 if (eptr >= md->end_subject)
2157 ph10 428 {
2158 ph10 443 SCHECK_PARTIAL();
2159 ph10 510 MRRETURN(MATCH_NOMATCH);
2160 ph10 443 }
2161 nigel 77 GETCHARINCTEST(c, eptr);
2162     {
2163 ph10 384 const ucd_record *prop = GET_UCD(c);
2164 nigel 77
2165 nigel 87 switch(ecode[1])
2166     {
2167     case PT_ANY:
2168 ph10 510 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2169 nigel 87 break;
2170 nigel 77
2171 nigel 87 case PT_LAMP:
2172 ph10 349 if ((prop->chartype == ucp_Lu ||
2173     prop->chartype == ucp_Ll ||
2174     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2175 ph10 510 MRRETURN(MATCH_NOMATCH);
2176 ph10 517 break;
2177 nigel 87
2178     case PT_GC:
2179 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2180 ph10 510 MRRETURN(MATCH_NOMATCH);
2181 nigel 87 break;
2182    
2183     case PT_PC:
2184 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2185 ph10 510 MRRETURN(MATCH_NOMATCH);
2186 nigel 87 break;
2187    
2188     case PT_SC:
2189 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2190 ph10 510 MRRETURN(MATCH_NOMATCH);
2191 nigel 87 break;
2192 ph10 527
2193 ph10 517 /* These are specials */
2194 ph10 527
2195 ph10 517 case PT_ALNUM:
2196     if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2197     _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2198     MRRETURN(MATCH_NOMATCH);
2199 ph10 527 break;
2200    
2201 ph10 517 case PT_SPACE: /* Perl space */
2202     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2203     c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2204     == (op == OP_NOTPROP))
2205     MRRETURN(MATCH_NOMATCH);
2206 ph10 527 break;
2207    
2208 ph10 517 case PT_PXSPACE: /* POSIX space */
2209     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2210 ph10 527 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2211 ph10 517 c == CHAR_FF || c == CHAR_CR)
2212     == (op == OP_NOTPROP))
2213     MRRETURN(MATCH_NOMATCH);
2214 ph10 527 break;
2215 nigel 87
2216 ph10 527 case PT_WORD:
2217 ph10 517 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2218 ph10 527 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2219 ph10 517 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2220     MRRETURN(MATCH_NOMATCH);
2221 ph10 527 break;
2222    
2223 ph10 517 /* This should never occur */
2224    
2225 nigel 87 default:
2226     RRETURN(PCRE_ERROR_INTERNAL);
2227 nigel 77 }
2228 nigel 87
2229     ecode += 3;
2230 nigel 77 }
2231     break;
2232    
2233     /* Match an extended Unicode sequence. We will get here only if the support
2234     is in the binary; otherwise a compile-time error occurs. */
2235    
2236     case OP_EXTUNI:
2237 ph10 443 if (eptr >= md->end_subject)
2238 ph10 428 {
2239 ph10 443 SCHECK_PARTIAL();
2240 ph10 510 MRRETURN(MATCH_NOMATCH);
2241 ph10 443 }
2242 nigel 77 GETCHARINCTEST(c, eptr);
2243     {
2244 ph10 349 int category = UCD_CATEGORY(c);
2245 ph10 510 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2246 nigel 77 while (eptr < md->end_subject)
2247     {
2248     int len = 1;
2249     if (!utf8) c = *eptr; else
2250     {
2251     GETCHARLEN(c, eptr, len);
2252     }
2253 ph10 349 category = UCD_CATEGORY(c);
2254 nigel 77 if (category != ucp_M) break;
2255     eptr += len;
2256     }
2257     }
2258     ecode++;
2259     break;
2260     #endif
2261    
2262    
2263     /* Match a back reference, possibly repeatedly. Look past the end of the
2264     item to see if there is repeat information following. The code is similar
2265     to that for character classes, but repeated for efficiency. Then obey
2266     similar code to character type repeats - written out again for speed.
2267     However, if the referenced string is the empty string, always treat
2268     it as matched, any number of times (otherwise there could be infinite
2269     loops). */
2270    
2271     case OP_REF:
2272 ph10 595 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2273     ecode += 3;
2274 ph10 345
2275 ph10 595 /* If the reference is unset, there are two possibilities:
2276 ph10 345
2277 ph10 595 (a) In the default, Perl-compatible state, set the length negative;
2278     this ensures that every attempt at a match fails. We can't just fail
2279     here, because of the possibility of quantifiers with zero minima.
2280 ph10 345
2281 ph10 595 (b) If the JavaScript compatibility flag is set, set the length to zero
2282     so that the back reference matches an empty string.
2283 ph10 345
2284 ph10 595 Otherwise, set the length to the length of what was matched by the
2285     referenced subpattern. */
2286 ph10 345
2287 ph10 595 if (offset >= offset_top || md->offset_vector[offset] < 0)
2288     length = (md->jscript_compat)? 0 : -1;
2289     else
2290     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2291 nigel 77
2292 ph10 595 /* Set up for repetition, or handle the non-repeated case */
2293 nigel 77
2294 ph10 595 switch (*ecode)
2295     {
2296     case OP_CRSTAR:
2297     case OP_CRMINSTAR:
2298     case OP_CRPLUS:
2299     case OP_CRMINPLUS:
2300     case OP_CRQUERY:
2301     case OP_CRMINQUERY:
2302     c = *ecode++ - OP_CRSTAR;
2303     minimize = (c & 1) != 0;
2304     min = rep_min[c]; /* Pick up values from tables; */
2305     max = rep_max[c]; /* zero for max => infinity */
2306     if (max == 0) max = INT_MAX;
2307     break;
2308 nigel 77
2309 ph10 595 case OP_CRRANGE:
2310     case OP_CRMINRANGE:
2311     minimize = (*ecode == OP_CRMINRANGE);
2312     min = GET2(ecode, 1);
2313     max = GET2(ecode, 3);
2314     if (max == 0) max = INT_MAX;
2315     ecode += 5;
2316     break;
2317 nigel 77
2318 ph10 595 default: /* No repeat follows */
2319     if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2320     {
2321     CHECK_PARTIAL();
2322     MRRETURN(MATCH_NOMATCH);
2323 nigel 77 }
2324 ph10 595 eptr += length;
2325     continue; /* With the main loop */
2326     }
2327 nigel 77
2328 ph10 595 /* Handle repeated back references. If the length of the reference is
2329     zero, just continue with the main loop. */
2330 ph10 443
2331 ph10 595 if (length == 0) continue;
2332 nigel 77
2333 ph10 595 /* First, ensure the minimum number of matches are present. We get back
2334     the length of the reference string explicitly rather than passing the
2335     address of eptr, so that eptr can be a register variable. */
2336 nigel 77
2337 ph10 595 for (i = 1; i <= min; i++)
2338     {
2339     int slength;
2340     if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2341 nigel 77 {
2342 ph10 595 CHECK_PARTIAL();
2343     MRRETURN(MATCH_NOMATCH);
2344 nigel 77 }
2345 ph10 595 eptr += slength;
2346     }
2347 nigel 77
2348 ph10 595 /* If min = max, continue at the same level without recursion.
2349     They are not both allowed to be zero. */
2350 nigel 77
2351 ph10 595 if (min == max) continue;
2352 nigel 77
2353 ph10 595 /* If minimizing, keep trying and advancing the pointer */
2354 nigel 77
2355 ph10 595 if (minimize)
2356     {
2357     for (fi = min;; fi++)
2358 nigel 77 {
2359 ph10 595 int slength;
2360     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2361     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2362     if (fi >= max) MRRETURN(MATCH_NOMATCH);
2363     if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2364 nigel 77 {
2365 ph10 595 CHECK_PARTIAL();
2366     MRRETURN(MATCH_NOMATCH);
2367 nigel 77 }
2368 ph10 595 eptr += slength;
2369 nigel 77 }
2370 ph10 595 /* Control never gets here */
2371     }
2372 nigel 77
2373 ph10 595 /* If maximizing, find the longest string and work backwards */
2374 nigel 77
2375 ph10 595 else
2376     {
2377     pp = eptr;
2378     for (i = min; i < max; i++)
2379 nigel 77 {
2380 ph10 595 int slength;
2381     if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2382 nigel 77 {
2383 ph10 595 CHECK_PARTIAL();
2384     break;
2385 nigel 77 }
2386 ph10 595 eptr += slength;
2387 nigel 77 }
2388 ph10 595 while (eptr >= pp)
2389     {
2390     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2391     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2392     eptr -= length;
2393     }
2394     MRRETURN(MATCH_NOMATCH);
2395 nigel 77 }
2396     /* Control never gets here */
2397    
2398     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2399     used when all the characters in the class have values in the range 0-255,
2400     and either the matching is caseful, or the characters are in the range
2401     0-127 when UTF-8 processing is enabled. The only difference between
2402     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2403     encountered.
2404    
2405     First, look past the end of the item to see if there is repeat information
2406     following. Then obey similar code to character type repeats - written out
2407     again for speed. */
2408    
2409     case OP_NCLASS:
2410     case OP_CLASS:
2411     {
2412     data = ecode + 1; /* Save for matching */
2413     ecode += 33; /* Advance past the item */
2414    
2415     switch (*ecode)
2416     {
2417     case OP_CRSTAR:
2418     case OP_CRMINSTAR:
2419     case OP_CRPLUS:
2420     case OP_CRMINPLUS:
2421     case OP_CRQUERY:
2422     case OP_CRMINQUERY:
2423     c = *ecode++ - OP_CRSTAR;
2424     minimize = (c & 1) != 0;
2425     min = rep_min[c]; /* Pick up values from tables; */
2426     max = rep_max[c]; /* zero for max => infinity */
2427     if (max == 0) max = INT_MAX;
2428     break;
2429    
2430     case OP_CRRANGE:
2431     case OP_CRMINRANGE:
2432     minimize = (*ecode == OP_CRMINRANGE);
2433     min = GET2(ecode, 1);
2434     max = GET2(ecode, 3);
2435     if (max == 0) max = INT_MAX;
2436     ecode += 5;
2437     break;
2438    
2439     default: /* No repeat follows */
2440     min = max = 1;
2441     break;
2442     }
2443    
2444     /* First, ensure the minimum number of matches are present. */
2445    
2446     #ifdef SUPPORT_UTF8
2447     /* UTF-8 mode */
2448     if (utf8)
2449     {
2450     for (i = 1; i <= min; i++)
2451     {
2452 ph10 427 if (eptr >= md->end_subject)
2453 ph10 426 {
2454 ph10 428 SCHECK_PARTIAL();
2455 ph10 510 MRRETURN(MATCH_NOMATCH);
2456 ph10 427 }
2457 nigel 77 GETCHARINC(c, eptr);
2458     if (c > 255)
2459     {
2460 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2461 nigel 77 }
2462     else
2463     {
2464 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2465 nigel 77 }
2466     }
2467     }
2468     else
2469     #endif
2470     /* Not UTF-8 mode */
2471     {
2472     for (i = 1; i <= min; i++)
2473     {
2474 ph10 427 if (eptr >= md->end_subject)
2475 ph10 426 {
2476 ph10 428 SCHECK_PARTIAL();
2477 ph10 510 MRRETURN(MATCH_NOMATCH);
2478 ph10 427 }
2479 nigel 77 c = *eptr++;
2480 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2481 nigel 77 }
2482     }
2483    
2484     /* If max == min we can continue with the main loop without the
2485     need to recurse. */
2486    
2487     if (min == max) continue;
2488    
2489     /* If minimizing, keep testing the rest of the expression and advancing
2490     the pointer while it matches the class. */
2491    
2492     if (minimize)
2493     {
2494     #ifdef SUPPORT_UTF8
2495     /* UTF-8 mode */
2496     if (utf8)
2497     {
2498     for (fi = min;; fi++)
2499     {
2500 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2501 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2502 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2503 ph10 427 if (eptr >= md->end_subject)
2504 ph10 426 {
2505 ph10 427 SCHECK_PARTIAL();
2506 ph10 510 MRRETURN(MATCH_NOMATCH);
2507 ph10 427 }
2508 nigel 77 GETCHARINC(c, eptr);
2509     if (c > 255)
2510     {
2511 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2512 nigel 77 }
2513     else
2514     {
2515 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2516 nigel 77 }
2517     }
2518     }
2519     else
2520     #endif
2521     /* Not UTF-8 mode */
2522     {
2523     for (fi = min;; fi++)
2524     {
2525 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2526 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2528 ph10 427 if (eptr >= md->end_subject)
2529 ph10 426 {
2530 ph10 427 SCHECK_PARTIAL();
2531 ph10 510 MRRETURN(MATCH_NOMATCH);
2532 ph10 427 }
2533 nigel 77 c = *eptr++;
2534 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2535 nigel 77 }
2536     }
2537     /* Control never gets here */
2538     }
2539    
2540     /* If maximizing, find the longest possible run, then work backwards. */
2541    
2542     else
2543     {
2544     pp = eptr;
2545    
2546     #ifdef SUPPORT_UTF8
2547     /* UTF-8 mode */
2548     if (utf8)
2549     {
2550     for (i = min; i < max; i++)
2551     {
2552     int len = 1;
2553 ph10 463 if (eptr >= md->end_subject)
2554 ph10 462 {
2555 ph10 463 SCHECK_PARTIAL();
2556 ph10 462 break;
2557 ph10 463 }
2558 nigel 77 GETCHARLEN(c, eptr, len);
2559     if (c > 255)
2560     {
2561     if (op == OP_CLASS) break;
2562     }
2563     else
2564     {
2565     if ((data[c/8] & (1 << (c&7))) == 0) break;
2566     }
2567     eptr += len;
2568     }
2569     for (;;)
2570     {
2571 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2572 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2573     if (eptr-- == pp) break; /* Stop if tried at original pos */
2574     BACKCHAR(eptr);
2575     }
2576     }
2577     else
2578     #endif
2579     /* Not UTF-8 mode */
2580     {
2581     for (i = min; i < max; i++)
2582     {
2583 ph10 463 if (eptr >= md->end_subject)
2584 ph10 462 {
2585 ph10 463 SCHECK_PARTIAL();
2586 ph10 462 break;
2587 ph10 463 }
2588 nigel 77 c = *eptr;
2589     if ((data[c/8] & (1 << (c&7))) == 0) break;
2590     eptr++;
2591     }
2592     while (eptr >= pp)
2593     {
2594 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2595 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2596 nigel 77 eptr--;
2597     }
2598     }
2599    
2600 ph10 510 MRRETURN(MATCH_NOMATCH);
2601 nigel 77 }
2602     }
2603     /* Control never gets here */
2604    
2605    
2606     /* Match an extended character class. This opcode is encountered only
2607 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2608     mode, because Unicode properties are supported in non-UTF-8 mode. */
2609 nigel 77
2610     #ifdef SUPPORT_UTF8
2611     case OP_XCLASS:
2612     {
2613     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2614     ecode += GET(ecode, 1); /* Advance past the item */
2615    
2616     switch (*ecode)
2617     {
2618     case OP_CRSTAR:
2619     case OP_CRMINSTAR:
2620     case OP_CRPLUS:
2621     case OP_CRMINPLUS:
2622     case OP_CRQUERY:
2623     case OP_CRMINQUERY:
2624     c = *ecode++ - OP_CRSTAR;
2625     minimize = (c & 1) != 0;
2626     min = rep_min[c]; /* Pick up values from tables; */
2627     max = rep_max[c]; /* zero for max => infinity */
2628     if (max == 0) max = INT_MAX;
2629     break;
2630    
2631     case OP_CRRANGE:
2632     case OP_CRMINRANGE:
2633     minimize = (*ecode == OP_CRMINRANGE);
2634     min = GET2(ecode, 1);
2635     max = GET2(ecode, 3);
2636     if (max == 0) max = INT_MAX;
2637     ecode += 5;
2638     break;
2639    
2640     default: /* No repeat follows */
2641     min = max = 1;
2642     break;
2643     }
2644    
2645     /* First, ensure the minimum number of matches are present. */
2646    
2647     for (i = 1; i <= min; i++)
2648     {
2649 ph10 427 if (eptr >= md->end_subject)
2650 ph10 426 {
2651     SCHECK_PARTIAL();
2652 ph10 510 MRRETURN(MATCH_NOMATCH);
2653 ph10 427 }
2654 ph10 384 GETCHARINCTEST(c, eptr);
2655 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2656 nigel 77 }
2657    
2658     /* If max == min we can continue with the main loop without the
2659     need to recurse. */
2660    
2661     if (min == max) continue;
2662    
2663     /* If minimizing, keep testing the rest of the expression and advancing
2664     the pointer while it matches the class. */
2665    
2666     if (minimize)
2667     {
2668     for (fi = min;; fi++)
2669     {
2670 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2671 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2672 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2673 ph10 427 if (eptr >= md->end_subject)
2674 ph10 426 {
2675 ph10 427 SCHECK_PARTIAL();
2676 ph10 510 MRRETURN(MATCH_NOMATCH);
2677 ph10 427 }
2678 ph10 384 GETCHARINCTEST(c, eptr);
2679 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2680 nigel 77 }
2681     /* Control never gets here */
2682     }
2683    
2684     /* If maximizing, find the longest possible run, then work backwards. */
2685    
2686     else
2687     {
2688     pp = eptr;
2689     for (i = min; i < max; i++)
2690     {
2691     int len = 1;
2692 ph10 463 if (eptr >= md->end_subject)
2693 ph10 462 {
2694 ph10 463 SCHECK_PARTIAL();
2695 ph10 462 break;
2696 ph10 463 }
2697 ph10 384 GETCHARLENTEST(c, eptr, len);
2698 nigel 77 if (!_pcre_xclass(c, data)) break;
2699     eptr += len;
2700     }
2701     for(;;)
2702     {
2703 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2704 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2705     if (eptr-- == pp) break; /* Stop if tried at original pos */
2706 ph10 214 if (utf8) BACKCHAR(eptr);
2707 nigel 77 }
2708 ph10 510 MRRETURN(MATCH_NOMATCH);
2709 nigel 77 }
2710    
2711     /* Control never gets here */
2712     }
2713     #endif /* End of XCLASS */
2714    
2715     /* Match a single character, casefully */
2716    
2717     case OP_CHAR:
2718     #ifdef SUPPORT_UTF8
2719     if (utf8)
2720     {
2721     length = 1;
2722     ecode++;
2723     GETCHARLEN(fc, ecode, length);
2724 ph10 443 if (length > md->end_subject - eptr)
2725 ph10 428 {
2726     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2727 ph10 510 MRRETURN(MATCH_NOMATCH);
2728 ph10 443 }
2729 ph10 510 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2730 nigel 77 }
2731     else
2732     #endif
2733    
2734     /* Non-UTF-8 mode */
2735     {
2736 ph10 443 if (md->end_subject - eptr < 1)
2737 ph10 428 {
2738     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2739 ph10 510 MRRETURN(MATCH_NOMATCH);
2740 ph10 443 }
2741 ph10 510 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2742 nigel 77 ecode += 2;
2743     }
2744     break;
2745    
2746     /* Match a single character, caselessly */
2747    
2748     case OP_CHARNC:
2749     #ifdef SUPPORT_UTF8
2750     if (utf8)
2751     {
2752     length = 1;
2753     ecode++;
2754     GETCHARLEN(fc, ecode, length);
2755    
2756 ph10 443 if (length > md->end_subject - eptr)
2757 ph10 428 {
2758     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2759 ph10 510 MRRETURN(MATCH_NOMATCH);
2760 ph10 443 }
2761 nigel 77
2762     /* If the pattern character's value is < 128, we have only one byte, and
2763     can use the fast lookup table. */
2764    
2765     if (fc < 128)
2766     {
2767 ph10 510 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2768 nigel 77 }
2769    
2770     /* Otherwise we must pick up the subject character */
2771    
2772     else
2773     {
2774 nigel 93 unsigned int dc;
2775 nigel 77 GETCHARINC(dc, eptr);
2776     ecode += length;
2777    
2778     /* If we have Unicode property support, we can use it to test the other
2779 nigel 87 case of the character, if there is one. */
2780 nigel 77
2781     if (fc != dc)
2782     {
2783     #ifdef SUPPORT_UCP
2784 ph10 349 if (dc != UCD_OTHERCASE(fc))
2785 nigel 77 #endif
2786 ph10 510 MRRETURN(MATCH_NOMATCH);
2787 nigel 77 }
2788     }
2789     }
2790     else
2791     #endif /* SUPPORT_UTF8 */
2792    
2793     /* Non-UTF-8 mode */
2794     {
2795 ph10 443 if (md->end_subject - eptr < 1)
2796 ph10 428 {
2797 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2798 ph10 510 MRRETURN(MATCH_NOMATCH);
2799 ph10 443 }
2800 ph10 510 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2801 nigel 77 ecode += 2;
2802     }
2803     break;
2804    
2805 nigel 93 /* Match a single character repeatedly. */
2806 nigel 77
2807     case OP_EXACT:
2808     min = max = GET2(ecode, 1);
2809     ecode += 3;
2810     goto REPEATCHAR;
2811    
2812 nigel 93 case OP_POSUPTO:
2813     possessive = TRUE;
2814     /* Fall through */
2815    
2816 nigel 77 case OP_UPTO:
2817     case OP_MINUPTO:
2818     min = 0;
2819     max = GET2(ecode, 1);
2820     minimize = *ecode == OP_MINUPTO;
2821     ecode += 3;
2822     goto REPEATCHAR;
2823    
2824 nigel 93 case OP_POSSTAR:
2825     possessive = TRUE;
2826     min = 0;
2827     max = INT_MAX;
2828     ecode++;
2829     goto REPEATCHAR;
2830    
2831     case OP_POSPLUS:
2832     possessive = TRUE;
2833     min = 1;
2834     max = INT_MAX;
2835     ecode++;
2836     goto REPEATCHAR;
2837    
2838     case OP_POSQUERY:
2839     possessive = TRUE;
2840     min = 0;
2841     max = 1;
2842     ecode++;
2843     goto REPEATCHAR;
2844    
2845 nigel 77 case OP_STAR:
2846     case OP_MINSTAR:
2847     case OP_PLUS:
2848     case OP_MINPLUS:
2849     case OP_QUERY:
2850     case OP_MINQUERY:
2851     c = *ecode++ - OP_STAR;
2852     minimize = (c & 1) != 0;
2853 ph10 443
2854 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2855     max = rep_max[c]; /* zero for max => infinity */
2856     if (max == 0) max = INT_MAX;
2857    
2858 ph10 426 /* Common code for all repeated single-character matches. */
2859 nigel 77
2860     REPEATCHAR:
2861     #ifdef SUPPORT_UTF8
2862     if (utf8)
2863     {
2864     length = 1;
2865     charptr = ecode;
2866     GETCHARLEN(fc, ecode, length);
2867     ecode += length;
2868    
2869     /* Handle multibyte character matching specially here. There is
2870     support for caseless matching if UCP support is present. */
2871    
2872     if (length > 1)
2873     {
2874     #ifdef SUPPORT_UCP
2875 nigel 93 unsigned int othercase;
2876 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2877 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2878 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2879 ph10 115 else oclength = 0;
2880 nigel 77 #endif /* SUPPORT_UCP */
2881    
2882     for (i = 1; i <= min; i++)
2883     {
2884 ph10 426 if (eptr <= md->end_subject - length &&
2885     memcmp(eptr, charptr, length) == 0) eptr += length;
2886 ph10 123 #ifdef SUPPORT_UCP
2887 ph10 426 else if (oclength > 0 &&
2888     eptr <= md->end_subject - oclength &&
2889     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2890     #endif /* SUPPORT_UCP */
2891 nigel 77 else
2892     {
2893 ph10 426 CHECK_PARTIAL();
2894 ph10 510 MRRETURN(MATCH_NOMATCH);
2895 nigel 77 }
2896     }
2897    
2898     if (min == max) continue;
2899    
2900     if (minimize)
2901     {
2902     for (fi = min;; fi++)
2903     {
2904 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2905 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2906 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2907 ph10 426 if (eptr <= md->end_subject - length &&
2908     memcmp(eptr, charptr, length) == 0) eptr += length;
2909 ph10 123 #ifdef SUPPORT_UCP
2910 ph10 426 else if (oclength > 0 &&
2911     eptr <= md->end_subject - oclength &&
2912     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2913     #endif /* SUPPORT_UCP */
2914 nigel 77 else
2915     {
2916 ph10 426 CHECK_PARTIAL();
2917 ph10 510 MRRETURN(MATCH_NOMATCH);
2918 nigel 77 }
2919     }
2920     /* Control never gets here */
2921     }
2922 nigel 93
2923     else /* Maximize */
2924 nigel 77 {
2925     pp = eptr;
2926     for (i = min; i < max; i++)
2927     {
2928 ph10 426 if (eptr <= md->end_subject - length &&
2929     memcmp(eptr, charptr, length) == 0) eptr += length;
2930 ph10 123 #ifdef SUPPORT_UCP
2931 ph10 426 else if (oclength > 0 &&
2932     eptr <= md->end_subject - oclength &&
2933     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2934     #endif /* SUPPORT_UCP */
2935 ph10 463 else
2936 ph10 462 {
2937 ph10 463 CHECK_PARTIAL();
2938 ph10 462 break;
2939 ph10 463 }
2940 nigel 77 }
2941 nigel 93
2942     if (possessive) continue;
2943 ph10 427
2944 ph10 120 for(;;)
2945 ph10 426 {
2946     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2947     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2948 ph10 510 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2949 ph10 115 #ifdef SUPPORT_UCP
2950 ph10 426 eptr--;
2951     BACKCHAR(eptr);
2952 ph10 123 #else /* without SUPPORT_UCP */
2953 ph10 426 eptr -= length;
2954 ph10 123 #endif /* SUPPORT_UCP */
2955 ph10 426 }
2956 nigel 77 }
2957     /* Control never gets here */
2958     }
2959    
2960     /* If the length of a UTF-8 character is 1, we fall through here, and
2961     obey the code as for non-UTF-8 characters below, though in this case the
2962     value of fc will always be < 128. */
2963     }
2964     else
2965     #endif /* SUPPORT_UTF8 */
2966    
2967     /* When not in UTF-8 mode, load a single-byte character. */
2968    
2969 ph10 426 fc = *ecode++;
2970 ph10 443
2971 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2972     may not be in UTF-8 mode. The code is duplicated for the caseless and
2973     caseful cases, for speed, since matching characters is likely to be quite
2974     common. First, ensure the minimum number of matches are present. If min =
2975     max, continue at the same level without recursing. Otherwise, if
2976     minimizing, keep trying the rest of the expression and advancing one
2977     matching character if failing, up to the maximum. Alternatively, if
2978     maximizing, find the maximum number of characters and work backwards. */
2979    
2980     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2981     max, eptr));
2982    
2983     if ((ims & PCRE_CASELESS) != 0)
2984     {
2985     fc = md->lcc[fc];
2986     for (i = 1; i <= min; i++)
2987 ph10 426 {
2988     if (eptr >= md->end_subject)
2989     {
2990     SCHECK_PARTIAL();
2991 ph10 510 MRRETURN(MATCH_NOMATCH);
2992 ph10 426 }
2993 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2994 ph10 426 }
2995 nigel 77 if (min == max) continue;
2996     if (minimize)
2997     {
2998     for (fi = min;; fi++)
2999     {
3000 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
3001 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3002 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3003 ph10 426 if (eptr >= md->end_subject)
3004     {
3005 ph10 427 SCHECK_PARTIAL();
3006 ph10 510 MRRETURN(MATCH_NOMATCH);
3007 ph10 426 }
3008 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3009 nigel 77 }
3010     /* Control never gets here */
3011     }
3012 nigel 93 else /* Maximize */
3013 nigel 77 {
3014     pp = eptr;
3015     for (i = min; i < max; i++)
3016     {
3017 ph10 463 if (eptr >= md->end_subject)
3018 ph10 462 {
3019     SCHECK_PARTIAL();
3020     break;
3021 ph10 463 }
3022 ph10 462 if (fc != md->lcc[*eptr]) break;
3023 nigel 77 eptr++;
3024     }
3025 ph10 427
3026 nigel 93 if (possessive) continue;
3027 ph10 427
3028 nigel 77 while (eptr >= pp)
3029     {
3030 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3031 nigel 77 eptr--;
3032     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3033     }
3034 ph10 510 MRRETURN(MATCH_NOMATCH);
3035 nigel 77 }
3036     /* Control never gets here */
3037     }
3038    
3039     /* Caseful comparisons (includes all multi-byte characters) */
3040    
3041     else
3042     {
3043 ph10 427 for (i = 1; i <= min; i++)
3044 ph10 426 {
3045     if (eptr >= md->end_subject)
3046     {
3047     SCHECK_PARTIAL();
3048 ph10 510 MRRETURN(MATCH_NOMATCH);
3049 ph10 426 }
3050 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3051 ph10 427 }
3052 ph10 443
3053 nigel 77 if (min == max) continue;
3054 ph10 443
3055 nigel 77 if (minimize)
3056     {
3057     for (fi = min;; fi++)
3058     {
3059 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3060 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3061 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3062 ph10 426 if (eptr >= md->end_subject)
3063 ph10 427 {
3064 ph10 426 SCHECK_PARTIAL();
3065 ph10 510 MRRETURN(MATCH_NOMATCH);
3066 ph10 427 }
3067 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3068 nigel 77 }
3069     /* Control never gets here */
3070     }
3071 nigel 93 else /* Maximize */
3072 nigel 77 {
3073     pp = eptr;
3074     for (i = min; i < max; i++)
3075     {
3076 ph10 463 if (eptr >= md->end_subject)
3077 ph10 462 {
3078 ph10 463 SCHECK_PARTIAL();
3079 ph10 462 break;
3080 ph10 463 }
3081 ph10 462 if (fc != *eptr) break;
3082 nigel 77 eptr++;
3083     }
3084 nigel 93 if (possessive) continue;
3085 ph10 443
3086 nigel 77 while (eptr >= pp)
3087     {
3088 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3089 nigel 77 eptr--;
3090     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3091     }
3092 ph10 510 MRRETURN(MATCH_NOMATCH);
3093 nigel 77 }
3094     }
3095     /* Control never gets here */
3096    
3097     /* Match a negated single one-byte character. The character we are
3098     checking can be multibyte. */
3099    
3100     case OP_NOT:
3101 ph10 443 if (eptr >= md->end_subject)
3102 ph10 428 {
3103 ph10 443 SCHECK_PARTIAL();
3104 ph10 510 MRRETURN(MATCH_NOMATCH);
3105 ph10 443 }
3106 nigel 77 ecode++;
3107     GETCHARINCTEST(c, eptr);
3108     if ((ims & PCRE_CASELESS) != 0)
3109     {
3110     #ifdef SUPPORT_UTF8
3111     if (c < 256)
3112     #endif
3113     c = md->lcc[c];
3114 ph10 510 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3115 nigel 77 }
3116     else
3117     {
3118 ph10 510 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3119 nigel 77 }
3120     break;
3121    
3122     /* Match a negated single one-byte character repeatedly. This is almost a
3123     repeat of the code for a repeated single character, but I haven't found a
3124     nice way of commoning these up that doesn't require a test of the
3125     positive/negative option for each character match. Maybe that wouldn't add
3126     very much to the time taken, but character matching *is* what this is all
3127     about... */
3128    
3129     case OP_NOTEXACT:
3130     min = max = GET2(ecode, 1);
3131     ecode += 3;
3132     goto REPEATNOTCHAR;
3133    
3134     case OP_NOTUPTO:
3135     case OP_NOTMINUPTO:
3136     min = 0;
3137     max = GET2(ecode, 1);
3138     minimize = *ecode == OP_NOTMINUPTO;
3139     ecode += 3;
3140     goto REPEATNOTCHAR;
3141    
3142 nigel 93 case OP_NOTPOSSTAR:
3143     possessive = TRUE;
3144     min = 0;
3145     max = INT_MAX;
3146     ecode++;
3147     goto REPEATNOTCHAR;
3148    
3149     case OP_NOTPOSPLUS:
3150     possessive = TRUE;
3151     min = 1;
3152     max = INT_MAX;
3153     ecode++;
3154     goto REPEATNOTCHAR;
3155    
3156     case OP_NOTPOSQUERY:
3157     possessive = TRUE;
3158     min = 0;
3159     max = 1;
3160     ecode++;
3161     goto REPEATNOTCHAR;
3162    
3163     case OP_NOTPOSUPTO:
3164     possessive = TRUE;
3165     min = 0;
3166     max = GET2(ecode, 1);
3167     ecode += 3;
3168     goto REPEATNOTCHAR;
3169    
3170 nigel 77 case OP_NOTSTAR:
3171     case OP_NOTMINSTAR:
3172     case OP_NOTPLUS:
3173     case OP_NOTMINPLUS:
3174     case OP_NOTQUERY:
3175     case OP_NOTMINQUERY:
3176     c = *ecode++ - OP_NOTSTAR;
3177     minimize = (c & 1) != 0;
3178     min = rep_min[c]; /* Pick up values from tables; */
3179     max = rep_max[c]; /* zero for max => infinity */
3180     if (max == 0) max = INT_MAX;
3181    
3182 ph10 426 /* Common code for all repeated single-byte matches. */
3183 nigel 77
3184     REPEATNOTCHAR:
3185     fc = *ecode++;
3186    
3187     /* The code is duplicated for the caseless and caseful cases, for speed,
3188     since matching characters is likely to be quite common. First, ensure the
3189     minimum number of matches are present. If min = max, continue at the same
3190     level without recursing. Otherwise, if minimizing, keep trying the rest of
3191     the expression and advancing one matching character if failing, up to the
3192     maximum. Alternatively, if maximizing, find the maximum number of
3193     characters and work backwards. */
3194    
3195     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3196     max, eptr));
3197    
3198     if ((ims & PCRE_CASELESS) != 0)
3199     {
3200     fc = md->lcc[fc];
3201    
3202     #ifdef SUPPORT_UTF8
3203     /* UTF-8 mode */
3204     if (utf8)
3205     {
3206 nigel 93 register unsigned int d;
3207 nigel 77 for (i = 1; i <= min; i++)
3208     {
3209 ph10 426 if (eptr >= md->end_subject)
3210     {
3211     SCHECK_PARTIAL();
3212 ph10 510 MRRETURN(MATCH_NOMATCH);
3213 ph10 427 }
3214 nigel 77 GETCHARINC(d, eptr);
3215     if (d < 256) d = md->lcc[d];
3216 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3217 nigel 77 }
3218     }
3219     else
3220     #endif
3221    
3222     /* Not UTF-8 mode */
3223     {
3224     for (i = 1; i <= min; i++)
3225 ph10 426 {
3226     if (eptr >= md->end_subject)
3227     {
3228     SCHECK_PARTIAL();
3229 ph10 510 MRRETURN(MATCH_NOMATCH);
3230 ph10 427 }
3231 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3232 ph10 427 }
3233 nigel 77 }
3234    
3235     if (min == max) continue;
3236    
3237     if (minimize)
3238     {
3239     #ifdef SUPPORT_UTF8
3240     /* UTF-8 mode */
3241     if (utf8)
3242     {
3243 nigel 93 register unsigned int d;
3244 nigel 77 for (fi = min;; fi++)
3245     {
3246 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3247 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3248 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3249 ph10 427 if (eptr >= md->end_subject)
3250 ph10 426 {
3251 ph10 427 SCHECK_PARTIAL();
3252 ph10 510 MRRETURN(MATCH_NOMATCH);
3253 ph10 427 }
3254 nigel 77 GETCHARINC(d, eptr);
3255     if (d < 256) d = md->lcc[d];
3256 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3257 nigel 77 }
3258     }
3259     else
3260     #endif
3261     /* Not UTF-8 mode */
3262     {
3263     for (fi = min;; fi++)
3264     {
3265 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3266 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3267 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3268 ph10 426 if (eptr >= md->end_subject)
3269     {
3270     SCHECK_PARTIAL();
3271 ph10 510 MRRETURN(MATCH_NOMATCH);
3272 ph10 426 }
3273 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3274 nigel 77 }
3275     }
3276     /* Control never gets here */
3277     }
3278    
3279     /* Maximize case */
3280    
3281     else
3282     {
3283     pp = eptr;
3284    
3285     #ifdef SUPPORT_UTF8
3286     /* UTF-8 mode */
3287     if (utf8)
3288     {
3289 nigel 93 register unsigned int d;
3290 nigel 77 for (i = min; i < max; i++)
3291     {
3292     int len = 1;
3293 ph10 463 if (eptr >= md->end_subject)
3294 ph10 462 {
3295 ph10 463 SCHECK_PARTIAL();
3296 ph10 462 break;
3297 ph10 463 }
3298 nigel 77 GETCHARLEN(d, eptr, len);
3299     if (d < 256) d = md->lcc[d];
3300     if (fc == d) break;
3301     eptr += len;
3302     }
3303 nigel 93 if (possessive) continue;
3304     for(;;)
3305 nigel 77 {
3306 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3307 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3308     if (eptr-- == pp) break; /* Stop if tried at original pos */
3309     BACKCHAR(eptr);
3310     }
3311     }
3312     else
3313     #endif
3314     /* Not UTF-8 mode */
3315     {
3316     for (i = min; i < max; i++)
3317     {
3318 ph10 463 if (eptr >= md->end_subject)
3319 ph10 462 {
3320     SCHECK_PARTIAL();
3321     break;
3322 ph10 463 }
3323 ph10 462 if (fc == md->lcc[*eptr]) break;
3324 nigel 77 eptr++;
3325     }
3326 nigel 93 if (possessive) continue;
3327 nigel 77 while (eptr >= pp)
3328     {
3329 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3330 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3331     eptr--;
3332     }
3333     }
3334    
3335 ph10 510 MRRETURN(MATCH_NOMATCH);
3336 nigel 77 }
3337     /* Control never gets here */
3338     }
3339    
3340     /* Caseful comparisons */
3341    
3342     else
3343     {
3344     #ifdef SUPPORT_UTF8
3345     /* UTF-8 mode */
3346     if (utf8)
3347     {
3348 nigel 93 register unsigned int d;
3349 nigel 77 for (i = 1; i <= min; i++)
3350     {
3351 ph10 426 if (eptr >= md->end_subject)
3352     {
3353     SCHECK_PARTIAL();
3354 ph10 510 MRRETURN(MATCH_NOMATCH);
3355 ph10 427 }
3356 nigel 77 GETCHARINC(d, eptr);
3357 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3358 nigel 77 }
3359     }
3360     else
3361     #endif
3362     /* Not UTF-8 mode */
3363     {
3364     for (i = 1; i <= min; i++)
3365 ph10 426 {
3366     if (eptr >= md->end_subject)
3367     {
3368     SCHECK_PARTIAL();
3369 ph10 510 MRRETURN(MATCH_NOMATCH);
3370 ph10 427 }
3371 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3372 ph10 427 }
3373 nigel 77 }
3374    
3375     if (min == max) continue;
3376    
3377     if (minimize)
3378     {
3379     #ifdef SUPPORT_UTF8
3380     /* UTF-8 mode */
3381     if (utf8)
3382     {
3383 nigel 93 register unsigned int d;
3384 nigel 77 for (fi = min;; fi++)
3385     {
3386 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3387 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3388 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3389 ph10 427 if (eptr >= md->end_subject)
3390 ph10 426 {
3391 ph10 427 SCHECK_PARTIAL();
3392 ph10 510 MRRETURN(MATCH_NOMATCH);
3393 ph10 427 }
3394 nigel 77 GETCHARINC(d, eptr);
3395 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3396 nigel 77 }
3397     }
3398     else
3399     #endif
3400     /* Not UTF-8 mode */
3401     {
3402     for (fi = min;; fi++)
3403     {
3404 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3405 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3406 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3407 ph10 426 if (eptr >= md->end_subject)
3408     {
3409     SCHECK_PARTIAL();
3410 ph10 510 MRRETURN(MATCH_NOMATCH);
3411 ph10 427 }
3412 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3413 nigel 77 }
3414     }
3415     /* Control never gets here */
3416     }
3417    
3418     /* Maximize case */
3419    
3420     else
3421     {
3422     pp = eptr;
3423    
3424     #ifdef SUPPORT_UTF8
3425     /* UTF-8 mode */
3426     if (utf8)
3427     {
3428 nigel 93 register unsigned int d;
3429 nigel 77 for (i = min; i < max; i++)
3430     {
3431     int len = 1;
3432 ph10 463 if (eptr >= md->end_subject)
3433 ph10 462 {
3434 ph10 463 SCHECK_PARTIAL();
3435 ph10 462 break;
3436 ph10 463 }
3437 nigel 77 GETCHARLEN(d, eptr, len);
3438     if (fc == d) break;
3439     eptr += len;
3440     }
3441 nigel 93 if (possessive) continue;
3442 nigel 77 for(;;)
3443     {
3444 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3445 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3446     if (eptr-- == pp) break; /* Stop if tried at original pos */
3447     BACKCHAR(eptr);
3448     }
3449     }
3450     else
3451     #endif
3452     /* Not UTF-8 mode */
3453     {
3454     for (i = min; i < max; i++)
3455     {
3456 ph10 463 if (eptr >= md->end_subject)
3457 ph10 462 {
3458 ph10 463 SCHECK_PARTIAL();
3459 ph10 462 break;
3460 ph10 463 }
3461 ph10 462 if (fc == *eptr) break;
3462 nigel 77 eptr++;
3463     }
3464 nigel 93 if (possessive) continue;
3465 nigel 77 while (eptr >= pp)
3466     {
3467 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3468 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3469     eptr--;
3470     }
3471     }
3472    
3473 ph10 510 MRRETURN(MATCH_NOMATCH);
3474 nigel 77 }
3475     }
3476     /* Control never gets here */
3477    
3478     /* Match a single character type repeatedly; several different opcodes
3479     share code. This is very similar to the code for single characters, but we
3480     repeat it in the interests of efficiency. */
3481    
3482     case OP_TYPEEXACT:
3483     min = max = GET2(ecode, 1);
3484     minimize = TRUE;
3485     ecode += 3;
3486     goto REPEATTYPE;
3487    
3488     case OP_TYPEUPTO:
3489     case OP_TYPEMINUPTO:
3490     min = 0;
3491     max = GET2(ecode, 1);
3492     minimize = *ecode == OP_TYPEMINUPTO;
3493     ecode += 3;
3494     goto REPEATTYPE;
3495    
3496 nigel 93 case OP_TYPEPOSSTAR:
3497     possessive = TRUE;
3498     min = 0;
3499     max = INT_MAX;
3500     ecode++;
3501     goto REPEATTYPE;
3502    
3503     case OP_TYPEPOSPLUS:
3504     possessive = TRUE;
3505     min = 1;
3506     max = INT_MAX;
3507     ecode++;
3508     goto REPEATTYPE;
3509    
3510     case OP_TYPEPOSQUERY:
3511     possessive = TRUE;
3512     min = 0;
3513     max = 1;
3514     ecode++;
3515     goto REPEATTYPE;
3516    
3517     case OP_TYPEPOSUPTO:
3518     possessive = TRUE;
3519     min = 0;
3520     max = GET2(ecode, 1);
3521     ecode += 3;
3522     goto REPEATTYPE;
3523    
3524 nigel 77 case OP_TYPESTAR:
3525     case OP_TYPEMINSTAR:
3526     case OP_TYPEPLUS:
3527     case OP_TYPEMINPLUS:
3528     case OP_TYPEQUERY:
3529     case OP_TYPEMINQUERY:
3530     c = *ecode++ - OP_TYPESTAR;
3531     minimize = (c & 1) != 0;
3532     min = rep_min[c]; /* Pick up values from tables; */
3533     max = rep_max[c]; /* zero for max => infinity */
3534     if (max == 0) max = INT_MAX;
3535    
3536     /* Common code for all repeated single character type matches. Note that
3537     in UTF-8 mode, '.' matches a character of any length, but for the other
3538     character types, the valid characters are all one-byte long. */
3539    
3540     REPEATTYPE:
3541     ctype = *ecode++; /* Code for the character type */
3542    
3543     #ifdef SUPPORT_UCP
3544     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3545     {
3546     prop_fail_result = ctype == OP_NOTPROP;
3547     prop_type = *ecode++;
3548 nigel 87 prop_value = *ecode++;
3549 nigel 77 }
3550     else prop_type = -1;
3551     #endif
3552    
3553     /* First, ensure the minimum number of matches are present. Use inline
3554     code for maximizing the speed, and do the type test once at the start
3555 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3556 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3557     and single-bytes. */
3558    
3559     if (min > 0)
3560     {
3561     #ifdef SUPPORT_UCP
3562 nigel 87 if (prop_type >= 0)
3563 nigel 77 {
3564 nigel 87 switch(prop_type)
3565 nigel 77 {
3566 nigel 87 case PT_ANY:
3567 ph10 510 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3568 nigel 87 for (i = 1; i <= min; i++)
3569     {
3570 ph10 427 if (eptr >= md->end_subject)
3571 ph10 426 {
3572 ph10 427 SCHECK_PARTIAL();
3573 ph10 510 MRRETURN(MATCH_NOMATCH);
3574 ph10 427 }
3575 ph10 184 GETCHARINCTEST(c, eptr);
3576 nigel 87 }
3577     break;
3578    
3579     case PT_LAMP:
3580     for (i = 1; i <= min; i++)
3581     {
3582 ph10 427 if (eptr >= md->end_subject)
3583 ph10 426 {
3584 ph10 427 SCHECK_PARTIAL();
3585 ph10 510 MRRETURN(MATCH_NOMATCH);
3586 ph10 427 }
3587 ph10 184 GETCHARINCTEST(c, eptr);
3588 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3589 nigel 87 if ((prop_chartype == ucp_Lu ||
3590     prop_chartype == ucp_Ll ||
3591     prop_chartype == ucp_Lt) == prop_fail_result)
3592 ph10 510 MRRETURN(MATCH_NOMATCH);
3593 nigel 87 }
3594     break;
3595    
3596     case PT_GC:
3597     for (i = 1; i <= min; i++)
3598     {
3599 ph10 427 if (eptr >= md->end_subject)
3600 ph10 426 {
3601 ph10 427 SCHECK_PARTIAL();
3602 ph10 510 MRRETURN(MATCH_NOMATCH);
3603 ph10 427 }
3604 ph10 184 GETCHARINCTEST(c, eptr);
3605 ph10 349 prop_category = UCD_CATEGORY(c);
3606 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3607 ph10 510 MRRETURN(MATCH_NOMATCH);
3608 nigel 87 }
3609     break;
3610    
3611     case PT_PC:
3612     for (i = 1; i <= min; i++)
3613     {
3614 ph10 427 if (eptr >= md->end_subject)
3615 ph10 426 {
3616 ph10 427 SCHECK_PARTIAL();
3617 ph10 510 MRRETURN(MATCH_NOMATCH);
3618 ph10 427 }
3619 ph10 184 GETCHARINCTEST(c, eptr);
3620 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3621 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3622 ph10 510 MRRETURN(MATCH_NOMATCH);
3623 nigel 87 }
3624     break;
3625    
3626     case PT_SC:
3627     for (i = 1; i <= min; i++)
3628     {
3629 ph10 427 if (eptr >= md->end_subject)
3630 ph10 426 {
3631 ph10 427 SCHECK_PARTIAL();
3632 ph10 510 MRRETURN(MATCH_NOMATCH);
3633 ph10 427 }
3634 ph10 184 GETCHARINCTEST(c, eptr);
3635 ph10 349 prop_script = UCD_SCRIPT(c);
3636 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3637 ph10 510 MRRETURN(MATCH_NOMATCH);
3638 nigel 87 }
3639     break;
3640 ph10 527
3641 ph10 517 case PT_ALNUM:
3642     for (i = 1; i <= min; i++)
3643     {
3644     if (eptr >= md->end_subject)
3645     {
3646     SCHECK_PARTIAL();
3647     MRRETURN(MATCH_NOMATCH);
3648     }
3649     GETCHARINCTEST(c, eptr);
3650 ph10 527 prop_category = UCD_CATEGORY(c);
3651     if ((prop_category == ucp_L || prop_category == ucp_N)
3652 ph10 517 == prop_fail_result)
3653     MRRETURN(MATCH_NOMATCH);
3654     }
3655     break;
3656 ph10 527
3657 ph10 517 case PT_SPACE: /* Perl space */
3658     for (i = 1; i <= min; i++)
3659     {
3660     if (eptr >= md->end_subject)
3661     {
3662     SCHECK_PARTIAL();
3663     MRRETURN(MATCH_NOMATCH);
3664     }
3665     GETCHARINCTEST(c, eptr);
3666 ph10 527 prop_category = UCD_CATEGORY(c);
3667     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3668     c == CHAR_FF || c == CHAR_CR)
3669 ph10 517 == prop_fail_result)
3670     MRRETURN(MATCH_NOMATCH);
3671     }
3672     break;
3673 ph10 527
3674 ph10 517 case PT_PXSPACE: /* POSIX space */
3675     for (i = 1; i <= min; i++)
3676     {
3677     if (eptr >= md->end_subject)
3678     {
3679     SCHECK_PARTIAL();
3680     MRRETURN(MATCH_NOMATCH);
3681     }
3682     GETCHARINCTEST(c, eptr);
3683 ph10 527 prop_category = UCD_CATEGORY(c);
3684     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3685     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3686 ph10 517 == prop_fail_result)
3687     MRRETURN(MATCH_NOMATCH);
3688     }
3689     break;
3690 ph10 527
3691     case PT_WORD:
3692 ph10 517 for (i = 1; i <= min; i++)
3693     {
3694     if (eptr >= md->end_subject)
3695     {
3696     SCHECK_PARTIAL();
3697     MRRETURN(MATCH_NOMATCH);
3698     }
3699     GETCHARINCTEST(c, eptr);
3700 ph10 527 prop_category = UCD_CATEGORY(c);
3701 ph10 517 if ((prop_category == ucp_L || prop_category == ucp_N ||
3702 ph10 527 c == CHAR_UNDERSCORE)
3703 ph10 517 == prop_fail_result)
3704     MRRETURN(MATCH_NOMATCH);
3705     }
3706     break;
3707 ph10 527
3708 ph10 517 /* This should not occur */
3709 nigel 87
3710     default:
3711     RRETURN(PCRE_ERROR_INTERNAL);
3712 nigel 77 }
3713     }
3714    
3715     /* Match extended Unicode sequences. We will get here only if Unicode property
3716     support (SUPPORT_UCP) is in the binary; otherwise an error occurs when the pattern is compiled. */
3717    
3718     else if (ctype == OP_EXTUNI)
3719     {
3720     for (i = 1; i <= min; i++)
3721     {
3722 ph10 427 if (eptr >= md->end_subject)
3723 ph10 426 {
3724 ph10 427 SCHECK_PARTIAL();
3725 ph10 510 MRRETURN(MATCH_NOMATCH);
3726 ph10 427 }
3727 nigel 77 GETCHARINCTEST(c, eptr);
3728 ph10 349 prop_category = UCD_CATEGORY(c);
3729 ph10 510 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3730 nigel 77 while (eptr < md->end_subject)
3731     {
3732     int len = 1;
3733 ph10 426 if (!utf8) c = *eptr;
3734     else { GETCHARLEN(c, eptr, len); }
3735 ph10 349 prop_category = UCD_CATEGORY(c);
3736 nigel 77 if (prop_category != ucp_M) break;
3737     eptr += len;
3738     }
3739     }
3740     }
3741    
3742     else
3743     #endif /* SUPPORT_UCP */
3744    
3745     /* Handle all other cases when the coding is UTF-8 */
3746    
3747     #ifdef SUPPORT_UTF8
3748     if (utf8) switch(ctype)
3749     {
3750     case OP_ANY:
3751     for (i = 1; i <= min; i++)
3752     {
3753 ph10 426 if (eptr >= md->end_subject)
3754     {
3755 ph10 427 SCHECK_PARTIAL();
3756 ph10 510 MRRETURN(MATCH_NOMATCH);
3757 ph10 427 }
3758 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3759 nigel 91 eptr++;
3760 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3761     }
3762     break;
3763    
3764 ph10 341 case OP_ALLANY:
3765     for (i = 1; i <= min; i++)
3766     {
3767 ph10 427 if (eptr >= md->end_subject)
3768 ph10 426 {
3769     SCHECK_PARTIAL();
3770 ph10 510 MRRETURN(MATCH_NOMATCH);
3771 ph10 427 }
3772 ph10 341 eptr++;
3773     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3774     }
3775     break;
3776    
3777 nigel 77 case OP_ANYBYTE:
3778 ph10 510 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3779 nigel 77 eptr += min;
3780     break;
3781    
3782 nigel 93 case OP_ANYNL:
3783     for (i = 1; i <= min; i++)
3784     {
3785 ph10 427 if (eptr >= md->end_subject)
3786 ph10 426 {
3787     SCHECK_PARTIAL();
3788 ph10 510 MRRETURN(MATCH_NOMATCH);
3789 ph10 427 }
3790 nigel 93 GETCHARINC(c, eptr);
3791     switch(c)
3792     {
3793 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3794 nigel 93 case 0x000d:
3795     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3796     break;
3797 ph10 231
3798 nigel 93 case 0x000a:
3799 ph10 231 break;
3800    
3801 nigel 93 case 0x000b:
3802     case 0x000c:
3803     case 0x0085:
3804     case 0x2028:
3805     case 0x2029:
3806 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3807 nigel 93 break;
3808     }
3809     }
3810     break;
3811    
3812 ph10 178 case OP_NOT_HSPACE:
3813     for (i = 1; i <= min; i++)
3814     {
3815 ph10 427 if (eptr >= md->end_subject)
3816 ph10 426 {
3817     SCHECK_PARTIAL();
3818 ph10 510 MRRETURN(MATCH_NOMATCH);
3819 ph10 427 }
3820 ph10 178 GETCHARINC(c, eptr);
3821     switch(c)
3822     {
3823     default: break;
3824     case 0x09: /* HT */
3825     case 0x20: /* SPACE */
3826     case 0xa0: /* NBSP */
3827     case 0x1680: /* OGHAM SPACE MARK */
3828     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3829     case 0x2000: /* EN QUAD */
3830     case 0x2001: /* EM QUAD */
3831     case 0x2002: /* EN SPACE */
3832     case 0x2003: /* EM SPACE */
3833     case 0x2004: /* THREE-PER-EM SPACE */
3834     case 0x2005: /* FOUR-PER-EM SPACE */
3835     case 0x2006: /* SIX-PER-EM SPACE */
3836     case 0x2007: /* FIGURE SPACE */
3837     case 0x2008: /* PUNCTUATION SPACE */
3838     case 0x2009: /* THIN SPACE */
3839     case 0x200A: /* HAIR SPACE */
3840     case 0x202f: /* NARROW NO-BREAK SPACE */
3841     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3842     case 0x3000: /* IDEOGRAPHIC SPACE */
3843 ph10 510 MRRETURN(MATCH_NOMATCH);
3844 ph10 178 }
3845     }
3846     break;
3847 ph10 182
3848 ph10 178 case OP_HSPACE:
3849     for (i = 1; i <= min; i++)
3850     {