/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 600 - (hide annotations) (download)
Mon May 9 08:54:11 2011 UTC (3 years, 6 months ago) by ph10
File MIME type: text/plain
File size: 188836 byte(s)
Fix backup bug for \R with greedy quantifier.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 598 Copyright (c) 1997-2011 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74 ph10 511 #define MATCH_ACCEPT (-999)
75     #define MATCH_COMMIT (-998)
76     #define MATCH_PRUNE (-997)
77     #define MATCH_SKIP (-996)
78     #define MATCH_SKIP_ARG (-995)
79     #define MATCH_THEN (-994)
80 ph10 210
81 ph10 510 /* This is a convenience macro for code that occurs many times. */
82    
83     #define MRRETURN(ra) \
84     { \
85     md->mark = markptr; \
86     RRETURN(ra); \
87     }
88    
89 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
90     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91     because the offset vector is always a multiple of 3 long. */
92    
93     #define REC_STACK_SAVE_MAX 30
94    
95     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96    
97     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
98     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
99    
100    
101    
102 ph10 475 #ifdef PCRE_DEBUG
103 nigel 77 /*************************************************
104     * Debugging function to print chars *
105     *************************************************/
106    
107     /* Print a sequence of chars in printable format, stopping at the end of the
108     subject if requested.
109    
110     Arguments:
111     p points to characters
112     length number to print
113     is_subject TRUE if printing from within md->start_subject
114     md pointer to matching data block, if is_subject is TRUE
115    
116     Returns: nothing
117     */
118    
119     static void
120     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121     {
122 nigel 93 unsigned int c;
123 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124     while (length-- > 0)
125     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126     }
127     #endif
128    
129    
130    
131     /*************************************************
132     * Match a back-reference *
133     *************************************************/
134    
135 ph10 595 /* Normally, if a back reference hasn't been set, the length that is passed is
136     negative, so the match always fails. However, in JavaScript compatibility mode,
137     the length passed is zero. Note that in caseless UTF-8 mode, the number of
138     subject bytes matched may be different to the number of reference bytes.
139 nigel 77
140     Arguments:
141     offset index into the offset vector
142 ph10 595 eptr pointer into the subject
143     length length of reference to be matched (number of bytes)
144 nigel 77 md points to match data block
145     ims the ims flags
146    
147 ph10 595 Returns: < 0 if not matched, otherwise the number of subject bytes matched
148 nigel 77 */
149    
150 ph10 595 static int
151 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
152 nigel 77 unsigned long int ims)
153     {
154 ph10 595 USPTR eptr_start = eptr;
155     register USPTR p = md->start_subject + md->offset_vector[offset];
156 nigel 77
157 ph10 475 #ifdef PCRE_DEBUG
158 nigel 77 if (eptr >= md->end_subject)
159     printf("matching subject <null>");
160     else
161     {
162     printf("matching subject ");
163     pchars(eptr, length, TRUE, md);
164     }
165     printf(" against backref ");
166     pchars(p, length, FALSE, md);
167     printf("\n");
168     #endif
169    
170 ph10 595 /* Always fail if reference not set (and not JavaScript compatible). */
171 nigel 77
172 ph10 595 if (length < 0) return -1;
173 nigel 77
174 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175     properly if Unicode properties are supported. Otherwise, we can check only
176     ASCII characters. */
177 nigel 77
178     if ((ims & PCRE_CASELESS) != 0)
179     {
180 ph10 354 #ifdef SUPPORT_UTF8
181     #ifdef SUPPORT_UCP
182     if (md->utf8)
183     {
184 ph10 595 /* Match characters up to the end of the reference. NOTE: the number of
185     bytes matched may differ, because there are some characters whose upper and
186     lower case versions code as different numbers of bytes. For example, U+023A
187     (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188     a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189     the latter. It is important, therefore, to check the length along the
190     reference, not along the subject (earlier code did this wrong). */
191    
192     USPTR endptr = p + length;
193     while (p < endptr)
194 ph10 354 {
195 ph10 358 int c, d;
196 ph10 597 if (eptr >= md->end_subject) return -1;
197 ph10 354 GETCHARINC(c, eptr);
198     GETCHARINC(d, p);
199 ph10 595 if (c != d && c != UCD_OTHERCASE(d)) return -1;
200 ph10 358 }
201     }
202 ph10 354 else
203     #endif
204     #endif
205    
206     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207     is no UCP support. */
208 ph10 597 {
209     if (eptr + length > md->end_subject) return -1;
210     while (length-- > 0)
211     { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212     }
213 nigel 77 }
214 ph10 358
215 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
216     are in UTF-8 mode. */
217 ph10 358
218 nigel 77 else
219 ph10 597 {
220     if (eptr + length > md->end_subject) return -1;
221     while (length-- > 0) if (*p++ != *eptr++) return -1;
222     }
223 nigel 77
224 ph10 595 return eptr - eptr_start;
225 nigel 77 }
226    
227    
228    
229     /***************************************************************************
230     ****************************************************************************
231     RECURSION IN THE match() FUNCTION
232    
233 nigel 87 The match() function is highly recursive, though not every recursive call
234     increases the recursive depth. Nevertheless, some regular expressions can cause
235     it to recurse to a great depth. I was writing for Unix, so I just let it call
236     itself recursively. This uses the stack for saving everything that has to be
237     saved for a recursive call. On Unix, the stack can be large, and this works
238     fine.
239 nigel 77
240 nigel 87 It turns out that on some non-Unix-like systems there are problems with
241     programs that use a lot of stack. (This despite the fact that every last chip
242     has oodles of memory these days, and techniques for extending the stack have
243     been known for decades.) So....
244 nigel 77
245     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
246     calls by keeping local variables that need to be preserved in blocks of memory
247 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
248 nigel 77 achieve this so that the actual code doesn't look very different to what it
249     always used to.
250 ph10 164
251 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
252 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
253     Switzer, the use of longjmp() has been abolished, at the cost of having to
254     provide a unique number for each call to RMATCH. There is no way of generating
255     a sequence of numbers at compile time in C. I have given them names, to make
256     them stand out more clearly.
257    
258     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
259     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
260 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
261     don't have indeterminate values; this has meant that the frame size can be
262 ph10 164 reduced because the result can be "passed back" by straight setting of the
263     variable instead of being passed in the frame.
264 nigel 77 ****************************************************************************
265     ***************************************************************************/
266    
267 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
268     below must be updated in sync. */
269 nigel 77
270 ph10 164 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
271     RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
272     RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
273     RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
274 ph10 210 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
275 ph10 527 RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
276     RM61, RM62 };
277 ph10 164
278 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
279 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
280 ph10 501 actually used in this definition. */
281 nigel 77
282     #ifndef NO_RECURSE
283     #define REGISTER register
284 ph10 164
285 ph10 475 #ifdef PCRE_DEBUG
286 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
287 nigel 87 { \
288     printf("match() called in line %d\n", __LINE__); \
289 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
290 nigel 87 printf("to line %d\n", __LINE__); \
291     }
292     #define RRETURN(ra) \
293     { \
294     printf("match() returned %d from line %d ", ra, __LINE__); \
295     return ra; \
296     }
297     #else
298 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
299 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
300 nigel 77 #define RRETURN(ra) return ra
301 nigel 87 #endif
302    
303 nigel 77 #else
304    
305    
306 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
307     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
308     argument of match(), which never changes. */
309 nigel 77
310     #define REGISTER
311    
312 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
313 nigel 77 {\
314 ph10 563 heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
315 ph10 534 if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316 ph10 164 frame->Xwhere = rw; \
317     newframe->Xeptr = ra;\
318     newframe->Xecode = rb;\
319 ph10 168 newframe->Xmstart = mstart;\
320 ph10 501 newframe->Xmarkptr = markptr;\
321 ph10 164 newframe->Xoffset_top = rc;\
322     newframe->Xims = re;\
323     newframe->Xeptrb = rf;\
324     newframe->Xflags = rg;\
325     newframe->Xrdepth = frame->Xrdepth + 1;\
326     newframe->Xprevframe = frame;\
327     frame = newframe;\
328     DPRINTF(("restarting from line %d\n", __LINE__));\
329     goto HEAP_RECURSE;\
330     L_##rw:\
331     DPRINTF(("jumped back to line %d\n", __LINE__));\
332 nigel 77 }
333    
334     #define RRETURN(ra)\
335     {\
336 ph10 527 heapframe *oldframe = frame;\
337     frame = oldframe->Xprevframe;\
338     (pcre_stack_free)(oldframe);\
339 nigel 77 if (frame != NULL)\
340     {\
341 ph10 164 rrc = ra;\
342     goto HEAP_RETURN;\
343 nigel 77 }\
344     return ra;\
345     }
346    
347    
348     /* Structure for remembering the local variables in a private frame */
349    
350     typedef struct heapframe {
351     struct heapframe *Xprevframe;
352    
353     /* Function arguments that may change */
354    
355 ph10 409 USPTR Xeptr;
356 nigel 77 const uschar *Xecode;
357 ph10 409 USPTR Xmstart;
358 ph10 501 USPTR Xmarkptr;
359 nigel 77 int Xoffset_top;
360     long int Xims;
361     eptrblock *Xeptrb;
362     int Xflags;
363 nigel 91 unsigned int Xrdepth;
364 nigel 77
365     /* Function local variables */
366    
367 ph10 409 USPTR Xcallpat;
368 ph10 406 #ifdef SUPPORT_UTF8
369 ph10 409 USPTR Xcharptr;
370 ph10 406 #endif
371 ph10 409 USPTR Xdata;
372     USPTR Xnext;
373     USPTR Xpp;
374     USPTR Xprev;
375     USPTR Xsaved_eptr;
376 nigel 77
377     recursion_info Xnew_recursive;
378    
379     BOOL Xcur_is_word;
380     BOOL Xcondition;
381     BOOL Xprev_is_word;
382    
383     unsigned long int Xoriginal_ims;
384    
385     #ifdef SUPPORT_UCP
386     int Xprop_type;
387 nigel 87 int Xprop_value;
388 nigel 77 int Xprop_fail_result;
389     int Xprop_category;
390     int Xprop_chartype;
391 nigel 87 int Xprop_script;
392 ph10 123 int Xoclength;
393     uschar Xocchars[8];
394 nigel 77 #endif
395    
396 ph10 403 int Xcodelink;
397 nigel 77 int Xctype;
398 nigel 93 unsigned int Xfc;
399 nigel 77 int Xfi;
400     int Xlength;
401     int Xmax;
402     int Xmin;
403     int Xnumber;
404     int Xoffset;
405     int Xop;
406     int Xsave_capture_last;
407     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
408     int Xstacksave[REC_STACK_SAVE_MAX];
409    
410     eptrblock Xnewptrb;
411    
412 ph10 164 /* Where to jump back to */
413 nigel 77
414 ph10 164 int Xwhere;
415 ph10 165
416 nigel 77 } heapframe;
417    
418     #endif
419    
420    
421     /***************************************************************************
422     ***************************************************************************/
423    
424    
425    
426     /*************************************************
427     * Match from current position *
428     *************************************************/
429    
430 nigel 93 /* This function is called recursively in many circumstances. Whenever it
431 nigel 77 returns a negative (error) response, the outer incarnation must also return the
432 ph10 426 same response. */
433 nigel 77
434 ph10 426 /* These macros pack up tests that are used for partial matching, and which
435     appear several times in the code. We set the "hit end" flag if the pointer is
436     at the end of the subject and also past the start of the subject (i.e.
437 ph10 427 something has been matched). For hard partial matching, we then return
438     immediately. The second one is used when we already know we are past the end of
439     the subject. */
440 ph10 426
441     #define CHECK_PARTIAL()\
442 ph10 553 if (md->partial != 0 && eptr >= md->end_subject && \
443     eptr > md->start_used_ptr) \
444     { \
445     md->hitend = TRUE; \
446     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
447 ph10 427 }
448 ph10 426
449     #define SCHECK_PARTIAL()\
450 ph10 553 if (md->partial != 0 && eptr > md->start_used_ptr) \
451     { \
452     md->hitend = TRUE; \
453     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
454 ph10 427 }
455 ph10 426
456 ph10 427
457 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
458     the md structure (e.g. utf8, end_subject) into individual variables to improve
459 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
460     made performance worse.
461    
462     Arguments:
463 nigel 93 eptr pointer to current character in subject
464     ecode pointer to current position in compiled code
465 ph10 168 mstart pointer to the current match start position (can be modified
466 ph10 172 by encountering \K)
467 ph10 501 markptr pointer to the most recent MARK name, or NULL
468 nigel 77 offset_top current top pointer
469     md pointer to "static" info for the match
470     ims current /i, /m, and /s options
471     eptrb pointer to chain of blocks containing eptr at start of
472     brackets - for testing for empty matches
473     flags can contain
474     match_condassert - this is an assertion condition
475 nigel 93 match_cbegroup - this is the start of an unlimited repeat
476     group that can match an empty string
477 nigel 87 rdepth the recursion depth
478 nigel 77
479     Returns: MATCH_MATCH if matched ) these values are >= 0
480     MATCH_NOMATCH if failed to match )
481 ph10 510 a negative MATCH_xxx value for PRUNE, SKIP, etc
482 nigel 77 a negative PCRE_ERROR_xxx value if aborted by an error condition
483 nigel 87 (e.g. stopped by repeated call or recursion limit)
484 nigel 77 */
485    
486     static int
487 ph10 510 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
488     const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
489 ph10 501 eptrblock *eptrb, int flags, unsigned int rdepth)
490 nigel 77 {
491     /* These variables do not need to be preserved over recursion in this function,
492 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
493     "register" because they are used a lot in loops. */
494 nigel 77
495 nigel 91 register int rrc; /* Returns from recursive calls */
496     register int i; /* Used for loops not involving calls to RMATCH() */
497 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
498 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
499 nigel 77
500 nigel 93 BOOL minimize, possessive; /* Quantifier options */
501 ph10 403 int condcode;
502 nigel 93
503 nigel 77 /* When recursion is not being used, all "local" variables that have to be
504     preserved over calls to RMATCH() are part of a "frame" which is obtained from
505     heap storage. Set up the top-level frame here; others are obtained from the
506     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
507    
508     #ifdef NO_RECURSE
509 ph10 563 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
510 ph10 531 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
511 nigel 77 frame->Xprevframe = NULL; /* Marks the top level */
512    
513     /* Copy in the original argument variables */
514    
515     frame->Xeptr = eptr;
516     frame->Xecode = ecode;
517 ph10 168 frame->Xmstart = mstart;
518 ph10 501 frame->Xmarkptr = markptr;
519 nigel 77 frame->Xoffset_top = offset_top;
520     frame->Xims = ims;
521     frame->Xeptrb = eptrb;
522     frame->Xflags = flags;
523 nigel 87 frame->Xrdepth = rdepth;
524 nigel 77
525     /* This is the point to which control jumps back in order to effect "recursion" */
526    
527     HEAP_RECURSE:
528    
529     /* Macros make the argument variables come from the current frame */
530    
531     #define eptr frame->Xeptr
532     #define ecode frame->Xecode
533 ph10 168 #define mstart frame->Xmstart
534 ph10 501 #define markptr frame->Xmarkptr
535 nigel 77 #define offset_top frame->Xoffset_top
536     #define ims frame->Xims
537     #define eptrb frame->Xeptrb
538     #define flags frame->Xflags
539 nigel 87 #define rdepth frame->Xrdepth
540 nigel 77
541     /* Ditto for the local variables */
542    
543     #ifdef SUPPORT_UTF8
544     #define charptr frame->Xcharptr
545     #endif
546     #define callpat frame->Xcallpat
547 ph10 403 #define codelink frame->Xcodelink
548 nigel 77 #define data frame->Xdata
549     #define next frame->Xnext
550     #define pp frame->Xpp
551     #define prev frame->Xprev
552     #define saved_eptr frame->Xsaved_eptr
553    
554     #define new_recursive frame->Xnew_recursive
555    
556     #define cur_is_word frame->Xcur_is_word
557     #define condition frame->Xcondition
558     #define prev_is_word frame->Xprev_is_word
559    
560     #define original_ims frame->Xoriginal_ims
561    
562     #ifdef SUPPORT_UCP
563     #define prop_type frame->Xprop_type
564 nigel 87 #define prop_value frame->Xprop_value
565 nigel 77 #define prop_fail_result frame->Xprop_fail_result
566     #define prop_category frame->Xprop_category
567     #define prop_chartype frame->Xprop_chartype
568 nigel 87 #define prop_script frame->Xprop_script
569 ph10 115 #define oclength frame->Xoclength
570     #define occhars frame->Xocchars
571 nigel 77 #endif
572    
573     #define ctype frame->Xctype
574     #define fc frame->Xfc
575     #define fi frame->Xfi
576     #define length frame->Xlength
577     #define max frame->Xmax
578     #define min frame->Xmin
579     #define number frame->Xnumber
580     #define offset frame->Xoffset
581     #define op frame->Xop
582     #define save_capture_last frame->Xsave_capture_last
583     #define save_offset1 frame->Xsave_offset1
584     #define save_offset2 frame->Xsave_offset2
585     #define save_offset3 frame->Xsave_offset3
586     #define stacksave frame->Xstacksave
587    
588     #define newptrb frame->Xnewptrb
589    
590     /* When recursion is being used, local variables are allocated on the stack and
591     get preserved during recursion in the normal way. In this environment, fi and
592     i, and fc and c, can be the same variables. */
593    
594 nigel 93 #else /* NO_RECURSE not defined */
595 nigel 77 #define fi i
596     #define fc c
597    
598    
599 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
600     const uschar *charptr; /* in small blocks of the code. My normal */
601     #endif /* style of coding would have declared */
602     const uschar *callpat; /* them within each of those blocks. */
603     const uschar *data; /* However, in order to accommodate the */
604     const uschar *next; /* version of this code that uses an */
605     USPTR pp; /* external "stack" implemented on the */
606     const uschar *prev; /* heap, it is easier to declare them all */
607     USPTR saved_eptr; /* here, so the declarations can be cut */
608     /* out in a block. The only declarations */
609     recursion_info new_recursive; /* within blocks below are for variables */
610     /* that do not have to be preserved over */
611     BOOL cur_is_word; /* a recursive call to RMATCH(). */
612     BOOL condition;
613 nigel 77 BOOL prev_is_word;
614    
615     unsigned long int original_ims;
616    
617     #ifdef SUPPORT_UCP
618     int prop_type;
619 nigel 87 int prop_value;
620 nigel 77 int prop_fail_result;
621     int prop_category;
622     int prop_chartype;
623 nigel 87 int prop_script;
624 ph10 115 int oclength;
625     uschar occhars[8];
626 nigel 77 #endif
627    
628 ph10 399 int codelink;
629 nigel 77 int ctype;
630     int length;
631     int max;
632     int min;
633     int number;
634     int offset;
635     int op;
636     int save_capture_last;
637     int save_offset1, save_offset2, save_offset3;
638     int stacksave[REC_STACK_SAVE_MAX];
639    
640     eptrblock newptrb;
641 nigel 93 #endif /* NO_RECURSE */
642 nigel 77
643     These statements are here to stop the compiler complaining about uninitialized
644     variables. */
645    
646     #ifdef SUPPORT_UCP
647 nigel 87 prop_value = 0;
648 nigel 77 prop_fail_result = 0;
649     #endif
650    
651 nigel 93
652 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
653     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
654     used. Thanks to Ian Taylor for noticing this possibility and sending the
655     original patch. */
656    
657     TAIL_RECURSE:
658    
659 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
660     are specified by the macro RMATCH and RRETURN is used to return. When
661     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
662 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
663 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
664     complicated macro. It has to be used in one particular way. This shouldn't,
665     however, impact performance when true recursion is being used. */
666 nigel 77
667 ph10 164 #ifdef SUPPORT_UTF8
668     utf8 = md->utf8; /* Local copy of the flag */
669     #else
670     utf8 = FALSE;
671     #endif
672    
673 nigel 87 /* First check that we haven't called match() too many times, or that we
674     haven't exceeded the recursive call limit. */
675    
676 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
677 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
678 nigel 77
679     original_ims = ims; /* Save for resetting on ')' */
680 nigel 91
681 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
682     string, the match_cbegroup flag is set. When this is the case, add the current
683     subject pointer to the chain of such remembered pointers, to be checked when we
684     hit the closing ket, in order to break infinite loops that match no characters.
685 ph10 197 When match() is called in other circumstances, don't add to the chain. The
686     match_cbegroup flag must NOT be used with tail recursion, because the memory
687     block that is used is on the stack, so a new one may be required for each
688     match(). */
689 nigel 77
690 nigel 93 if ((flags & match_cbegroup) != 0)
691 nigel 77 {
692 ph10 197 newptrb.epb_saved_eptr = eptr;
693     newptrb.epb_prev = eptrb;
694     eptrb = &newptrb;
695 nigel 77 }
696    
697 nigel 93 /* Now start processing the opcodes. */
698 nigel 77
699     for (;;)
700     {
701 nigel 93 minimize = possessive = FALSE;
702 nigel 77 op = *ecode;
703 ph10 443
704 nigel 93 switch(op)
705     {
706 ph10 510 case OP_MARK:
707     markptr = ecode + 2;
708     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
709 ph10 512 ims, eptrb, flags, RM55);
710    
711     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
712     argument, and we must check whether that argument matches this MARK's
713     argument. It is passed back in md->start_match_ptr (an overloading of that
714     variable). If it does match, we reset that variable to the current subject
715     position and return MATCH_SKIP. Otherwise, pass back the return code
716 ph10 510 unaltered. */
717 ph10 512
718     if (rrc == MATCH_SKIP_ARG &&
719 ph10 510 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
720     {
721     md->start_match_ptr = eptr;
722     RRETURN(MATCH_SKIP);
723     }
724    
725 ph10 512 if (md->mark == NULL) md->mark = markptr;
726 ph10 510 RRETURN(rrc);
727    
728 ph10 210 case OP_FAIL:
729 ph10 510 MRRETURN(MATCH_NOMATCH);
730 ph10 211
731 ph10 551 /* COMMIT overrides PRUNE, SKIP, and THEN */
732 ph10 553
733 ph10 510 case OP_COMMIT:
734     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
735     ims, eptrb, flags, RM52);
736 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
737 ph10 553 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
738     rrc != MATCH_THEN)
739 ph10 551 RRETURN(rrc);
740 ph10 510 MRRETURN(MATCH_COMMIT);
741    
742 ph10 551 /* PRUNE overrides THEN */
743 ph10 553
744 ph10 210 case OP_PRUNE:
745     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
746     ims, eptrb, flags, RM51);
747 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748 ph10 510 MRRETURN(MATCH_PRUNE);
749 ph10 211
750 ph10 510 case OP_PRUNE_ARG:
751     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752 ph10 512 ims, eptrb, flags, RM56);
753 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
754 ph10 510 md->mark = ecode + 2;
755     RRETURN(MATCH_PRUNE);
756 ph10 211
757 ph10 551 /* SKIP overrides PRUNE and THEN */
758 ph10 553
759 ph10 210 case OP_SKIP:
760     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
761     ims, eptrb, flags, RM53);
762 ph10 553 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
763 ph10 551 RRETURN(rrc);
764 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
765 ph10 510 MRRETURN(MATCH_SKIP);
766 ph10 211
767 ph10 510 case OP_SKIP_ARG:
768     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
769 ph10 512 ims, eptrb, flags, RM57);
770 ph10 553 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
771 ph10 551 RRETURN(rrc);
772 ph10 512
773     /* Pass back the current skip name by overloading md->start_match_ptr and
774     returning the special MATCH_SKIP_ARG return code. This will either be
775     caught by a matching MARK, or get to the top, where it is treated the same
776 ph10 510 as PRUNE. */
777 ph10 512
778 ph10 510 md->start_match_ptr = ecode + 2;
779 ph10 512 RRETURN(MATCH_SKIP_ARG);
780 ph10 553
781 ph10 550 /* For THEN (and THEN_ARG) we pass back the address of the bracket or
782 ph10 553 the alt that is at the start of the current branch. This makes it possible
783     to skip back past alternatives that precede the THEN within the current
784     branch. */
785 ph10 512
786 ph10 210 case OP_THEN:
787     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
788 ph10 212 ims, eptrb, flags, RM54);
789 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
790 ph10 550 md->start_match_ptr = ecode - GET(ecode, 1);
791 ph10 510 MRRETURN(MATCH_THEN);
792    
793     case OP_THEN_ARG:
794 ph10 553 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
795 ph10 550 offset_top, md, ims, eptrb, flags, RM58);
796 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
797 ph10 550 md->start_match_ptr = ecode - GET(ecode, 1);
798     md->mark = ecode + LINK_SIZE + 2;
799 ph10 212 RRETURN(MATCH_THEN);
800 ph10 211
801 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
802     the current subject position in the working slot at the top of the vector.
803     We mustn't change the current values of the data slot, because they may be
804     set from a previous iteration of this group, and be referred to by a
805     reference inside the group.
806 nigel 77
807 nigel 93 If the bracket fails to match, we need to restore this value and also the
808     values of the final offsets, in case they were set by a previous iteration
809     of the same bracket.
810 nigel 77
811 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
812     a non-capturing bracket. Don't worry about setting the flag for the error
813     case here; that is handled in the code for KET. */
814 nigel 77
815 nigel 93 case OP_CBRA:
816     case OP_SCBRA:
817     number = GET2(ecode, 1+LINK_SIZE);
818 nigel 77 offset = number << 1;
819    
820 ph10 475 #ifdef PCRE_DEBUG
821 nigel 93 printf("start bracket %d\n", number);
822     printf("subject=");
823 nigel 77 pchars(eptr, 16, TRUE, md);
824     printf("\n");
825     #endif
826    
827     if (offset < md->offset_max)
828     {
829     save_offset1 = md->offset_vector[offset];
830     save_offset2 = md->offset_vector[offset+1];
831     save_offset3 = md->offset_vector[md->offset_end - number];
832     save_capture_last = md->capture_last;
833    
834     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
835 ph10 531 md->offset_vector[md->offset_end - number] =
836 ph10 530 (int)(eptr - md->start_subject);
837 nigel 77
838 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
839 nigel 77 do
840     {
841 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
842     ims, eptrb, flags, RM1);
843 ph10 550 if (rrc != MATCH_NOMATCH &&
844     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
845     RRETURN(rrc);
846 nigel 77 md->capture_last = save_capture_last;
847     ecode += GET(ecode, 1);
848     }
849     while (*ecode == OP_ALT);
850    
851     DPRINTF(("bracket %d failed\n", number));
852    
853     md->offset_vector[offset] = save_offset1;
854     md->offset_vector[offset+1] = save_offset2;
855     md->offset_vector[md->offset_end - number] = save_offset3;
856    
857 ph10 510 if (rrc != MATCH_THEN) md->mark = markptr;
858 nigel 77 RRETURN(MATCH_NOMATCH);
859     }
860    
861 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
862     as a non-capturing bracket. */
863 nigel 77
864 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
865     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
866    
867 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
868 nigel 77
869 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
870     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
871    
872 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
873     final alternative within the brackets, we would return the result of a
874     recursive call to match() whatever happened. We can reduce stack usage by
875 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
876     is set.*/
877 nigel 77
878 nigel 93 case OP_BRA:
879     case OP_SBRA:
880     DPRINTF(("start non-capturing bracket\n"));
881     flags = (op >= OP_SBRA)? match_cbegroup : 0;
882 nigel 91 for (;;)
883 nigel 77 {
884 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
885 nigel 93 {
886 ph10 197 if (flags == 0) /* Not a possibly empty group */
887     {
888     ecode += _pcre_OP_lengths[*ecode];
889     DPRINTF(("bracket 0 tail recursion\n"));
890     goto TAIL_RECURSE;
891     }
892    
893     /* Possibly empty group; can't use tail recursion. */
894    
895     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
896     eptrb, flags, RM48);
897 ph10 512 if (rrc == MATCH_NOMATCH) md->mark = markptr;
898     RRETURN(rrc);
899 nigel 93 }
900 nigel 91
901     /* For non-final alternatives, continue the loop for a NOMATCH result;
902     otherwise return. */
903    
904 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
905     eptrb, flags, RM2);
906 ph10 550 if (rrc != MATCH_NOMATCH &&
907     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
908     RRETURN(rrc);
909 nigel 77 ecode += GET(ecode, 1);
910     }
911 nigel 91 /* Control never reaches here. */
912 nigel 77
913     /* Conditional group: compilation checked that there are no more than
914     two branches. If the condition is false, skipping the first branch takes us
915     past the end if there is only one branch, but that's OK because that is
916 nigel 91 exactly what going to the ket would do. As there is only one branch to be
917     obeyed, we can use tail recursion to avoid using another stack frame. */
918 nigel 77
919     case OP_COND:
920 nigel 93 case OP_SCOND:
921 ph10 399 codelink= GET(ecode, 1);
922 ph10 406
923 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
924     inserted between OP_COND and an assertion condition. */
925 ph10 392
926 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
927     {
928     if (pcre_callout != NULL)
929     {
930     pcre_callout_block cb;
931     cb.version = 1; /* Version 1 of the callout block */
932     cb.callout_number = ecode[LINK_SIZE+2];
933     cb.offset_vector = md->offset_vector;
934     cb.subject = (PCRE_SPTR)md->start_subject;
935 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
936     cb.start_match = (int)(mstart - md->start_subject);
937     cb.current_position = (int)(eptr - md->start_subject);
938 ph10 381 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
939     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
940     cb.capture_top = offset_top/2;
941     cb.capture_last = md->capture_last;
942     cb.callout_data = md->callout_data;
943 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
944 ph10 381 if (rrc < 0) RRETURN(rrc);
945     }
946     ecode += _pcre_OP_lengths[OP_CALLOUT];
947     }
948 ph10 392
949 ph10 399 condcode = ecode[LINK_SIZE+1];
950 ph10 406
951 ph10 381 /* Now see what the actual condition is */
952 ph10 392
953 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
954 nigel 77 {
955 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
956     {
957 ph10 461 condition = FALSE;
958     ecode += GET(ecode, 1);
959     }
960 ph10 459 else
961 ph10 461 {
962 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
963     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
964 ph10 461
965 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
966     false, but the test was set up by name, scan the table to see if the
967     name refers to any other numbers, and test them. The condition is true
968     if any one is set. */
969 ph10 461
970 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
971     {
972     uschar *slotA = md->name_table;
973     for (i = 0; i < md->name_count; i++)
974 ph10 461 {
975     if (GET2(slotA, 0) == recno) break;
976 ph10 459 slotA += md->name_entry_size;
977     }
978 ph10 461
979 ph10 459 /* Found a name for the number - there can be only one; duplicate
980     names for different numbers are allowed, but not vice versa. First
981     scan down for duplicates. */
982 ph10 461
983 ph10 459 if (i < md->name_count)
984 ph10 461 {
985 ph10 459 uschar *slotB = slotA;
986     while (slotB > md->name_table)
987     {
988     slotB -= md->name_entry_size;
989     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
990     {
991     condition = GET2(slotB, 0) == md->recursive->group_num;
992 ph10 461 if (condition) break;
993     }
994 ph10 459 else break;
995 ph10 461 }
996    
997 ph10 459 /* Scan up for duplicates */
998 ph10 461
999 ph10 459 if (!condition)
1000 ph10 461 {
1001 ph10 459 slotB = slotA;
1002     for (i++; i < md->name_count; i++)
1003     {
1004     slotB += md->name_entry_size;
1005     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1006     {
1007     condition = GET2(slotB, 0) == md->recursive->group_num;
1008     if (condition) break;
1009 ph10 461 }
1010 ph10 459 else break;
1011 ph10 461 }
1012     }
1013 ph10 459 }
1014 ph10 461 }
1015    
1016 ph10 459 /* Choose branch according to the condition */
1017 ph10 461
1018 ph10 459 ecode += condition? 3 : GET(ecode, 1);
1019     }
1020 ph10 461 }
1021 nigel 93
1022 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1023 nigel 93 {
1024 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1025 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1026 ph10 461
1027 ph10 459 /* If the numbered capture is unset, but the reference was by name,
1028 ph10 461 scan the table to see if the name refers to any other numbers, and test
1029     them. The condition is true if any one is set. This is tediously similar
1030     to the code above, but not close enough to try to amalgamate. */
1031    
1032 ph10 459 if (!condition && condcode == OP_NCREF)
1033     {
1034 ph10 461 int refno = offset >> 1;
1035 ph10 459 uschar *slotA = md->name_table;
1036 ph10 461
1037 ph10 459 for (i = 0; i < md->name_count; i++)
1038 ph10 461 {
1039     if (GET2(slotA, 0) == refno) break;
1040 ph10 459 slotA += md->name_entry_size;
1041     }
1042 ph10 461
1043     /* Found a name for the number - there can be only one; duplicate names
1044     for different numbers are allowed, but not vice versa. First scan down
1045 ph10 459 for duplicates. */
1046 ph10 461
1047 ph10 459 if (i < md->name_count)
1048 ph10 461 {
1049 ph10 459 uschar *slotB = slotA;
1050     while (slotB > md->name_table)
1051     {
1052     slotB -= md->name_entry_size;
1053     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1054     {
1055     offset = GET2(slotB, 0) << 1;
1056 ph10 461 condition = offset < offset_top &&
1057 ph10 459 md->offset_vector[offset] >= 0;
1058 ph10 461 if (condition) break;
1059     }
1060 ph10 459 else break;
1061 ph10 461 }
1062    
1063 ph10 459 /* Scan up for duplicates */
1064 ph10 461
1065 ph10 459 if (!condition)
1066 ph10 461 {
1067 ph10 459 slotB = slotA;
1068     for (i++; i < md->name_count; i++)
1069     {
1070     slotB += md->name_entry_size;
1071     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1072     {
1073     offset = GET2(slotB, 0) << 1;
1074 ph10 461 condition = offset < offset_top &&
1075 ph10 459 md->offset_vector[offset] >= 0;
1076 ph10 461 if (condition) break;
1077     }
1078 ph10 459 else break;
1079 ph10 461 }
1080     }
1081 ph10 459 }
1082 ph10 461 }
1083    
1084 ph10 459 /* Choose branch according to the condition */
1085    
1086 nigel 93 ecode += condition? 3 : GET(ecode, 1);
1087 nigel 77 }
1088    
1089 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
1090 nigel 93 {
1091     condition = FALSE;
1092     ecode += GET(ecode, 1);
1093     }
1094    
1095 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
1096 nigel 93 the final argument match_condassert causes it to stop at the end of an
1097     assertion. */
1098 nigel 77
1099     else
1100     {
1101 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1102     match_condassert, RM3);
1103 nigel 77 if (rrc == MATCH_MATCH)
1104     {
1105 nigel 93 condition = TRUE;
1106     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1107 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1108     }
1109 ph10 550 else if (rrc != MATCH_NOMATCH &&
1110     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1111 nigel 77 {
1112     RRETURN(rrc); /* Need braces because of following else */
1113     }
1114 nigel 93 else
1115     {
1116     condition = FALSE;
1117 ph10 399 ecode += codelink;
1118 nigel 93 }
1119     }
1120 nigel 91
1121 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1122 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1123     match_cbegroup is required for an unlimited repeat of a possibly empty
1124     group. If the second alternative doesn't exist, we can just plough on. */
1125 nigel 91
1126 nigel 93 if (condition || *ecode == OP_ALT)
1127     {
1128 nigel 91 ecode += 1 + LINK_SIZE;
1129 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1130     {
1131     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1132     RRETURN(rrc);
1133     }
1134     else /* Group must match something */
1135     {
1136     flags = 0;
1137     goto TAIL_RECURSE;
1138     }
1139 nigel 77 }
1140 ph10 395 else /* Condition false & no alternative */
1141 nigel 93 {
1142     ecode += 1 + LINK_SIZE;
1143     }
1144     break;
1145 nigel 77
1146 ph10 461
1147 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1148     to close any currently open capturing brackets. */
1149 ph10 461
1150 ph10 447 case OP_CLOSE:
1151 ph10 461 number = GET2(ecode, 1);
1152 ph10 447 offset = number << 1;
1153 ph10 461
1154 ph10 475 #ifdef PCRE_DEBUG
1155 ph10 447 printf("end bracket %d at *ACCEPT", number);
1156     printf("\n");
1157     #endif
1158 nigel 77
1159 ph10 447 md->capture_last = number;
1160     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1161     {
1162     md->offset_vector[offset] =
1163     md->offset_vector[md->offset_end - number];
1164 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1165 ph10 447 if (offset_top <= offset) offset_top = offset + 2;
1166     }
1167     ecode += 3;
1168 ph10 461 break;
1169 ph10 447
1170    
1171 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1172     recursion, we should restore the offsets appropriately and continue from
1173     after the call. */
1174 nigel 77
1175 ph10 210 case OP_ACCEPT:
1176 nigel 77 case OP_END:
1177     if (md->recursive != NULL && md->recursive->group_num == 0)
1178     {
1179     recursion_info *rec = md->recursive;
1180 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1181 nigel 77 md->recursive = rec->prevrec;
1182     memmove(md->offset_vector, rec->offset_save,
1183     rec->saved_max * sizeof(int));
1184 ph10 461 offset_top = rec->save_offset_top;
1185 nigel 77 ims = original_ims;
1186     ecode = rec->after_call;
1187     break;
1188     }
1189    
1190 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1191     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1192     the subject. In both cases, backtracking will then try other alternatives,
1193     if any. */
1194 ph10 443
1195 ph10 442 if (eptr == mstart &&
1196     (md->notempty ||
1197 ph10 443 (md->notempty_atstart &&
1198 ph10 442 mstart == md->start_subject + md->start_offset)))
1199 ph10 510 MRRETURN(MATCH_NOMATCH);
1200 ph10 443
1201 ph10 442 /* Otherwise, we have a match. */
1202 nigel 77
1203 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1204     md->end_offset_top = offset_top; /* and how many extracts were taken */
1205 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1206 nigel 77
1207 ph10 512 /* For some reason, the macros don't work properly if an expression is
1208     given as the argument to MRRETURN when the heap is in use. */
1209    
1210     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1211     MRRETURN(rrc);
1212    
1213 nigel 77 /* Change option settings */
1214    
1215     case OP_OPT:
1216     ims = ecode[1];
1217     ecode += 2;
1218     DPRINTF(("ims set to %02lx\n", ims));
1219     break;
1220    
1221     /* Assertion brackets. Check the alternative branches in turn - the
1222     matching won't pass the KET for an assertion. If any one branch matches,
1223     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1224     start of each branch to move the current point backwards, so the code at
1225     this level is identical to the lookahead case. */
1226    
1227     case OP_ASSERT:
1228     case OP_ASSERTBACK:
1229     do
1230     {
1231 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1232     RM4);
1233 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1234 ph10 500 {
1235     mstart = md->start_match_ptr; /* In case \K reset it */
1236     break;
1237 ph10 501 }
1238 ph10 550 if (rrc != MATCH_NOMATCH &&
1239     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1240     RRETURN(rrc);
1241 nigel 77 ecode += GET(ecode, 1);
1242     }
1243     while (*ecode == OP_ALT);
1244 ph10 510 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1245 nigel 77
1246     /* If checking an assertion for a condition, return MATCH_MATCH. */
1247    
1248     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1249    
1250     /* Continue from after the assertion, updating the offsets high water
1251     mark, since extracts may have been taken during the assertion. */
1252    
1253     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1254     ecode += 1 + LINK_SIZE;
1255     offset_top = md->end_offset_top;
1256     continue;
1257    
1258 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1259 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1260 ph10 473 branches. */
1261 nigel 77
1262     case OP_ASSERT_NOT:
1263     case OP_ASSERTBACK_NOT:
1264     do
1265     {
1266 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1267     RM5);
1268 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1269 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1270     {
1271     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1272 ph10 482 break;
1273     }
1274 ph10 550 if (rrc != MATCH_NOMATCH &&
1275     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1276     RRETURN(rrc);
1277 nigel 77 ecode += GET(ecode,1);
1278     }
1279     while (*ecode == OP_ALT);
1280    
1281     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1282    
1283     ecode += 1 + LINK_SIZE;
1284     continue;
1285    
1286     /* Move the subject pointer back. This occurs only at the start of
1287     each branch of a lookbehind assertion. If we are too close to the start to
1288     move back, this match function fails. When working with UTF-8 we move
1289     back a number of characters, not bytes. */
1290    
1291     case OP_REVERSE:
1292     #ifdef SUPPORT_UTF8
1293     if (utf8)
1294     {
1295 nigel 93 i = GET(ecode, 1);
1296     while (i-- > 0)
1297 nigel 77 {
1298     eptr--;
1299 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1300 ph10 207 BACKCHAR(eptr);
1301 nigel 77 }
1302     }
1303     else
1304     #endif
1305    
1306     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1307    
1308     {
1309 nigel 93 eptr -= GET(ecode, 1);
1310 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1311 nigel 77 }
1312    
1313 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1314 nigel 77
1315 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1316 nigel 77 ecode += 1 + LINK_SIZE;
1317     break;
1318    
1319     /* The callout item calls an external function, if one is provided, passing
1320     details of the match so far. This is mainly for debugging, though the
1321     function is able to force a failure. */
1322    
1323     case OP_CALLOUT:
1324     if (pcre_callout != NULL)
1325     {
1326     pcre_callout_block cb;
1327     cb.version = 1; /* Version 1 of the callout block */
1328     cb.callout_number = ecode[1];
1329     cb.offset_vector = md->offset_vector;
1330 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1331 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
1332     cb.start_match = (int)(mstart - md->start_subject);
1333     cb.current_position = (int)(eptr - md->start_subject);
1334 nigel 77 cb.pattern_position = GET(ecode, 2);
1335     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1336     cb.capture_top = offset_top/2;
1337     cb.capture_last = md->capture_last;
1338     cb.callout_data = md->callout_data;
1339 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1340 nigel 77 if (rrc < 0) RRETURN(rrc);
1341     }
1342     ecode += 2 + 2*LINK_SIZE;
1343     break;
1344    
1345     /* Recursion either matches the current regex, or some subexpression. The
1346     offset data is the offset to the starting bracket from the start of the
1347     whole pattern. (This is so that it works from duplicated subpatterns.)
1348    
1349     If there are any capturing brackets started but not finished, we have to
1350     save their starting points and reinstate them after the recursion. However,
1351     we don't know how many such there are (offset_top records the completed
1352     total) so we just have to save all the potential data. There may be up to
1353     65535 such values, which is too large to put on the stack, but using malloc
1354     for small numbers seems expensive. As a compromise, the stack is used when
1355     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1356     is used. A problem is what to do if the malloc fails ... there is no way of
1357     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
1358     values on the stack, and accept that the rest may be wrong.
1359    
1360     There are also other values that have to be saved. We use a chained
1361     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1362     for the original version of this logic. */
1363    
1364     case OP_RECURSE:
1365     {
1366     callpat = md->start_code + GET(ecode, 1);
1367 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1368     GET2(callpat, 1 + LINK_SIZE);
1369 nigel 77
1370     /* Add to "recursing stack" */
1371    
1372     new_recursive.prevrec = md->recursive;
1373     md->recursive = &new_recursive;
1374    
1375     /* Find where to continue from afterwards */
1376    
1377     ecode += 1 + LINK_SIZE;
1378     new_recursive.after_call = ecode;
1379    
1380     /* Now save the offset data. */
1381    
1382     new_recursive.saved_max = md->offset_end;
1383     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1384     new_recursive.offset_save = stacksave;
1385     else
1386     {
1387     new_recursive.offset_save =
1388     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1389     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1390     }
1391    
1392     memcpy(new_recursive.offset_save, md->offset_vector,
1393     new_recursive.saved_max * sizeof(int));
1394 ph10 461 new_recursive.save_offset_top = offset_top;
1395 nigel 77
1396     /* OK, now we can do the recursion. For each top-level alternative we
1397     restore the offset and recursion data. */
1398    
1399     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1400 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1401 nigel 77 do
1402     {
1403 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1404     md, ims, eptrb, flags, RM6);
1405 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1406 nigel 77 {
1407 nigel 87 DPRINTF(("Recursion matched\n"));
1408 nigel 77 md->recursive = new_recursive.prevrec;
1409     if (new_recursive.offset_save != stacksave)
1410     (pcre_free)(new_recursive.offset_save);
1411 ph10 510 MRRETURN(MATCH_MATCH);
1412 nigel 77 }
1413 ph10 550 else if (rrc != MATCH_NOMATCH &&
1414     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1415 nigel 87 {
1416     DPRINTF(("Recursion gave error %d\n", rrc));
1417 ph10 400 if (new_recursive.offset_save != stacksave)
1418     (pcre_free)(new_recursive.offset_save);
1419 nigel 87 RRETURN(rrc);
1420     }
1421 nigel 77
1422     md->recursive = &new_recursive;
1423     memcpy(md->offset_vector, new_recursive.offset_save,
1424     new_recursive.saved_max * sizeof(int));
1425     callpat += GET(callpat, 1);
1426     }
1427     while (*callpat == OP_ALT);
1428    
1429     DPRINTF(("Recursion didn't match\n"));
1430     md->recursive = new_recursive.prevrec;
1431     if (new_recursive.offset_save != stacksave)
1432     (pcre_free)(new_recursive.offset_save);
1433 ph10 510 MRRETURN(MATCH_NOMATCH);
1434 nigel 77 }
1435     /* Control never reaches here */
1436    
1437     /* "Once" brackets are like assertion brackets except that after a match,
1438     the point in the subject string is not moved back. Thus there can never be
1439     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1440     Check the alternative branches in turn - the matching won't pass the KET
1441     for this kind of subpattern. If any one branch matches, we carry on as at
1442 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1443     the start-of-match value in case it was changed by \K. */
1444 nigel 77
1445     case OP_ONCE:
1446 nigel 91 prev = ecode;
1447     saved_eptr = eptr;
1448    
1449     do
1450 nigel 77 {
1451 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1452 ph10 511 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1453 ph10 500 {
1454     mstart = md->start_match_ptr;
1455     break;
1456 ph10 501 }
1457 ph10 550 if (rrc != MATCH_NOMATCH &&
1458     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1459     RRETURN(rrc);
1460 nigel 91 ecode += GET(ecode,1);
1461     }
1462     while (*ecode == OP_ALT);
1463 nigel 77
1464 nigel 91 /* If hit the end of the group (which could be repeated), fail */
1465 nigel 77
1466 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1467 nigel 77
1468 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1469     mark, since extracts may have been taken. */
1470 nigel 77
1471 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1472 nigel 77
1473 nigel 91 offset_top = md->end_offset_top;
1474     eptr = md->end_match_ptr;
1475 nigel 77
1476 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1477     happens for a repeating ket if no characters were matched in the group.
1478     This is the forcible breaking of infinite loops as implemented in Perl
1479     5.005. If there is an options reset, it will get obeyed in the normal
1480     course of events. */
1481 nigel 77
1482 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1483     {
1484     ecode += 1+LINK_SIZE;
1485     break;
1486     }
1487 nigel 77
1488 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1489     preceding bracket, in the appropriate order. The second "call" of match()
1490     uses tail recursion, to avoid using another stack frame. We need to reset
1491     any options that changed within the bracket before re-running it, so
1492     check the next opcode. */
1493 nigel 77
1494 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1495     {
1496     ims = (ims & ~PCRE_IMS) | ecode[4];
1497     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1498     }
1499 nigel 77
1500 nigel 91 if (*ecode == OP_KETRMIN)
1501     {
1502 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1503 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1504     ecode = prev;
1505 ph10 197 flags = 0;
1506 nigel 91 goto TAIL_RECURSE;
1507 nigel 77 }
1508 nigel 91 else /* OP_KETRMAX */
1509     {
1510 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1511 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1512     ecode += 1 + LINK_SIZE;
1513 ph10 197 flags = 0;
1514 nigel 91 goto TAIL_RECURSE;
1515     }
1516     /* Control never gets here */
1517 nigel 77
1518     /* An alternation is the end of a branch; scan along to find the end of the
1519     bracketed group and go to there. */
1520    
1521     case OP_ALT:
1522     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1523     break;
1524    
1525 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1526     indicating that it may occur zero times. It may repeat infinitely, or not
1527     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1528     with fixed upper repeat limits are compiled as a number of copies, with the
1529     optional ones preceded by BRAZERO or BRAMINZERO. */
1530 nigel 77
1531     case OP_BRAZERO:
1532     {
1533     next = ecode+1;
1534 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1535 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1536     do next += GET(next,1); while (*next == OP_ALT);
1537 nigel 93 ecode = next + 1 + LINK_SIZE;
1538 nigel 77 }
1539     break;
1540    
1541     case OP_BRAMINZERO:
1542     {
1543     next = ecode+1;
1544 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1545 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1546 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1547     ecode++;
1548     }
1549     break;
1550    
1551 ph10 335 case OP_SKIPZERO:
1552     {
1553     next = ecode+1;
1554     do next += GET(next,1); while (*next == OP_ALT);
1555     ecode = next + 1 + LINK_SIZE;
1556     }
1557     break;
1558    
1559 nigel 93 /* End of a group, repeated or non-repeating. */
1560 nigel 77
1561     case OP_KET:
1562     case OP_KETRMIN:
1563     case OP_KETRMAX:
1564 nigel 91 prev = ecode - GET(ecode, 1);
1565 nigel 77
1566 nigel 93 /* If this was a group that remembered the subject start, in order to break
1567     infinite repeats of empty string matches, retrieve the subject start from
1568     the chain. Otherwise, set it NULL. */
1569 nigel 77
1570 nigel 93 if (*prev >= OP_SBRA)
1571     {
1572     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1573     eptrb = eptrb->epb_prev; /* Backup to previous group */
1574     }
1575     else saved_eptr = NULL;
1576 nigel 77
1577 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1578     matching and return MATCH_MATCH, but record the current high water mark for
1579     use by positive assertions. We also need to record the match start in case
1580     it was changed by \K. */
1581 nigel 93
1582 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1583     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1584     *prev == OP_ONCE)
1585     {
1586     md->end_match_ptr = eptr; /* For ONCE */
1587     md->end_offset_top = offset_top;
1588 ph10 500 md->start_match_ptr = mstart;
1589 ph10 510 MRRETURN(MATCH_MATCH);
1590 nigel 91 }
1591 nigel 77
1592 nigel 93 /* For capturing groups we have to check the group number back at the start
1593     and if necessary complete handling an extraction by setting the offsets and
1594     bumping the high water mark. Note that whole-pattern recursion is coded as
1595     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1596     when the OP_END is reached. Other recursion is handled here. */
1597 nigel 77
1598 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1599 nigel 91 {
1600 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1601 nigel 91 offset = number << 1;
1602 ph10 461
1603 ph10 475 #ifdef PCRE_DEBUG
1604 nigel 91 printf("end bracket %d", number);
1605     printf("\n");
1606 nigel 77 #endif
1607    
1608 nigel 93 md->capture_last = number;
1609     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1610 nigel 91 {
1611 nigel 93 md->offset_vector[offset] =
1612     md->offset_vector[md->offset_end - number];
1613 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1614 nigel 93 if (offset_top <= offset) offset_top = offset + 2;
1615     }
1616 nigel 77
1617 nigel 93 /* Handle a recursively called group. Restore the offsets
1618     appropriately and continue from after the call. */
1619 nigel 77
1620 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1621     {
1622     recursion_info *rec = md->recursive;
1623     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1624     md->recursive = rec->prevrec;
1625     memcpy(md->offset_vector, rec->offset_save,
1626     rec->saved_max * sizeof(int));
1627 ph10 461 offset_top = rec->save_offset_top;
1628 nigel 93 ecode = rec->after_call;
1629     ims = original_ims;
1630     break;
1631 nigel 77 }
1632 nigel 91 }
1633 nigel 77
1634 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1635     flags, in case they got changed during the group. */
1636 nigel 77
1637 nigel 91 ims = original_ims;
1638     DPRINTF(("ims reset to %02lx\n", ims));
1639 nigel 77
1640 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1641     happens for a repeating ket if no characters were matched in the group.
1642     This is the forcible breaking of infinite loops as implemented in Perl
1643     5.005. If there is an options reset, it will get obeyed in the normal
1644     course of events. */
1645 nigel 77
1646 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1647     {
1648     ecode += 1 + LINK_SIZE;
1649     break;
1650     }
1651 nigel 77
1652 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1653     preceding bracket, in the appropriate order. In the second case, we can use
1654 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1655     unlimited repeat of a group that can match an empty string. */
1656 nigel 77
1657 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1658    
1659 nigel 91 if (*ecode == OP_KETRMIN)
1660     {
1661 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1662 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1663 ph10 197 if (flags != 0) /* Could match an empty string */
1664     {
1665     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1666     RRETURN(rrc);
1667     }
1668 nigel 91 ecode = prev;
1669     goto TAIL_RECURSE;
1670 nigel 77 }
1671 nigel 91 else /* OP_KETRMAX */
1672     {
1673 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1674 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1675     ecode += 1 + LINK_SIZE;
1676 ph10 197 flags = 0;
1677 nigel 91 goto TAIL_RECURSE;
1678     }
1679     /* Control never gets here */
1680 nigel 77
1681     /* Start of subject unless notbol, or after internal newline if multiline */
1682    
1683     case OP_CIRC:
1684 ph10 510 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1685 nigel 77 if ((ims & PCRE_MULTILINE) != 0)
1686     {
1687 nigel 91 if (eptr != md->start_subject &&
1688 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1689 ph10 510 MRRETURN(MATCH_NOMATCH);
1690 nigel 77 ecode++;
1691     break;
1692     }
1693     /* ... else fall through */
1694    
1695     /* Start of subject assertion */
1696    
1697     case OP_SOD:
1698 ph10 510 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1699 nigel 77 ecode++;
1700     break;
1701    
1702     /* Start of match assertion */
1703    
1704     case OP_SOM:
1705 ph10 510 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1706 nigel 77 ecode++;
1707     break;
1708 ph10 172
1709 ph10 168 /* Reset the start of match point */
1710 ph10 172
1711 ph10 168 case OP_SET_SOM:
1712     mstart = eptr;
1713 ph10 172 ecode++;
1714     break;
1715 nigel 77
1716     /* Assert before internal newline if multiline, or before a terminating
1717     newline unless endonly is set, else end of subject unless noteol is set. */
1718    
1719     case OP_DOLL:
1720     if ((ims & PCRE_MULTILINE) != 0)
1721     {
1722     if (eptr < md->end_subject)
1723 ph10 510 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1724 nigel 77 else
1725 ph10 579 {
1726     if (md->noteol) MRRETURN(MATCH_NOMATCH);
1727 ph10 553 SCHECK_PARTIAL();
1728     }
1729 nigel 77 ecode++;
1730     break;
1731     }
1732 ph10 553 else /* Not multiline */
1733 nigel 77 {
1734 ph10 510 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1735 ph10 553 if (!md->endonly) goto ASSERT_NL_OR_EOS;
1736 nigel 77 }
1737 ph10 579
1738 nigel 91 /* ... else fall through for endonly */
1739 nigel 77
1740     /* End of subject assertion (\z) */
1741    
1742     case OP_EOD:
1743 ph10 510 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1744 ph10 553 SCHECK_PARTIAL();
1745 nigel 77 ecode++;
1746     break;
1747    
1748     /* End of subject or ending \n assertion (\Z) */
1749    
1750     case OP_EODN:
1751 ph10 553 ASSERT_NL_OR_EOS:
1752     if (eptr < md->end_subject &&
1753 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1754 ph10 510 MRRETURN(MATCH_NOMATCH);
1755 ph10 579
1756 ph10 553 /* Either at end of string or \n before end. */
1757 ph10 579
1758 ph10 553 SCHECK_PARTIAL();
1759 nigel 77 ecode++;
1760     break;
1761    
1762     /* Word boundary assertions */
1763    
1764     case OP_NOT_WORD_BOUNDARY:
1765     case OP_WORD_BOUNDARY:
1766     {
1767    
1768     /* Find out if the previous and current characters are "word" characters.
1769     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1770 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1771 ph10 435 partial matching. */
1772 nigel 77
1773     #ifdef SUPPORT_UTF8
1774     if (utf8)
1775     {
1776 ph10 518 /* Get status of previous character */
1777 ph10 527
1778 nigel 77 if (eptr == md->start_subject) prev_is_word = FALSE; else
1779     {
1780 ph10 409 USPTR lastptr = eptr - 1;
1781 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1782 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1783 nigel 77 GETCHAR(c, lastptr);
1784 ph10 527 #ifdef SUPPORT_UCP
1785 ph10 518 if (md->use_ucp)
1786     {
1787     if (c == '_') prev_is_word = TRUE; else
1788 ph10 527 {
1789 ph10 518 int cat = UCD_CATEGORY(c);
1790     prev_is_word = (cat == ucp_L || cat == ucp_N);
1791 ph10 527 }
1792     }
1793     else
1794     #endif
1795 nigel 77 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1796     }
1797 ph10 527
1798 ph10 518 /* Get status of next character */
1799 ph10 527
1800 ph10 443 if (eptr >= md->end_subject)
1801 nigel 77 {
1802 ph10 443 SCHECK_PARTIAL();
1803     cur_is_word = FALSE;
1804 ph10 428 }
1805     else
1806     {
1807 nigel 77 GETCHAR(c, eptr);
1808 ph10 527 #ifdef SUPPORT_UCP
1809 ph10 518 if (md->use_ucp)
1810     {
1811     if (c == '_') cur_is_word = TRUE; else
1812 ph10 527 {
1813 ph10 518 int cat = UCD_CATEGORY(c);
1814     cur_is_word = (cat == ucp_L || cat == ucp_N);
1815 ph10 527 }
1816     }
1817     else
1818     #endif
1819 nigel 77 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1820     }
1821     }
1822     else
1823     #endif
1824    
1825 ph10 527 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1826 ph10 518 consistency with the behaviour of \w we do use it in this case. */
1827 nigel 77
1828     {
1829 ph10 518 /* Get status of previous character */
1830 ph10 527
1831 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1832     {
1833 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1834 ph10 527 #ifdef SUPPORT_UCP
1835 ph10 518 if (md->use_ucp)
1836     {
1837 ph10 527 c = eptr[-1];
1838 ph10 518 if (c == '_') prev_is_word = TRUE; else
1839 ph10 527 {
1840 ph10 518 int cat = UCD_CATEGORY(c);
1841     prev_is_word = (cat == ucp_L || cat == ucp_N);
1842 ph10 527 }
1843     }
1844     else
1845     #endif
1846 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1847     }
1848 ph10 527
1849 ph10 518 /* Get status of next character */
1850 ph10 527
1851 ph10 443 if (eptr >= md->end_subject)
1852 ph10 428 {
1853 ph10 443 SCHECK_PARTIAL();
1854     cur_is_word = FALSE;
1855 ph10 428 }
1856 ph10 527 else
1857     #ifdef SUPPORT_UCP
1858 ph10 518 if (md->use_ucp)
1859     {
1860 ph10 527 c = *eptr;
1861 ph10 518 if (c == '_') cur_is_word = TRUE; else
1862 ph10 527 {
1863 ph10 518 int cat = UCD_CATEGORY(c);
1864     cur_is_word = (cat == ucp_L || cat == ucp_N);
1865 ph10 527 }
1866     }
1867     else
1868     #endif
1869 ph10 518 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1870 nigel 77 }
1871    
1872     /* Now see if the situation is what we want */
1873    
1874     if ((*ecode++ == OP_WORD_BOUNDARY)?
1875     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1876 ph10 510 MRRETURN(MATCH_NOMATCH);
1877 nigel 77 }
1878     break;
1879    
1880     /* Match a single character type; inline for speed */
1881    
1882     case OP_ANY:
1883 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1884 ph10 345 /* Fall through */
1885    
1886 ph10 341 case OP_ALLANY:
1887 ph10 443 if (eptr++ >= md->end_subject)
1888 ph10 428 {
1889 ph10 443 SCHECK_PARTIAL();
1890 ph10 510 MRRETURN(MATCH_NOMATCH);
1891 ph10 443 }
1892 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1893 nigel 77 ecode++;
1894     break;
1895    
1896     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1897     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1898    
1899     case OP_ANYBYTE:
1900 ph10 443 if (eptr++ >= md->end_subject)
1901 ph10 428 {
1902 ph10 443 SCHECK_PARTIAL();
1903 ph10 510 MRRETURN(MATCH_NOMATCH);
1904 ph10 443 }
1905 nigel 77 ecode++;
1906     break;
1907    
1908     case OP_NOT_DIGIT:
1909 ph10 443 if (eptr >= md->end_subject)
1910 ph10 428 {
1911 ph10 443 SCHECK_PARTIAL();
1912 ph10 510 MRRETURN(MATCH_NOMATCH);
1913 ph10 443 }
1914 nigel 77 GETCHARINCTEST(c, eptr);
1915     if (
1916     #ifdef SUPPORT_UTF8
1917     c < 256 &&
1918     #endif
1919     (md->ctypes[c] & ctype_digit) != 0
1920     )
1921 ph10 510 MRRETURN(MATCH_NOMATCH);
1922 nigel 77 ecode++;
1923     break;
1924    
1925     case OP_DIGIT:
1926 ph10 443 if (eptr >= md->end_subject)
1927 ph10 428 {
1928 ph10 443 SCHECK_PARTIAL();
1929 ph10 510 MRRETURN(MATCH_NOMATCH);
1930 ph10 443 }
1931 nigel 77 GETCHARINCTEST(c, eptr);
1932     if (
1933     #ifdef SUPPORT_UTF8
1934     c >= 256 ||
1935     #endif
1936     (md->ctypes[c] & ctype_digit) == 0
1937     )
1938 ph10 510 MRRETURN(MATCH_NOMATCH);
1939 nigel 77 ecode++;
1940     break;
1941    
1942     case OP_NOT_WHITESPACE:
1943 ph10 443 if (eptr >= md->end_subject)
1944 ph10 428 {
1945 ph10 443 SCHECK_PARTIAL();
1946 ph10 510 MRRETURN(MATCH_NOMATCH);
1947 ph10 443 }
1948 nigel 77 GETCHARINCTEST(c, eptr);
1949     if (
1950     #ifdef SUPPORT_UTF8
1951     c < 256 &&
1952     #endif
1953     (md->ctypes[c] & ctype_space) != 0
1954     )
1955 ph10 510 MRRETURN(MATCH_NOMATCH);
1956 nigel 77 ecode++;
1957     break;
1958    
1959     case OP_WHITESPACE:
1960 ph10 443 if (eptr >= md->end_subject)
1961 ph10 428 {
1962 ph10 443 SCHECK_PARTIAL();
1963 ph10 510 MRRETURN(MATCH_NOMATCH);
1964 ph10 443 }
1965 nigel 77 GETCHARINCTEST(c, eptr);
1966     if (
1967     #ifdef SUPPORT_UTF8
1968     c >= 256 ||
1969     #endif
1970     (md->ctypes[c] & ctype_space) == 0
1971     )
1972 ph10 510 MRRETURN(MATCH_NOMATCH);
1973 nigel 77 ecode++;
1974     break;
1975    
1976     case OP_NOT_WORDCHAR:
1977 ph10 443 if (eptr >= md->end_subject)
1978 ph10 428 {
1979 ph10 443 SCHECK_PARTIAL();
1980 ph10 510 MRRETURN(MATCH_NOMATCH);
1981 ph10 443 }
1982 nigel 77 GETCHARINCTEST(c, eptr);
1983     if (
1984     #ifdef SUPPORT_UTF8
1985     c < 256 &&
1986     #endif
1987     (md->ctypes[c] & ctype_word) != 0
1988     )
1989 ph10 510 MRRETURN(MATCH_NOMATCH);
1990 nigel 77 ecode++;
1991     break;
1992    
1993     case OP_WORDCHAR:
1994 ph10 443 if (eptr >= md->end_subject)
1995 ph10 428 {
1996 ph10 443 SCHECK_PARTIAL();
1997 ph10 510 MRRETURN(MATCH_NOMATCH);
1998 ph10 443 }
1999 nigel 77 GETCHARINCTEST(c, eptr);
2000     if (
2001     #ifdef SUPPORT_UTF8
2002     c >= 256 ||
2003     #endif
2004     (md->ctypes[c] & ctype_word) == 0
2005     )
2006 ph10 510 MRRETURN(MATCH_NOMATCH);
2007 nigel 77 ecode++;
2008     break;
2009    
2010 nigel 93 case OP_ANYNL:
2011 ph10 443 if (eptr >= md->end_subject)
2012 ph10 428 {
2013 ph10 443 SCHECK_PARTIAL();
2014 ph10 510 MRRETURN(MATCH_NOMATCH);
2015 ph10 443 }
2016 nigel 93 GETCHARINCTEST(c, eptr);
2017     switch(c)
2018     {
2019 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2020 ph10 600
2021 nigel 93 case 0x000d:
2022     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2023     break;
2024 ph10 231
2025 nigel 93 case 0x000a:
2026 ph10 231 break;
2027    
2028 nigel 93 case 0x000b:
2029     case 0x000c:
2030     case 0x0085:
2031     case 0x2028:
2032     case 0x2029:
2033 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2034 nigel 93 break;
2035     }
2036     ecode++;
2037     break;
2038    
2039 ph10 178 case OP_NOT_HSPACE:
2040 ph10 443 if (eptr >= md->end_subject)
2041 ph10 428 {
2042 ph10 443 SCHECK_PARTIAL();
2043 ph10 510 MRRETURN(MATCH_NOMATCH);
2044 ph10 443 }
2045 ph10 178 GETCHARINCTEST(c, eptr);
2046     switch(c)
2047     {
2048     default: break;
2049     case 0x09: /* HT */
2050     case 0x20: /* SPACE */
2051     case 0xa0: /* NBSP */
2052     case 0x1680: /* OGHAM SPACE MARK */
2053     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2054     case 0x2000: /* EN QUAD */
2055     case 0x2001: /* EM QUAD */
2056     case 0x2002: /* EN SPACE */
2057     case 0x2003: /* EM SPACE */
2058     case 0x2004: /* THREE-PER-EM SPACE */
2059     case 0x2005: /* FOUR-PER-EM SPACE */
2060     case 0x2006: /* SIX-PER-EM SPACE */
2061     case 0x2007: /* FIGURE SPACE */
2062     case 0x2008: /* PUNCTUATION SPACE */
2063     case 0x2009: /* THIN SPACE */
2064     case 0x200A: /* HAIR SPACE */
2065     case 0x202f: /* NARROW NO-BREAK SPACE */
2066     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2067     case 0x3000: /* IDEOGRAPHIC SPACE */
2068 ph10 510 MRRETURN(MATCH_NOMATCH);
2069 ph10 178 }
2070     ecode++;
2071     break;
2072    
2073     case OP_HSPACE:
2074 ph10 443 if (eptr >= md->end_subject)
2075 ph10 428 {
2076 ph10 443 SCHECK_PARTIAL();
2077 ph10 510 MRRETURN(MATCH_NOMATCH);
2078 ph10 443 }
2079 ph10 178 GETCHARINCTEST(c, eptr);
2080     switch(c)
2081     {
2082 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2083 ph10 178 case 0x09: /* HT */
2084     case 0x20: /* SPACE */
2085     case 0xa0: /* NBSP */
2086     case 0x1680: /* OGHAM SPACE MARK */
2087     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2088     case 0x2000: /* EN QUAD */
2089     case 0x2001: /* EM QUAD */
2090     case 0x2002: /* EN SPACE */
2091     case 0x2003: /* EM SPACE */
2092     case 0x2004: /* THREE-PER-EM SPACE */
2093     case 0x2005: /* FOUR-PER-EM SPACE */
2094     case 0x2006: /* SIX-PER-EM SPACE */
2095     case 0x2007: /* FIGURE SPACE */
2096     case 0x2008: /* PUNCTUATION SPACE */
2097     case 0x2009: /* THIN SPACE */
2098     case 0x200A: /* HAIR SPACE */
2099     case 0x202f: /* NARROW NO-BREAK SPACE */
2100     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2101     case 0x3000: /* IDEOGRAPHIC SPACE */
2102     break;
2103     }
2104     ecode++;
2105     break;
2106    
2107     case OP_NOT_VSPACE:
2108 ph10 443 if (eptr >= md->end_subject)
2109 ph10 428 {
2110 ph10 443 SCHECK_PARTIAL();
2111 ph10 510 MRRETURN(MATCH_NOMATCH);
2112 ph10 443 }
2113 ph10 178 GETCHARINCTEST(c, eptr);
2114     switch(c)
2115     {
2116     default: break;
2117     case 0x0a: /* LF */
2118     case 0x0b: /* VT */
2119     case 0x0c: /* FF */
2120     case 0x0d: /* CR */
2121     case 0x85: /* NEL */
2122     case 0x2028: /* LINE SEPARATOR */
2123     case 0x2029: /* PARAGRAPH SEPARATOR */
2124 ph10 510 MRRETURN(MATCH_NOMATCH);
2125 ph10 178 }
2126     ecode++;
2127     break;
2128    
2129     case OP_VSPACE:
2130 ph10 443 if (eptr >= md->end_subject)
2131 ph10 428 {
2132 ph10 443 SCHECK_PARTIAL();
2133 ph10 510 MRRETURN(MATCH_NOMATCH);
2134 ph10 443 }
2135 ph10 178 GETCHARINCTEST(c, eptr);
2136     switch(c)
2137     {
2138 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2139 ph10 178 case 0x0a: /* LF */
2140     case 0x0b: /* VT */
2141     case 0x0c: /* FF */
2142     case 0x0d: /* CR */
2143     case 0x85: /* NEL */
2144     case 0x2028: /* LINE SEPARATOR */
2145     case 0x2029: /* PARAGRAPH SEPARATOR */
2146     break;
2147     }
2148     ecode++;
2149     break;
2150    
2151 nigel 77 #ifdef SUPPORT_UCP
2152     /* Check the next character by Unicode property. We will get here only
2153     if the support is in the binary; otherwise a compile-time error occurs. */
2154    
2155     case OP_PROP:
2156     case OP_NOTPROP:
2157 ph10 443 if (eptr >= md->end_subject)
2158 ph10 428 {
2159 ph10 443 SCHECK_PARTIAL();
2160 ph10 510 MRRETURN(MATCH_NOMATCH);
2161 ph10 443 }
2162 nigel 77 GETCHARINCTEST(c, eptr);
2163     {
2164 ph10 384 const ucd_record *prop = GET_UCD(c);
2165 nigel 77
2166 nigel 87 switch(ecode[1])
2167     {
2168     case PT_ANY:
2169 ph10 510 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2170 nigel 87 break;
2171 nigel 77
2172 nigel 87 case PT_LAMP:
2173 ph10 349 if ((prop->chartype == ucp_Lu ||
2174     prop->chartype == ucp_Ll ||
2175     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2176 ph10 510 MRRETURN(MATCH_NOMATCH);
2177 ph10 517 break;
2178 nigel 87
2179     case PT_GC:
2180 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2181 ph10 510 MRRETURN(MATCH_NOMATCH);
2182 nigel 87 break;
2183    
2184     case PT_PC:
2185 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2186 ph10 510 MRRETURN(MATCH_NOMATCH);
2187 nigel 87 break;
2188    
2189     case PT_SC:
2190 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2191 ph10 510 MRRETURN(MATCH_NOMATCH);
2192 nigel 87 break;
2193 ph10 527
2194 ph10 517 /* These are specials */
2195 ph10 527
2196 ph10 517 case PT_ALNUM:
2197     if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2198     _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2199     MRRETURN(MATCH_NOMATCH);
2200 ph10 527 break;
2201    
2202 ph10 517 case PT_SPACE: /* Perl space */
2203     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2204     c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2205     == (op == OP_NOTPROP))
2206     MRRETURN(MATCH_NOMATCH);
2207 ph10 527 break;
2208    
2209 ph10 517 case PT_PXSPACE: /* POSIX space */
2210     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2211 ph10 527 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2212 ph10 517 c == CHAR_FF || c == CHAR_CR)
2213     == (op == OP_NOTPROP))
2214     MRRETURN(MATCH_NOMATCH);
2215 ph10 527 break;
2216 nigel 87
2217 ph10 527 case PT_WORD:
2218 ph10 517 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2219 ph10 527 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2220 ph10 517 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2221     MRRETURN(MATCH_NOMATCH);
2222 ph10 527 break;
2223    
2224 ph10 517 /* This should never occur */
2225    
2226 nigel 87 default:
2227     RRETURN(PCRE_ERROR_INTERNAL);
2228 nigel 77 }
2229 nigel 87
2230     ecode += 3;
2231 nigel 77 }
2232     break;
2233    
2234     /* Match an extended Unicode sequence. We will get here only if the support
2235     is in the binary; otherwise a compile-time error occurs. */
2236    
2237     case OP_EXTUNI:
2238 ph10 443 if (eptr >= md->end_subject)
2239 ph10 428 {
2240 ph10 443 SCHECK_PARTIAL();
2241 ph10 510 MRRETURN(MATCH_NOMATCH);
2242 ph10 443 }
2243 nigel 77 GETCHARINCTEST(c, eptr);
2244     {
2245 ph10 349 int category = UCD_CATEGORY(c);
2246 ph10 510 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2247 nigel 77 while (eptr < md->end_subject)
2248     {
2249     int len = 1;
2250     if (!utf8) c = *eptr; else
2251     {
2252     GETCHARLEN(c, eptr, len);
2253     }
2254 ph10 349 category = UCD_CATEGORY(c);
2255 nigel 77 if (category != ucp_M) break;
2256     eptr += len;
2257     }
2258     }
2259     ecode++;
2260     break;
2261     #endif
2262    
2263    
2264     /* Match a back reference, possibly repeatedly. Look past the end of the
2265     item to see if there is repeat information following. The code is similar
2266     to that for character classes, but repeated for efficiency. Then obey
2267     similar code to character type repeats - written out again for speed.
2268     However, if the referenced string is the empty string, always treat
2269     it as matched, any number of times (otherwise there could be infinite
2270     loops). */
2271    
2272     case OP_REF:
2273 ph10 595 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2274     ecode += 3;
2275 ph10 345
2276 ph10 595 /* If the reference is unset, there are two possibilities:
2277 ph10 345
2278 ph10 595 (a) In the default, Perl-compatible state, set the length negative;
2279     this ensures that every attempt at a match fails. We can't just fail
2280     here, because of the possibility of quantifiers with zero minima.
2281 ph10 345
2282 ph10 595 (b) If the JavaScript compatibility flag is set, set the length to zero
2283     so that the back reference matches an empty string.
2284 ph10 345
2285 ph10 595 Otherwise, set the length to the length of what was matched by the
2286     referenced subpattern. */
2287 ph10 345
2288 ph10 595 if (offset >= offset_top || md->offset_vector[offset] < 0)
2289     length = (md->jscript_compat)? 0 : -1;
2290     else
2291     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2292 nigel 77
2293 ph10 595 /* Set up for repetition, or handle the non-repeated case */
2294 nigel 77
2295 ph10 595 switch (*ecode)
2296     {
2297     case OP_CRSTAR:
2298     case OP_CRMINSTAR:
2299     case OP_CRPLUS:
2300     case OP_CRMINPLUS:
2301     case OP_CRQUERY:
2302     case OP_CRMINQUERY:
2303     c = *ecode++ - OP_CRSTAR;
2304     minimize = (c & 1) != 0;
2305     min = rep_min[c]; /* Pick up values from tables; */
2306     max = rep_max[c]; /* zero for max => infinity */
2307     if (max == 0) max = INT_MAX;
2308     break;
2309 nigel 77
2310 ph10 595 case OP_CRRANGE:
2311     case OP_CRMINRANGE:
2312     minimize = (*ecode == OP_CRMINRANGE);
2313     min = GET2(ecode, 1);
2314     max = GET2(ecode, 3);
2315     if (max == 0) max = INT_MAX;
2316     ecode += 5;
2317     break;
2318 nigel 77
2319 ph10 595 default: /* No repeat follows */
2320     if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2321     {
2322     CHECK_PARTIAL();
2323     MRRETURN(MATCH_NOMATCH);
2324 nigel 77 }
2325 ph10 595 eptr += length;
2326     continue; /* With the main loop */
2327     }
2328 nigel 77
2329 ph10 595 /* Handle repeated back references. If the length of the reference is
2330     zero, just continue with the main loop. */
2331 ph10 443
2332 ph10 595 if (length == 0) continue;
2333 nigel 77
2334 ph10 595 /* First, ensure the minimum number of matches are present. We get back
2335     the length of the reference string explicitly rather than passing the
2336     address of eptr, so that eptr can be a register variable. */
2337 nigel 77
2338 ph10 595 for (i = 1; i <= min; i++)
2339     {
2340     int slength;
2341     if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2342 nigel 77 {
2343 ph10 595 CHECK_PARTIAL();
2344     MRRETURN(MATCH_NOMATCH);
2345 nigel 77 }
2346 ph10 595 eptr += slength;
2347     }
2348 nigel 77
2349 ph10 595 /* If min = max, continue at the same level without recursion.
2350     They are not both allowed to be zero. */
2351 nigel 77
2352 ph10 595 if (min == max) continue;
2353 nigel 77
2354 ph10 595 /* If minimizing, keep trying and advancing the pointer */
2355 nigel 77
2356 ph10 595 if (minimize)
2357     {
2358     for (fi = min;; fi++)
2359 nigel 77 {
2360 ph10 595 int slength;
2361     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2362     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2363     if (fi >= max) MRRETURN(MATCH_NOMATCH);
2364     if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2365 nigel 77 {
2366 ph10 595 CHECK_PARTIAL();
2367     MRRETURN(MATCH_NOMATCH);
2368 nigel 77 }
2369 ph10 595 eptr += slength;
2370 nigel 77 }
2371 ph10 595 /* Control never gets here */
2372     }
2373 nigel 77
2374 ph10 595 /* If maximizing, find the longest string and work backwards */
2375 nigel 77
2376 ph10 595 else
2377     {
2378     pp = eptr;
2379     for (i = min; i < max; i++)
2380 nigel 77 {
2381 ph10 595 int slength;
2382     if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2383 nigel 77 {
2384 ph10 595 CHECK_PARTIAL();
2385     break;
2386 nigel 77 }
2387 ph10 595 eptr += slength;
2388 nigel 77 }
2389 ph10 595 while (eptr >= pp)
2390     {
2391     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2392     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2393     eptr -= length;
2394     }
2395     MRRETURN(MATCH_NOMATCH);
2396 nigel 77 }
2397     /* Control never gets here */
2398    
2399     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2400     used when all the characters in the class have values in the range 0-255,
2401     and either the matching is caseful, or the characters are in the range
2402     0-127 when UTF-8 processing is enabled. The only difference between
2403     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2404     encountered.
2405    
2406     First, look past the end of the item to see if there is repeat information
2407     following. Then obey similar code to character type repeats - written out
2408     again for speed. */
2409    
2410     case OP_NCLASS:
2411     case OP_CLASS:
2412     {
2413     data = ecode + 1; /* Save for matching */
2414     ecode += 33; /* Advance past the item */
2415    
2416     switch (*ecode)
2417     {
2418     case OP_CRSTAR:
2419     case OP_CRMINSTAR:
2420     case OP_CRPLUS:
2421     case OP_CRMINPLUS:
2422     case OP_CRQUERY:
2423     case OP_CRMINQUERY:
2424     c = *ecode++ - OP_CRSTAR;
2425     minimize = (c & 1) != 0;
2426     min = rep_min[c]; /* Pick up values from tables; */
2427     max = rep_max[c]; /* zero for max => infinity */
2428     if (max == 0) max = INT_MAX;
2429     break;
2430    
2431     case OP_CRRANGE:
2432     case OP_CRMINRANGE:
2433     minimize = (*ecode == OP_CRMINRANGE);
2434     min = GET2(ecode, 1);
2435     max = GET2(ecode, 3);
2436     if (max == 0) max = INT_MAX;
2437     ecode += 5;
2438     break;
2439    
2440     default: /* No repeat follows */
2441     min = max = 1;
2442     break;
2443     }
2444    
2445     /* First, ensure the minimum number of matches are present. */
2446    
2447     #ifdef SUPPORT_UTF8
2448     /* UTF-8 mode */
2449     if (utf8)
2450     {
2451     for (i = 1; i <= min; i++)
2452     {
2453 ph10 427 if (eptr >= md->end_subject)
2454 ph10 426 {
2455 ph10 428 SCHECK_PARTIAL();
2456 ph10 510 MRRETURN(MATCH_NOMATCH);
2457 ph10 427 }
2458 nigel 77 GETCHARINC(c, eptr);
2459     if (c > 255)
2460     {
2461 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2462 nigel 77 }
2463     else
2464     {
2465 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2466 nigel 77 }
2467     }
2468     }
2469     else
2470     #endif
2471     /* Not UTF-8 mode */
2472     {
2473     for (i = 1; i <= min; i++)
2474     {
2475 ph10 427 if (eptr >= md->end_subject)
2476 ph10 426 {
2477 ph10 428 SCHECK_PARTIAL();
2478 ph10 510 MRRETURN(MATCH_NOMATCH);
2479 ph10 427 }
2480 nigel 77 c = *eptr++;
2481 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2482 nigel 77 }
2483     }
2484    
2485     /* If max == min we can continue with the main loop without the
2486     need to recurse. */
2487    
2488     if (min == max) continue;
2489    
2490     /* If minimizing, keep testing the rest of the expression and advancing
2491     the pointer while it matches the class. */
2492    
2493     if (minimize)
2494     {
2495     #ifdef SUPPORT_UTF8
2496     /* UTF-8 mode */
2497     if (utf8)
2498     {
2499     for (fi = min;; fi++)
2500     {
2501 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2502 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2503 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2504 ph10 427 if (eptr >= md->end_subject)
2505 ph10 426 {
2506 ph10 427 SCHECK_PARTIAL();
2507 ph10 510 MRRETURN(MATCH_NOMATCH);
2508 ph10 427 }
2509 nigel 77 GETCHARINC(c, eptr);
2510     if (c > 255)
2511     {
2512 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2513 nigel 77 }
2514     else
2515     {
2516 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2517 nigel 77 }
2518     }
2519     }
2520     else
2521     #endif
2522     /* Not UTF-8 mode */
2523     {
2524     for (fi = min;; fi++)
2525     {
2526 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2527 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2528 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2529 ph10 427 if (eptr >= md->end_subject)
2530 ph10 426 {
2531 ph10 427 SCHECK_PARTIAL();
2532 ph10 510 MRRETURN(MATCH_NOMATCH);
2533 ph10 427 }
2534 nigel 77 c = *eptr++;
2535 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2536 nigel 77 }
2537     }
2538     /* Control never gets here */
2539     }
2540    
2541     /* If maximizing, find the longest possible run, then work backwards. */
2542    
2543     else
2544     {
2545     pp = eptr;
2546    
2547     #ifdef SUPPORT_UTF8
2548     /* UTF-8 mode */
2549     if (utf8)
2550     {
2551     for (i = min; i < max; i++)
2552     {
2553     int len = 1;
2554 ph10 463 if (eptr >= md->end_subject)
2555 ph10 462 {
2556 ph10 463 SCHECK_PARTIAL();
2557 ph10 462 break;
2558 ph10 463 }
2559 nigel 77 GETCHARLEN(c, eptr, len);
2560     if (c > 255)
2561     {
2562     if (op == OP_CLASS) break;
2563     }
2564     else
2565     {
2566     if ((data[c/8] & (1 << (c&7))) == 0) break;
2567     }
2568     eptr += len;
2569     }
2570     for (;;)
2571     {
2572 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2573 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2574     if (eptr-- == pp) break; /* Stop if tried at original pos */
2575     BACKCHAR(eptr);
2576     }
2577     }
2578     else
2579     #endif
2580     /* Not UTF-8 mode */
2581     {
2582     for (i = min; i < max; i++)
2583     {
2584 ph10 463 if (eptr >= md->end_subject)
2585 ph10 462 {
2586 ph10 463 SCHECK_PARTIAL();
2587 ph10 462 break;
2588 ph10 463 }
2589 nigel 77 c = *eptr;
2590     if ((data[c/8] & (1 << (c&7))) == 0) break;
2591     eptr++;
2592     }
2593     while (eptr >= pp)
2594     {
2595 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2596 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2597 nigel 77 eptr--;
2598     }
2599     }
2600    
2601 ph10 510 MRRETURN(MATCH_NOMATCH);
2602 nigel 77 }
2603     }
2604     /* Control never gets here */
2605    
2606    
2607     /* Match an extended character class. This opcode is encountered only
2608 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2609     mode, because Unicode properties are supported in non-UTF-8 mode. */
2610 nigel 77
2611     #ifdef SUPPORT_UTF8
2612     case OP_XCLASS:
2613     {
2614     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2615     ecode += GET(ecode, 1); /* Advance past the item */
2616    
2617     switch (*ecode)
2618     {
2619     case OP_CRSTAR:
2620     case OP_CRMINSTAR:
2621     case OP_CRPLUS:
2622     case OP_CRMINPLUS:
2623     case OP_CRQUERY:
2624     case OP_CRMINQUERY:
2625     c = *ecode++ - OP_CRSTAR;
2626     minimize = (c & 1) != 0;
2627     min = rep_min[c]; /* Pick up values from tables; */
2628     max = rep_max[c]; /* zero for max => infinity */
2629     if (max == 0) max = INT_MAX;
2630     break;
2631    
2632     case OP_CRRANGE:
2633     case OP_CRMINRANGE:
2634     minimize = (*ecode == OP_CRMINRANGE);
2635     min = GET2(ecode, 1);
2636     max = GET2(ecode, 3);
2637     if (max == 0) max = INT_MAX;
2638     ecode += 5;
2639     break;
2640    
2641     default: /* No repeat follows */
2642     min = max = 1;
2643     break;
2644     }
2645    
2646     /* First, ensure the minimum number of matches are present. */
2647    
2648     for (i = 1; i <= min; i++)
2649     {
2650 ph10 427 if (eptr >= md->end_subject)
2651 ph10 426 {
2652     SCHECK_PARTIAL();
2653 ph10 510 MRRETURN(MATCH_NOMATCH);
2654 ph10 427 }
2655 ph10 384 GETCHARINCTEST(c, eptr);
2656 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2657 nigel 77 }
2658    
2659     /* If max == min we can continue with the main loop without the
2660     need to recurse. */
2661    
2662     if (min == max) continue;
2663    
2664     /* If minimizing, keep testing the rest of the expression and advancing
2665     the pointer while it matches the class. */
2666    
2667     if (minimize)
2668     {
2669     for (fi = min;; fi++)
2670     {
2671 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2672 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2673 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2674 ph10 427 if (eptr >= md->end_subject)
2675 ph10 426 {
2676 ph10 427 SCHECK_PARTIAL();
2677 ph10 510 MRRETURN(MATCH_NOMATCH);
2678 ph10 427 }
2679 ph10 384 GETCHARINCTEST(c, eptr);
2680 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2681 nigel 77 }
2682     /* Control never gets here */
2683     }
2684    
2685     /* If maximizing, find the longest possible run, then work backwards. */
2686    
2687     else
2688     {
2689     pp = eptr;
2690     for (i = min; i < max; i++)
2691     {
2692     int len = 1;
2693 ph10 463 if (eptr >= md->end_subject)
2694 ph10 462 {
2695 ph10 463 SCHECK_PARTIAL();
2696 ph10 462 break;
2697 ph10 463 }
2698 ph10 384 GETCHARLENTEST(c, eptr, len);
2699 nigel 77 if (!_pcre_xclass(c, data)) break;
2700     eptr += len;
2701     }
2702     for(;;)
2703     {
2704 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2705 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2706     if (eptr-- == pp) break; /* Stop if tried at original pos */
2707 ph10 214 if (utf8) BACKCHAR(eptr);
2708 nigel 77 }
2709 ph10 510 MRRETURN(MATCH_NOMATCH);
2710 nigel 77 }
2711    
2712     /* Control never gets here */
2713     }
2714     #endif /* End of XCLASS */
2715    
2716     /* Match a single character, casefully */
2717    
2718     case OP_CHAR:
2719     #ifdef SUPPORT_UTF8
2720     if (utf8)
2721     {
2722     length = 1;
2723     ecode++;
2724     GETCHARLEN(fc, ecode, length);
2725 ph10 443 if (length > md->end_subject - eptr)
2726 ph10 428 {
2727     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2728 ph10 510 MRRETURN(MATCH_NOMATCH);
2729 ph10 443 }
2730 ph10 510 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2731 nigel 77 }
2732     else
2733     #endif
2734    
2735     /* Non-UTF-8 mode */
2736     {
2737 ph10 443 if (md->end_subject - eptr < 1)
2738 ph10 428 {
2739     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2740 ph10 510 MRRETURN(MATCH_NOMATCH);
2741 ph10 443 }
2742 ph10 510 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2743 nigel 77 ecode += 2;
2744     }
2745     break;
2746    
2747     /* Match a single character, caselessly */
2748    
2749     case OP_CHARNC:
2750     #ifdef SUPPORT_UTF8
2751     if (utf8)
2752     {
2753     length = 1;
2754     ecode++;
2755     GETCHARLEN(fc, ecode, length);
2756    
2757 ph10 443 if (length > md->end_subject - eptr)
2758 ph10 428 {
2759     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2760 ph10 510 MRRETURN(MATCH_NOMATCH);
2761 ph10 443 }
2762 nigel 77
2763     /* If the pattern character's value is < 128, we have only one byte, and
2764     can use the fast lookup table. */
2765    
2766     if (fc < 128)
2767     {
2768 ph10 510 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2769 nigel 77 }
2770    
2771     /* Otherwise we must pick up the subject character */
2772    
2773     else
2774     {
2775 nigel 93 unsigned int dc;
2776 nigel 77 GETCHARINC(dc, eptr);
2777     ecode += length;
2778    
2779     /* If we have Unicode property support, we can use it to test the other
2780 nigel 87 case of the character, if there is one. */
2781 nigel 77
2782     if (fc != dc)
2783     {
2784     #ifdef SUPPORT_UCP
2785 ph10 349 if (dc != UCD_OTHERCASE(fc))
2786 nigel 77 #endif
2787 ph10 510 MRRETURN(MATCH_NOMATCH);
2788 nigel 77 }
2789     }
2790     }
2791     else
2792     #endif /* SUPPORT_UTF8 */
2793    
2794     /* Non-UTF-8 mode */
2795     {
2796 ph10 443 if (md->end_subject - eptr < 1)
2797 ph10 428 {
2798 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2799 ph10 510 MRRETURN(MATCH_NOMATCH);
2800 ph10 443 }
2801 ph10 510 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2802 nigel 77 ecode += 2;
2803     }
2804     break;
2805    
2806 nigel 93 /* Match a single character repeatedly. */
2807 nigel 77
2808     case OP_EXACT:
2809     min = max = GET2(ecode, 1);
2810     ecode += 3;
2811     goto REPEATCHAR;
2812    
2813 nigel 93 case OP_POSUPTO:
2814     possessive = TRUE;
2815     /* Fall through */
2816    
2817 nigel 77 case OP_UPTO:
2818     case OP_MINUPTO:
2819     min = 0;
2820     max = GET2(ecode, 1);
2821     minimize = *ecode == OP_MINUPTO;
2822     ecode += 3;
2823     goto REPEATCHAR;
2824    
2825 nigel 93 case OP_POSSTAR:
2826     possessive = TRUE;
2827     min = 0;
2828     max = INT_MAX;
2829     ecode++;
2830     goto REPEATCHAR;
2831    
2832     case OP_POSPLUS:
2833     possessive = TRUE;
2834     min = 1;
2835     max = INT_MAX;
2836     ecode++;
2837     goto REPEATCHAR;
2838    
2839     case OP_POSQUERY:
2840     possessive = TRUE;
2841     min = 0;
2842     max = 1;
2843     ecode++;
2844     goto REPEATCHAR;
2845    
2846 nigel 77 case OP_STAR:
2847     case OP_MINSTAR:
2848     case OP_PLUS:
2849     case OP_MINPLUS:
2850     case OP_QUERY:
2851     case OP_MINQUERY:
2852     c = *ecode++ - OP_STAR;
2853     minimize = (c & 1) != 0;
2854 ph10 443
2855 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2856     max = rep_max[c]; /* zero for max => infinity */
2857     if (max == 0) max = INT_MAX;
2858    
2859 ph10 426 /* Common code for all repeated single-character matches. */
2860 nigel 77
2861     REPEATCHAR:
2862     #ifdef SUPPORT_UTF8
2863     if (utf8)
2864     {
2865     length = 1;
2866     charptr = ecode;
2867     GETCHARLEN(fc, ecode, length);
2868     ecode += length;
2869    
2870     /* Handle multibyte character matching specially here. There is
2871     support for caseless matching if UCP support is present. */
2872    
2873     if (length > 1)
2874     {
2875     #ifdef SUPPORT_UCP
2876 nigel 93 unsigned int othercase;
2877 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2878 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2879 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2880 ph10 115 else oclength = 0;
2881 nigel 77 #endif /* SUPPORT_UCP */
2882    
2883     for (i = 1; i <= min; i++)
2884     {
2885 ph10 426 if (eptr <= md->end_subject - length &&
2886     memcmp(eptr, charptr, length) == 0) eptr += length;
2887 ph10 123 #ifdef SUPPORT_UCP
2888 ph10 426 else if (oclength > 0 &&
2889     eptr <= md->end_subject - oclength &&
2890     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2891     #endif /* SUPPORT_UCP */
2892 nigel 77 else
2893     {
2894 ph10 426 CHECK_PARTIAL();
2895 ph10 510 MRRETURN(MATCH_NOMATCH);
2896 nigel 77 }
2897     }
2898    
2899     if (min == max) continue;
2900    
2901     if (minimize)
2902     {
2903     for (fi = min;; fi++)
2904     {
2905 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2906 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2907 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2908 ph10 426 if (eptr <= md->end_subject - length &&
2909     memcmp(eptr, charptr, length) == 0) eptr += length;
2910 ph10 123 #ifdef SUPPORT_UCP
2911 ph10 426 else if (oclength > 0 &&
2912     eptr <= md->end_subject - oclength &&
2913     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2914     #endif /* SUPPORT_UCP */
2915 nigel 77 else
2916     {
2917 ph10 426 CHECK_PARTIAL();
2918 ph10 510 MRRETURN(MATCH_NOMATCH);
2919 nigel 77 }
2920     }
2921     /* Control never gets here */
2922     }
2923 nigel 93
2924     else /* Maximize */
2925 nigel 77 {
2926     pp = eptr;
2927     for (i = min; i < max; i++)
2928     {
2929 ph10 426 if (eptr <= md->end_subject - length &&
2930     memcmp(eptr, charptr, length) == 0) eptr += length;
2931 ph10 123 #ifdef SUPPORT_UCP
2932 ph10 426 else if (oclength > 0 &&
2933     eptr <= md->end_subject - oclength &&
2934     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2935     #endif /* SUPPORT_UCP */
2936 ph10 463 else
2937 ph10 462 {
2938 ph10 463 CHECK_PARTIAL();
2939 ph10 462 break;
2940 ph10 463 }
2941 nigel 77 }
2942 nigel 93
2943     if (possessive) continue;
2944 ph10 427
2945 ph10 120 for(;;)
2946 ph10 426 {
2947     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2948     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2949 ph10 510 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2950 ph10 115 #ifdef SUPPORT_UCP
2951 ph10 426 eptr--;
2952     BACKCHAR(eptr);
2953 ph10 123 #else /* without SUPPORT_UCP */
2954 ph10 426 eptr -= length;
2955 ph10 123 #endif /* SUPPORT_UCP */
2956 ph10 426 }
2957 nigel 77 }
2958     /* Control never gets here */
2959     }
2960    
2961     /* If the length of a UTF-8 character is 1, we fall through here, and
2962     obey the code as for non-UTF-8 characters below, though in this case the
2963     value of fc will always be < 128. */
2964     }
2965     else
2966     #endif /* SUPPORT_UTF8 */
2967    
2968     /* When not in UTF-8 mode, load a single-byte character. */
2969    
2970 ph10 426 fc = *ecode++;
2971 ph10 443
2972 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2973     may not be in UTF-8 mode. The code is duplicated for the caseless and
2974     caseful cases, for speed, since matching characters is likely to be quite
2975     common. First, ensure the minimum number of matches are present. If min =
2976     max, continue at the same level without recursing. Otherwise, if
2977     minimizing, keep trying the rest of the expression and advancing one
2978     matching character if failing, up to the maximum. Alternatively, if
2979     maximizing, find the maximum number of characters and work backwards. */
2980    
2981     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2982     max, eptr));
2983    
2984     if ((ims & PCRE_CASELESS) != 0)
2985     {
2986     fc = md->lcc[fc];
2987     for (i = 1; i <= min; i++)
2988 ph10 426 {
2989     if (eptr >= md->end_subject)
2990     {
2991     SCHECK_PARTIAL();
2992 ph10 510 MRRETURN(MATCH_NOMATCH);
2993 ph10 426 }
2994 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2995 ph10 426 }
2996 nigel 77 if (min == max) continue;
2997     if (minimize)
2998     {
2999     for (fi = min;; fi++)
3000     {
3001 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
3002 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3003 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3004 ph10 426 if (eptr >= md->end_subject)
3005     {
3006 ph10 427 SCHECK_PARTIAL();
3007 ph10 510 MRRETURN(MATCH_NOMATCH);
3008 ph10 426 }
3009 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3010 nigel 77 }
3011     /* Control never gets here */
3012     }
3013 nigel 93 else /* Maximize */
3014 nigel 77 {
3015     pp = eptr;
3016     for (i = min; i < max; i++)
3017     {
3018 ph10 463 if (eptr >= md->end_subject)
3019 ph10 462 {
3020     SCHECK_PARTIAL();
3021     break;
3022 ph10 463 }
3023 ph10 462 if (fc != md->lcc[*eptr]) break;
3024 nigel 77 eptr++;
3025     }
3026 ph10 427
3027 nigel 93 if (possessive) continue;
3028 ph10 427
3029 nigel 77 while (eptr >= pp)
3030     {
3031 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3032 nigel 77 eptr--;
3033     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3034     }
3035 ph10 510 MRRETURN(MATCH_NOMATCH);
3036 nigel 77 }
3037     /* Control never gets here */
3038     }
3039    
3040     /* Caseful comparisons (includes all multi-byte characters) */
3041    
3042     else
3043     {
3044 ph10 427 for (i = 1; i <= min; i++)
3045 ph10 426 {
3046     if (eptr >= md->end_subject)
3047     {
3048     SCHECK_PARTIAL();
3049 ph10 510 MRRETURN(MATCH_NOMATCH);
3050 ph10 426 }
3051 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3052 ph10 427 }
3053 ph10 443
3054 nigel 77 if (min == max) continue;
3055 ph10 443
3056 nigel 77 if (minimize)
3057     {
3058     for (fi = min;; fi++)
3059     {
3060 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3061 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3062 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3063 ph10 426 if (eptr >= md->end_subject)
3064 ph10 427 {
3065 ph10 426 SCHECK_PARTIAL();
3066 ph10 510 MRRETURN(MATCH_NOMATCH);
3067 ph10 427 }
3068 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3069 nigel 77 }
3070     /* Control never gets here */
3071     }
3072 nigel 93 else /* Maximize */
3073 nigel 77 {
3074     pp = eptr;
3075     for (i = min; i < max; i++)
3076     {
3077 ph10 463 if (eptr >= md->end_subject)
3078 ph10 462 {
3079 ph10 463 SCHECK_PARTIAL();
3080 ph10 462 break;
3081 ph10 463 }
3082 ph10 462 if (fc != *eptr) break;
3083 nigel 77 eptr++;
3084     }
3085 nigel 93 if (possessive) continue;
3086 ph10 443
3087 nigel 77 while (eptr >= pp)
3088     {
3089 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3090 nigel 77 eptr--;
3091     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3092     }
3093 ph10 510 MRRETURN(MATCH_NOMATCH);
3094 nigel 77 }
3095     }
3096     /* Control never gets here */
3097    
3098     /* Match a negated single one-byte character. The character we are
3099     checking can be multibyte. */
3100    
3101     case OP_NOT:
3102 ph10 443 if (eptr >= md->end_subject)
3103 ph10 428 {
3104 ph10 443 SCHECK_PARTIAL();
3105 ph10 510 MRRETURN(MATCH_NOMATCH);
3106 ph10 443 }
3107 nigel 77 ecode++;
3108     GETCHARINCTEST(c, eptr);
3109     if ((ims & PCRE_CASELESS) != 0)
3110     {
3111     #ifdef SUPPORT_UTF8
3112     if (c < 256)
3113     #endif
3114     c = md->lcc[c];
3115 ph10 510 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3116 nigel 77 }
3117     else
3118     {
3119 ph10 510 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3120 nigel 77 }
3121     break;
3122    
3123     /* Match a negated single one-byte character repeatedly. This is almost a
3124     repeat of the code for a repeated single character, but I haven't found a
3125     nice way of commoning these up that doesn't require a test of the
3126     positive/negative option for each character match. Maybe that wouldn't add
3127     very much to the time taken, but character matching *is* what this is all
3128     about... */
3129    
3130     case OP_NOTEXACT:
3131     min = max = GET2(ecode, 1);
3132     ecode += 3;
3133     goto REPEATNOTCHAR;
3134    
3135     case OP_NOTUPTO:
3136     case OP_NOTMINUPTO:
3137     min = 0;
3138     max = GET2(ecode, 1);
3139     minimize = *ecode == OP_NOTMINUPTO;
3140     ecode += 3;
3141     goto REPEATNOTCHAR;
3142    
3143 nigel 93 case OP_NOTPOSSTAR:
3144     possessive = TRUE;
3145     min = 0;
3146     max = INT_MAX;
3147     ecode++;
3148     goto REPEATNOTCHAR;
3149    
3150     case OP_NOTPOSPLUS:
3151     possessive = TRUE;
3152     min = 1;
3153     max = INT_MAX;
3154     ecode++;
3155     goto REPEATNOTCHAR;
3156    
3157     case OP_NOTPOSQUERY:
3158     possessive = TRUE;
3159     min = 0;
3160     max = 1;
3161     ecode++;
3162     goto REPEATNOTCHAR;
3163    
3164     case OP_NOTPOSUPTO:
3165     possessive = TRUE;
3166     min = 0;
3167     max = GET2(ecode, 1);
3168     ecode += 3;
3169     goto REPEATNOTCHAR;
3170    
3171 nigel 77 case OP_NOTSTAR:
3172     case OP_NOTMINSTAR:
3173     case OP_NOTPLUS:
3174     case OP_NOTMINPLUS:
3175     case OP_NOTQUERY:
3176     case OP_NOTMINQUERY:
3177     c = *ecode++ - OP_NOTSTAR;
3178     minimize = (c & 1) != 0;
3179     min = rep_min[c]; /* Pick up values from tables; */
3180     max = rep_max[c]; /* zero for max => infinity */
3181     if (max == 0) max = INT_MAX;
3182    
3183 ph10 426 /* Common code for all repeated single-byte matches. */
3184 nigel 77
3185     REPEATNOTCHAR:
3186     fc = *ecode++;
3187    
3188     /* The code is duplicated for the caseless and caseful cases, for speed,
3189     since matching characters is likely to be quite common. First, ensure the
3190     minimum number of matches are present. If min = max, continue at the same
3191     level without recursing. Otherwise, if minimizing, keep trying the rest of
3192     the expression and advancing one matching character if failing, up to the
3193     maximum. Alternatively, if maximizing, find the maximum number of
3194     characters and work backwards. */
3195    
3196     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3197     max, eptr));
3198    
3199     if ((ims & PCRE_CASELESS) != 0)
3200     {
3201     fc = md->lcc[fc];
3202    
3203     #ifdef SUPPORT_UTF8
3204     /* UTF-8 mode */
3205     if (utf8)
3206     {
3207 nigel 93 register unsigned int d;
3208 nigel 77 for (i = 1; i <= min; i++)
3209     {
3210 ph10 426 if (eptr >= md->end_subject)
3211     {
3212     SCHECK_PARTIAL();
3213 ph10 510 MRRETURN(MATCH_NOMATCH);
3214 ph10 427 }
3215 nigel 77 GETCHARINC(d, eptr);
3216     if (d < 256) d = md->lcc[d];
3217 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3218 nigel 77 }
3219     }
3220     else
3221     #endif
3222    
3223     /* Not UTF-8 mode */
3224     {
3225     for (i = 1; i <= min; i++)
3226 ph10 426 {
3227     if (eptr >= md->end_subject)
3228     {
3229     SCHECK_PARTIAL();
3230 ph10 510 MRRETURN(MATCH_NOMATCH);
3231 ph10 427 }
3232 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3233 ph10 427 }
3234 nigel 77 }
3235    
3236     if (min == max) continue;
3237    
3238     if (minimize)
3239     {
3240     #ifdef SUPPORT_UTF8
3241     /* UTF-8 mode */
3242     if (utf8)
3243     {
3244 nigel 93 register unsigned int d;
3245 nigel 77 for (fi = min;; fi++)
3246     {
3247 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3248 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3249 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3250 ph10 427 if (eptr >= md->end_subject)
3251 ph10 426 {
3252 ph10 427 SCHECK_PARTIAL();
3253 ph10 510 MRRETURN(MATCH_NOMATCH);
3254 ph10 427 }
3255 nigel 77 GETCHARINC(d, eptr);
3256     if (d < 256) d = md->lcc[d];
3257 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3258 nigel 77 }
3259     }
3260     else
3261     #endif
3262     /* Not UTF-8 mode */
3263     {
3264     for (fi = min;; fi++)
3265     {
3266 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3267 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3268 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3269 ph10 426 if (eptr >= md->end_subject)
3270     {
3271     SCHECK_PARTIAL();
3272 ph10 510 MRRETURN(MATCH_NOMATCH);
3273 ph10 426 }
3274 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3275 nigel 77 }
3276     }
3277     /* Control never gets here */
3278     }
3279    
3280     /* Maximize case */
3281    
3282     else
3283     {
3284     pp = eptr;
3285    
3286     #ifdef SUPPORT_UTF8
3287     /* UTF-8 mode */
3288     if (utf8)
3289     {
3290 nigel 93 register unsigned int d;
3291 nigel 77 for (i = min; i < max; i++)
3292     {
3293     int len = 1;
3294 ph10 463 if (eptr >= md->end_subject)
3295 ph10 462 {
3296 ph10 463 SCHECK_PARTIAL();
3297 ph10 462 break;
3298 ph10 463 }
3299 nigel 77 GETCHARLEN(d, eptr, len);
3300     if (d < 256) d = md->lcc[d];
3301     if (fc == d) break;
3302     eptr += len;
3303     }
3304 nigel 93 if (possessive) continue;
3305     for(;;)
3306 nigel 77 {
3307 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3308 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3309     if (eptr-- == pp) break; /* Stop if tried at original pos */
3310     BACKCHAR(eptr);
3311     }
3312     }
3313     else
3314     #endif
3315     /* Not UTF-8 mode */
3316     {
3317     for (i = min; i < max; i++)
3318     {
3319 ph10 463 if (eptr >= md->end_subject)
3320 ph10 462 {
3321     SCHECK_PARTIAL();
3322     break;
3323 ph10 463 }
3324 ph10 462 if (fc == md->lcc[*eptr]) break;
3325 nigel 77 eptr++;
3326     }
3327 nigel 93 if (possessive) continue;
3328 nigel 77 while (eptr >= pp)
3329     {
3330 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3331 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3332     eptr--;
3333     }
3334     }
3335    
3336 ph10 510 MRRETURN(MATCH_NOMATCH);
3337 nigel 77 }
3338     /* Control never gets here */
3339     }
3340    
3341     /* Caseful comparisons */
3342    
3343     else
3344     {
3345     #ifdef SUPPORT_UTF8
3346     /* UTF-8 mode */
3347     if (utf8)
3348     {
3349 nigel 93 register unsigned int d;
3350 nigel 77 for (i = 1; i <= min; i++)
3351     {
3352 ph10 426 if (eptr >= md->end_subject)
3353     {
3354     SCHECK_PARTIAL();
3355 ph10 510 MRRETURN(MATCH_NOMATCH);
3356 ph10 427 }
3357 nigel 77 GETCHARINC(d, eptr);
3358 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3359 nigel 77 }
3360     }
3361     else
3362     #endif
3363     /* Not UTF-8 mode */
3364     {
3365     for (i = 1; i <= min; i++)
3366 ph10 426 {
3367     if (eptr >= md->end_subject)
3368     {
3369     SCHECK_PARTIAL();
3370 ph10 510 MRRETURN(MATCH_NOMATCH);
3371 ph10 427 }
3372 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3373 ph10 427 }
3374 nigel 77 }
3375    
3376     if (min == max) continue;
3377    
3378     if (minimize)
3379     {
3380     #ifdef SUPPORT_UTF8
3381     /* UTF-8 mode */
3382     if (utf8)
3383     {
3384 nigel 93 register unsigned int d;
3385 nigel 77 for (fi = min;; fi++)
3386     {
3387 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3388 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3390 ph10 427 if (eptr >= md->end_subject)
3391 ph10 426 {
3392 ph10 427 SCHECK_PARTIAL();
3393 ph10 510 MRRETURN(MATCH_NOMATCH);
3394 ph10 427 }
3395 nigel 77 GETCHARINC(d, eptr);
3396 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3397 nigel 77 }
3398     }
3399     else
3400     #endif
3401     /* Not UTF-8 mode */
3402     {
3403     for (fi = min;; fi++)
3404     {
3405 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3406 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3407 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3408 ph10 426 if (eptr >= md->end_subject)
3409     {
3410     SCHECK_PARTIAL();
3411 ph10 510 MRRETURN(MATCH_NOMATCH);
3412 ph10 427 }
3413 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3414 nigel 77 }
3415     }
3416     /* Control never gets here */
3417     }
3418    
3419     /* Maximize case */
3420    
3421     else
3422     {
3423     pp = eptr;
3424    
3425     #ifdef SUPPORT_UTF8
3426     /* UTF-8 mode */
3427     if (utf8)
3428     {
3429 nigel 93 register unsigned int d;
3430 nigel 77 for (i = min; i < max; i++)
3431     {
3432     int len = 1;
3433 ph10 463 if (eptr >= md->end_subject)
3434 ph10 462 {
3435 ph10 463 SCHECK_PARTIAL();
3436 ph10 462 break;
3437 ph10 463 }
3438 nigel 77 GETCHARLEN(d, eptr, len);
3439     if (fc == d) break;
3440     eptr += len;
3441     }
3442 nigel 93 if (possessive) continue;
3443 nigel 77 for(;;)
3444     {
3445 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3446 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3447     if (eptr-- == pp) break; /* Stop if tried at original pos */
3448     BACKCHAR(eptr);
3449     }
3450     }
3451     else
3452     #endif
3453     /* Not UTF-8 mode */
3454     {
3455     for (i = min; i < max; i++)
3456     {
3457 ph10 463 if (eptr >= md->end_subject)
3458 ph10 462 {
3459 ph10 463 SCHECK_PARTIAL();
3460 ph10 462 break;
3461 ph10 463 }
3462 ph10 462 if (fc == *eptr) break;
3463 nigel 77 eptr++;
3464     }
3465 nigel 93 if (possessive) continue;
3466 nigel 77 while (eptr >= pp)
3467     {
3468 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3469 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3470     eptr--;
3471     }
3472     }
3473    
3474 ph10 510 MRRETURN(MATCH_NOMATCH);
3475 nigel 77 }
3476     }
3477     /* Control never gets here */
3478    
3479     /* Match a single character type repeatedly; several different opcodes
3480     share code. This is very similar to the code for single characters, but we
3481     repeat it in the interests of efficiency. */
3482    
3483     case OP_TYPEEXACT:
3484     min = max = GET2(ecode, 1);
3485     minimize = TRUE;
3486     ecode += 3;
3487     goto REPEATTYPE;
3488    
3489     case OP_TYPEUPTO:
3490     case OP_TYPEMINUPTO:
3491     min = 0;
3492     max = GET2(ecode, 1);
3493     minimize = *ecode == OP_TYPEMINUPTO;
3494     ecode += 3;
3495     goto REPEATTYPE;
3496    
3497 nigel 93 case OP_TYPEPOSSTAR:
3498     possessive = TRUE;
3499     min = 0;
3500     max = INT_MAX;
3501     ecode++;
3502     goto REPEATTYPE;
3503    
3504     case OP_TYPEPOSPLUS:
3505     possessive = TRUE;
3506     min = 1;
3507     max = INT_MAX;
3508     ecode++;
3509     goto REPEATTYPE;
3510    
3511     case OP_TYPEPOSQUERY:
3512     possessive = TRUE;
3513     min = 0;
3514     max = 1;
3515     ecode++;
3516     goto REPEATTYPE;
3517    
3518     case OP_TYPEPOSUPTO:
3519     possessive = TRUE;
3520     min = 0;
3521     max = GET2(ecode, 1);
3522     ecode += 3;
3523     goto REPEATTYPE;
3524    
3525 nigel 77 case OP_TYPESTAR:
3526     case OP_TYPEMINSTAR:
3527     case OP_TYPEPLUS:
3528     case OP_TYPEMINPLUS:
3529     case OP_TYPEQUERY:
3530     case OP_TYPEMINQUERY:
3531     c = *ecode++ - OP_TYPESTAR;
3532     minimize = (c & 1) != 0;
3533     min = rep_min[c]; /* Pick up values from tables; */
3534     max = rep_max[c]; /* zero for max => infinity */
3535     if (max == 0) max = INT_MAX;
3536    
3537     /* Common code for all repeated single character type matches. Note that
3538     in UTF-8 mode, '.' matches a character of any length, but for the other
3539     character types, the valid characters are all one-byte long. */
3540    
3541     REPEATTYPE:
3542     ctype = *ecode++; /* Code for the character type */
3543    
3544     #ifdef SUPPORT_UCP
3545     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3546     {
3547     prop_fail_result = ctype == OP_NOTPROP;
3548     prop_type = *ecode++;
3549 nigel 87 prop_value = *ecode++;
3550 nigel 77 }
3551     else prop_type = -1;
3552     #endif
3553    
3554     /* First, ensure the minimum number of matches are present. Use inline
3555     code for maximizing the speed, and do the type test once at the start
3556 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3557 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3558     and single-bytes. */
3559    
3560     if (min > 0)
3561     {
3562     #ifdef SUPPORT_UCP
3563 nigel 87 if (prop_type >= 0)
3564 nigel 77 {
3565 nigel 87 switch(prop_type)
3566 nigel 77 {
3567 nigel 87 case PT_ANY:
3568 ph10 510 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3569 nigel 87 for (i = 1; i <= min; i++)
3570     {
3571 ph10 427 if (eptr >= md->end_subject)
3572 ph10 426 {
3573 ph10 427 SCHECK_PARTIAL();
3574 ph10 510 MRRETURN(MATCH_NOMATCH);
3575 ph10 427 }
3576 ph10 184 GETCHARINCTEST(c, eptr);
3577 nigel 87 }
3578     break;
3579    
3580     case PT_LAMP:
3581     for (i = 1; i <= min; i++)
3582     {
3583 ph10 427 if (eptr >= md->end_subject)
3584 ph10 426 {
3585 ph10 427 SCHECK_PARTIAL();
3586 ph10 510 MRRETURN(MATCH_NOMATCH);
3587 ph10 427 }
3588 ph10 184 GETCHARINCTEST(c, eptr);
3589 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3590 nigel 87 if ((prop_chartype == ucp_Lu ||
3591     prop_chartype == ucp_Ll ||
3592     prop_chartype == ucp_Lt) == prop_fail_result)
3593 ph10 510 MRRETURN(MATCH_NOMATCH);
3594 nigel 87 }
3595     break;
3596    
3597     case PT_GC:
3598     for (i = 1; i <= min; i++)
3599     {
3600 ph10 427 if (eptr >= md->end_subject)
3601 ph10 426 {
3602 ph10 427 SCHECK_PARTIAL();
3603 ph10 510 MRRETURN(MATCH_NOMATCH);
3604 ph10 427 }
3605 ph10 184 GETCHARINCTEST(c, eptr);
3606 ph10 349 prop_category = UCD_CATEGORY(c);
3607 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3608 ph10 510 MRRETURN(MATCH_NOMATCH);
3609 nigel 87 }
3610     break;
3611    
3612     case PT_PC:
3613     for (i = 1; i <= min; i++)
3614     {
3615 ph10 427 if (eptr >= md->end_subject)
3616 ph10 426 {
3617 ph10 427 SCHECK_PARTIAL();
3618 ph10 510 MRRETURN(MATCH_NOMATCH);
3619 ph10 427 }
3620 ph10 184 GETCHARINCTEST(c, eptr);
3621 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3622 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3623 ph10 510 MRRETURN(MATCH_NOMATCH);
3624 nigel 87 }
3625     break;
3626    
3627     case PT_SC:
3628     for (i = 1; i <= min; i++)
3629     {
3630 ph10 427 if (eptr >= md->end_subject)
3631 ph10 426 {
3632 ph10 427 SCHECK_PARTIAL();
3633 ph10 510 MRRETURN(MATCH_NOMATCH);
3634 ph10 427 }
3635 ph10 184 GETCHARINCTEST(c, eptr);
3636 ph10 349 prop_script = UCD_SCRIPT(c);
3637 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3638 ph10 510 MRRETURN(MATCH_NOMATCH);
3639 nigel 87 }
3640     break;
3641 ph10 527
3642 ph10 517 case PT_ALNUM:
3643     for (i = 1; i <= min; i++)
3644     {
3645     if (eptr >= md->end_subject)
3646     {
3647     SCHECK_PARTIAL();
3648     MRRETURN(MATCH_NOMATCH);
3649     }
3650     GETCHARINCTEST(c, eptr);
3651 ph10 527 prop_category = UCD_CATEGORY(c);
3652     if ((prop_category == ucp_L || prop_category == ucp_N)
3653 ph10 517 == prop_fail_result)
3654     MRRETURN(MATCH_NOMATCH);
3655     }
3656     break;
3657 ph10 527
3658 ph10 517 case PT_SPACE: /* Perl space */
3659     for (i = 1; i <= min; i++)
3660     {
3661     if (eptr >= md->end_subject)
3662     {
3663     SCHECK_PARTIAL();
3664     MRRETURN(MATCH_NOMATCH);
3665     }
3666     GETCHARINCTEST(c, eptr);
3667 ph10 527 prop_category = UCD_CATEGORY(c);
3668     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3669     c == CHAR_FF || c == CHAR_CR)
3670 ph10 517 == prop_fail_result)
3671     MRRETURN(MATCH_NOMATCH);
3672     }
3673     break;
3674 ph10 527
3675 ph10 517 case PT_PXSPACE: /* POSIX space */
3676     for (i = 1; i <= min; i++)
3677     {
3678     if (eptr >= md->end_subject)
3679     {
3680     SCHECK_PARTIAL();
3681     MRRETURN(MATCH_NOMATCH);
3682     }
3683     GETCHARINCTEST(c, eptr);
3684 ph10 527 prop_category = UCD_CATEGORY(c);
3685     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3686     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3687 ph10 517 == prop_fail_result)
3688     MRRETURN(MATCH_NOMATCH);
3689     }
3690     break;
3691 ph10 527
3692     case PT_WORD:
3693 ph10 517 for (i = 1; i <= min; i++)
3694     {
3695     if (eptr >= md->end_subject)
3696     {
3697     SCHECK_PARTIAL();
3698     MRRETURN(MATCH_NOMATCH);
3699     }
3700     GETCHARINCTEST(c, eptr);
3701 ph10 527 prop_category = UCD_CATEGORY(c);
3702 ph10 517 if ((prop_category == ucp_L || prop_category == ucp_N ||
3703 ph10 527 c == CHAR_UNDERSCORE)
3704 ph10 517 == prop_fail_result)
3705     MRRETURN(MATCH_NOMATCH);
3706     }
3707     break;
3708 ph10 527
3709 ph10 517 /* This should not occur */
3710 nigel 87
3711     default:
3712     RRETURN(PCRE_ERROR_INTERNAL);
3713 nigel 77 }
3714     }
3715    
3716     /* Match extended Unicode sequences. We will get here only if the
3717     support is in the binary; otherwise a compile-time error occurs. */
3718    
3719     else if (ctype == OP_EXTUNI)
3720     {
3721     for (i = 1; i <= min; i++)
3722     {
3723 ph10 427 if (eptr >= md->end_subject)
3724 ph10 426 {
3725 ph10 427 SCHECK_PARTIAL();
3726 ph10 510 MRRETURN(MATCH_NOMATCH);
3727 ph10 427 }
3728 nigel 77 GETCHARINCTEST(c, eptr);
3729 ph10 349 prop_category = UCD_CATEGORY(c);
3730 ph10 510 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3731 nigel 77 while (eptr < md->end_subject)
3732     {
3733     int len = 1;
3734 ph10 426 if (!utf8) c = *eptr;
3735     else { GETCHARLEN(c, eptr, len); }
3736 ph10 349 prop_category = UCD_CATEGORY(c);
3737 nigel 77 if (prop_category != ucp_M) break;
3738     eptr += len;
3739     }
3740     }
3741     }
3742    
3743     else
3744     #endif /* SUPPORT_UCP */
3745    
3746     /* Handle all other cases when the coding is UTF-8 */
3747    
3748     #ifdef SUPPORT_UTF8
3749     if (utf8) switch(ctype)
3750     {
3751     case OP_ANY:
3752     for (i = 1; i <= min; i++)
3753     {
3754 ph10 426 if (eptr >= md->end_subject)
3755     {
3756 ph10 427 SCHECK_PARTIAL();
3757 ph10 510 MRRETURN(MATCH_NOMATCH);
3758 ph10 427 }
3759 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3760 nigel 91 eptr++;
3761 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3762     }
3763     break;
3764    
3765 ph10 341 case OP_ALLANY:
3766     for (i = 1; i <= min; i++)
3767     {
3768 ph10 427 if (eptr >= md->end_subject)
3769 ph10 426 {
3770     SCHECK_PARTIAL();
3771 ph10 510 MRRETURN(MATCH_NOMATCH);
3772 ph10 427 }
3773 ph10 341 eptr++;
3774     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3775     }
3776     break;
3777    
3778 nigel 77 case OP_ANYBYTE:
3779 ph10 510 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3780 nigel 77 eptr += min;
3781     break;
3782    
3783 nigel 93 case OP_ANYNL:
3784     for (i = 1; i <= min; i++)
3785     {
3786 ph10 427 if (eptr >= md->end_subject)
3787 ph10 426 {
3788     SCHECK_PARTIAL();
3789 ph10 510 MRRETURN(MATCH_NOMATCH);
3790 ph10 427 }
3791 nigel 93 GETCHARINC(c, eptr);
3792     switch(c)
3793     {
3794 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3795 ph10 600
3796 nigel 93 case 0x000d:
3797     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3798     break;
3799 ph10 231
3800 nigel 93 case 0x000a:
3801 ph10 231 break;
3802    
3803 nigel 93 case 0x000b:
3804     case 0x000c:
3805     case 0x0085:
3806     case 0x2028:
3807     case 0x2029:
3808 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3809 nigel 93 break;
3810     }
3811     }
3812     break;
3813    
3814 ph10 178 case OP_NOT_HSPACE:
3815     for (i = 1; i <= min; i++)
3816     {
3817 ph10 427 if (eptr >= md->end_subject)
3818 ph10 426 {
3819     SCHECK_PARTIAL();
3820 ph10 510 MRRETURN(MATCH_NOMATCH);
3821 ph10 427 }
3822 ph10 178 GETCHARINC(c, eptr);
3823     switch(c)
3824     {
3825     default: break;
3826     case 0x09: /* HT */
3827     case 0x20: /* SPACE */
3828     case 0xa0: /* NBSP */
3829     case 0x1680: /* OGHAM SPACE MARK */
3830     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3831     case 0x2000: /* EN QUAD */
3832     case 0x2001: /* EM QUAD */
3833     case 0x2002: /* EN SPACE */
3834     case 0x2003: /* EM SPACE */
3835     case 0x2004: /* THREE-PER-EM SPACE */
3836     case 0x2005: /* FOUR-PER-EM SPACE */
3837     case 0x2006: /* SIX-PER-EM SPACE */
3838     case 0x2007: /* FIGURE SPACE */
3839     case 0x2008: /* PUNCTUATION SPACE */
3840     case 0x2009: /* THIN SPACE */
3841     case 0x200A: /* HAIR SPACE */
3842     case 0x202f: /* NARROW NO-BREAK SPACE */
3843     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3844     case 0x3000: /* IDEOGRAPHIC SPACE */
3845 ph10 510 MRRETURN(MATCH_NOMATCH);
3846 ph10 178 }
3847     }
3848     break;
3849 ph10 182
3850 ph10 178 case OP_HSPACE:
3851     for (i = 1; i <= min; i++)
3852     {
3853 ph10 427 if (eptr >= md->end_subject)
3854 ph10 426 {
3855 ph10 427 SCHECK_PARTIAL();
3856 ph10 510 MRRETURN(MATCH_NOMATCH);
3857 ph10 427 }
3858 ph10 178 GETCHARINC(c, eptr);
3859     switch(c)
3860     {
3861 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3862 ph10 178 case 0x09: /* HT */
3863     case 0x20: /* SPACE */
3864     case 0xa0: /* NBSP */
3865     case 0x1680: /* OGHAM SPACE MARK */
3866     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3867     case 0x2000: /* EN QUAD */
3868     case 0x2001: /* EM QUAD */
3869     case 0x2002: /* EN SPACE */
3870     case 0x2003: /* EM SPACE */
3871     case 0x2004: /* THREE-PER-EM SPACE */
3872     case 0x2005: /* FOUR-PER-EM SPACE */
3873     case 0x2006: /* SIX-PER-EM SPACE */
3874     case 0x2007: /* FIGURE SPACE */
3875     case 0x2008: /* PUNCTUATION SPACE */
3876     case 0x2009: /* THIN SPACE */
3877     case 0x200A: /* HAIR SPACE */
3878     case 0x202f: /* NARROW NO-BREAK SPACE */
3879     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3880     case 0x3000: /* IDEOGRAPHIC SPACE */
3881     break;
3882     }
3883     }
3884     break;
3885 ph10 182
3886 ph10 178 case OP_NOT_VSPACE:
3887     for (i = 1; i <= min; i++)
3888     {
3889 ph10 427 if (eptr >= md->end_subject)
3890 ph10 426 {
3891 ph10 427 SCHECK_PARTIAL();
3892 ph10 510 MRRETURN(MATCH_NOMATCH);
3893 ph10 427 }
3894 ph10 178 GETCHARINC(c, eptr);
3895     switch(c)
3896     {
3897     default: break;
3898     case 0x0a: /* LF */
3899     case 0x0b: /* VT */
3900     case 0x0c: /* FF */
3901     case 0x0d: /* CR */
3902     case 0x85: /* NEL */
3903     case 0x2028: /* LINE SEPARATOR */
3904    </