/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 595 - (hide annotations) (download)
Mon May 2 10:33:29 2011 UTC (3 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 187622 byte(s)
Fix problems with caseless reference matching in UTF-8 mode when the 
upper/lower case characters have different lengths.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74 ph10 511 #define MATCH_ACCEPT (-999)
75     #define MATCH_COMMIT (-998)
76     #define MATCH_PRUNE (-997)
77     #define MATCH_SKIP (-996)
78     #define MATCH_SKIP_ARG (-995)
79     #define MATCH_THEN (-994)
80 ph10 210
81 ph10 510 /* This is a convenience macro for code that occurs many times. */
82    
83     #define MRRETURN(ra) \
84     { \
85     md->mark = markptr; \
86     RRETURN(ra); \
87     }
88    
89 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
90     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91     because the offset vector is always a multiple of 3 long. */
92    
93     #define REC_STACK_SAVE_MAX 30
94    
95     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96    
97     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
98     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
99    
100    
101    
102 ph10 475 #ifdef PCRE_DEBUG
103 nigel 77 /*************************************************
104     * Debugging function to print chars *
105     *************************************************/
106    
107     /* Print a sequence of chars in printable format, stopping at the end of the
108     subject if the requested.
109    
110     Arguments:
111     p points to characters
112     length number to print
113     is_subject TRUE if printing from within md->start_subject
114     md pointer to matching data block, if is_subject is TRUE
115    
116     Returns: nothing
117     */
118    
119     static void
120     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121     {
122 nigel 93 unsigned int c;
123 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124     while (length-- > 0)
125     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126     }
127     #endif
128    
129    
130    
131     /*************************************************
132     * Match a back-reference *
133     *************************************************/
134    
135 ph10 595 /* Normally, if a back reference hasn't been set, the length that is passed is
136     negative, so the match always fails. However, in JavaScript compatibility mode,
137     the length passed is zero. Note that in caseless UTF-8 mode, the number of
138     subject bytes matched may be different to the number of reference bytes.
139 nigel 77
140     Arguments:
141     offset index into the offset vector
142 ph10 595 eptr pointer into the subject
143     length length of reference to be matched (number of bytes)
144 nigel 77 md points to match data block
145     ims the ims flags
146    
147 ph10 595 Returns: < 0 if not matched, otherwise the number of subject bytes matched
148 nigel 77 */
149    
150 ph10 595 static int
151 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
152 nigel 77 unsigned long int ims)
153     {
154 ph10 595 USPTR eptr_start = eptr;
155     register USPTR p = md->start_subject + md->offset_vector[offset];
156 nigel 77
157 ph10 475 #ifdef PCRE_DEBUG
158 nigel 77 if (eptr >= md->end_subject)
159     printf("matching subject <null>");
160     else
161     {
162     printf("matching subject ");
163     pchars(eptr, length, TRUE, md);
164     }
165     printf(" against backref ");
166     pchars(p, length, FALSE, md);
167     printf("\n");
168     #endif
169    
170 ph10 595 /* Always fail if reference not set (and not JavaScript compatible). */
171 nigel 77
172 ph10 595 if (length < 0) return -1;
173 nigel 77
174 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175     properly if Unicode properties are supported. Otherwise, we can check only
176     ASCII characters. */
177 nigel 77
178     if ((ims & PCRE_CASELESS) != 0)
179     {
180 ph10 354 #ifdef SUPPORT_UTF8
181     #ifdef SUPPORT_UCP
182     if (md->utf8)
183     {
184 ph10 595 /* Match characters up to the end of the reference. NOTE: the number of
185     bytes matched may differ, because there are some characters whose upper and
186     lower case versions code as different numbers of bytes. For example, U+023A
187     (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188     a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189     the latter. It is important, therefore, to check the length along the
190     reference, not along the subject (earlier code did this wrong). */
191    
192     USPTR endptr = p + length;
193     while (p < endptr)
194 ph10 354 {
195 ph10 358 int c, d;
196 ph10 354 GETCHARINC(c, eptr);
197     GETCHARINC(d, p);
198 ph10 595 if (c != d && c != UCD_OTHERCASE(d)) return -1;
199 ph10 358 }
200     }
201 ph10 354 else
202     #endif
203     #endif
204    
205     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
206     is no UCP support. */
207 ph10 358
208 nigel 77 while (length-- > 0)
209 ph10 595 { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
210 nigel 77 }
211 ph10 358
212 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
213     are in UTF-8 mode. */
214 ph10 358
215 nigel 77 else
216 ph10 595 { while (length-- > 0) if (*p++ != *eptr++) return -1; }
217 nigel 77
218 ph10 595 return eptr - eptr_start;
219 nigel 77 }
220    
221    
222    
223     /***************************************************************************
224     ****************************************************************************
225     RECURSION IN THE match() FUNCTION
226    
227 nigel 87 The match() function is highly recursive, though not every recursive call
228     increases the recursive depth. Nevertheless, some regular expressions can cause
229     it to recurse to a great depth. I was writing for Unix, so I just let it call
230     itself recursively. This uses the stack for saving everything that has to be
231     saved for a recursive call. On Unix, the stack can be large, and this works
232     fine.
233 nigel 77
234 nigel 87 It turns out that on some non-Unix-like systems there are problems with
235     programs that use a lot of stack. (This despite the fact that every last chip
236     has oodles of memory these days, and techniques for extending the stack have
237     been known for decades.) So....
238 nigel 77
239     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
240     calls by keeping local variables that need to be preserved in blocks of memory
241 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
242 nigel 77 achieve this so that the actual code doesn't look very different to what it
243     always used to.
244 ph10 164
245 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
246 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
247     Switzer, the use of longjmp() has been abolished, at the cost of having to
248     provide a unique number for each call to RMATCH. There is no way of generating
249     a sequence of numbers at compile time in C. I have given them names, to make
250     them stand out more clearly.
251    
252     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
253     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
254 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
255     don't have indeterminate values; this has meant that the frame size can be
256 ph10 164 reduced because the result can be "passed back" by straight setting of the
257     variable instead of being passed in the frame.
258 nigel 77 ****************************************************************************
259     ***************************************************************************/
260    
261 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
262     below must be updated in sync. */
263 nigel 77
264 ph10 164 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
265     RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
266     RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
267     RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
268 ph10 210 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
269 ph10 527 RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
270     RM61, RM62 };
271 ph10 164
272 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
273 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
274 ph10 501 actually used in this definition. */
275 nigel 77
276     #ifndef NO_RECURSE
277     #define REGISTER register
278 ph10 164
279 ph10 475 #ifdef PCRE_DEBUG
280 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
281 nigel 87 { \
282     printf("match() called in line %d\n", __LINE__); \
283 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
284 nigel 87 printf("to line %d\n", __LINE__); \
285     }
286     #define RRETURN(ra) \
287     { \
288     printf("match() returned %d from line %d ", ra, __LINE__); \
289     return ra; \
290     }
291     #else
292 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
293 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
294 nigel 77 #define RRETURN(ra) return ra
295 nigel 87 #endif
296    
297 nigel 77 #else
298    
299    
300 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
301     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
302     argument of match(), which never changes. */
303 nigel 77
304     #define REGISTER
305    
/* RMATCH (heap version): simulate a recursive call of match() without using
the C stack. The steps are:

  1. Get a new heapframe from pcre_stack_malloc(); if that fails, leave via
     RRETURN(PCRE_ERROR_NOMEMORY).
  2. Record rw in the CURRENT frame's Xwhere field, identifying this call
     site. The label L_<rw> defined at the end of the macro is the matching
     resume point. (Presumably the code at HEAP_RETURN dispatches on Xwhere
     to reach it; that code is not part of this macro.)
  3. Fill in the "argument" fields of the new frame from ra, rb, rc, re, rf
     and rg, plus the current mstart and markptr. The rd argument is not
     used. The new frame's depth is the current depth plus one.
  4. Chain the new frame to the current one through Xprevframe, make it the
     current frame, and jump to HEAP_RECURSE.

Because the macro defines the label L_<rw>, each rw value can be used at only
one call site within a function. */

306 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
307 nigel 77 {\
308 ph10 563 heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
309 ph10 534 if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
310 ph10 164 frame->Xwhere = rw; \
311     newframe->Xeptr = ra;\
312     newframe->Xecode = rb;\
313 ph10 168 newframe->Xmstart = mstart;\
314 ph10 501 newframe->Xmarkptr = markptr;\
315 ph10 164 newframe->Xoffset_top = rc;\
316     newframe->Xims = re;\
317     newframe->Xeptrb = rf;\
318     newframe->Xflags = rg;\
319     newframe->Xrdepth = frame->Xrdepth + 1;\
320     newframe->Xprevframe = frame;\
321     frame = newframe;\
322     DPRINTF(("restarting from line %d\n", __LINE__));\
323     goto HEAP_RECURSE;\
324     L_##rw:\
325     DPRINTF(("jumped back to line %d\n", __LINE__));\
326 nigel 77 }
327    
/* RRETURN (heap version): return from a simulated recursive call. The current
frame is unlinked and released with pcre_stack_free(). If a previous frame
exists, it becomes the current frame, the result is left in rrc, and control
goes to HEAP_RETURN; otherwise the result is returned from match() itself.

match() also uses this macro when the allocation of its very first frame
fails, at which point "frame" is NULL. That case must not dereference or free
the pointer, so the unlink/free step is guarded; the macro then just returns
the value. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  if (oldframe != NULL)\
    {\
    frame = oldframe->Xprevframe;\
    (pcre_stack_free)(oldframe);\
    }\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
340    
341    
342     /* Structure for remembering the local variables in a private frame */
343    
344     typedef struct heapframe {
345     struct heapframe *Xprevframe;
346    
347     /* Function arguments that may change */
348    
349 ph10 409 USPTR Xeptr;
350 nigel 77 const uschar *Xecode;
351 ph10 409 USPTR Xmstart;
352 ph10 501 USPTR Xmarkptr;
353 nigel 77 int Xoffset_top;
354     long int Xims;
355     eptrblock *Xeptrb;
356     int Xflags;
357 nigel 91 unsigned int Xrdepth;
358 nigel 77
359     /* Function local variables */
360    
361 ph10 409 USPTR Xcallpat;
362 ph10 406 #ifdef SUPPORT_UTF8
363 ph10 409 USPTR Xcharptr;
364 ph10 406 #endif
365 ph10 409 USPTR Xdata;
366     USPTR Xnext;
367     USPTR Xpp;
368     USPTR Xprev;
369     USPTR Xsaved_eptr;
370 nigel 77
371     recursion_info Xnew_recursive;
372    
373     BOOL Xcur_is_word;
374     BOOL Xcondition;
375     BOOL Xprev_is_word;
376    
377     unsigned long int Xoriginal_ims;
378    
379     #ifdef SUPPORT_UCP
380     int Xprop_type;
381 nigel 87 int Xprop_value;
382 nigel 77 int Xprop_fail_result;
383     int Xprop_category;
384     int Xprop_chartype;
385 nigel 87 int Xprop_script;
386 ph10 123 int Xoclength;
387     uschar Xocchars[8];
388 nigel 77 #endif
389    
390 ph10 403 int Xcodelink;
391 nigel 77 int Xctype;
392 nigel 93 unsigned int Xfc;
393 nigel 77 int Xfi;
394     int Xlength;
395     int Xmax;
396     int Xmin;
397     int Xnumber;
398     int Xoffset;
399     int Xop;
400     int Xsave_capture_last;
401     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
402     int Xstacksave[REC_STACK_SAVE_MAX];
403    
404     eptrblock Xnewptrb;
405    
406 ph10 164 /* Where to jump back to */
407 nigel 77
408 ph10 164 int Xwhere;
409 ph10 165
410 nigel 77 } heapframe;
411    
412     #endif
413    
414    
415     /***************************************************************************
416     ***************************************************************************/
417    
418    
419    
420     /*************************************************
421     * Match from current position *
422     *************************************************/
423    
424 nigel 93 /* This function is called recursively in many circumstances. Whenever it
425 nigel 77 returns a negative (error) response, the outer incarnation must also return the
426 ph10 426 same response. */
427 nigel 77
428 ph10 426 /* These macros pack up tests that are used for partial matching, and which
429     appear several times in the code. We set the "hit end" flag if the pointer is
430     at the end of the subject and also past the start of the subject (i.e.
431 ph10 427 something has been matched). For hard partial matching, we then return
432     immediately. The second one is used when we already know we are past the end of
433     the subject. */
434 ph10 426
435     #define CHECK_PARTIAL()\
436 ph10 553 if (md->partial != 0 && eptr >= md->end_subject && \
437     eptr > md->start_used_ptr) \
438     { \
439     md->hitend = TRUE; \
440     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
441 ph10 427 }
442 ph10 426
443     #define SCHECK_PARTIAL()\
444 ph10 553 if (md->partial != 0 && eptr > md->start_used_ptr) \
445     { \
446     md->hitend = TRUE; \
447     if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
448 ph10 427 }
449 ph10 426
450 ph10 427
451 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
452     the md structure (e.g. utf8, end_subject) into individual variables to improve
453 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
454     made performance worse.
455    
456     Arguments:
457 nigel 93 eptr pointer to current character in subject
458     ecode pointer to current position in compiled code
459 ph10 168 mstart pointer to the current match start position (can be modified
460 ph10 172 by encountering \K)
461 ph10 501 markptr pointer to the most recent MARK name, or NULL
462 nigel 77 offset_top current top pointer
463     md pointer to "static" info for the match
464     ims current /i, /m, and /s options
465     eptrb pointer to chain of blocks containing eptr at start of
466     brackets - for testing for empty matches
467     flags can contain
468     match_condassert - this is an assertion condition
469 nigel 93 match_cbegroup - this is the start of an unlimited repeat
470     group that can match an empty string
471 nigel 87 rdepth the recursion depth
472 nigel 77
473     Returns: MATCH_MATCH if matched ) these values are >= 0
474     MATCH_NOMATCH if failed to match )
475 ph10 510 a negative MATCH_xxx value for PRUNE, SKIP, etc
476 nigel 77 a negative PCRE_ERROR_xxx value if aborted by an error condition
477 nigel 87 (e.g. stopped by repeated call or recursion limit)
478 nigel 77 */
479    
480     static int
481 ph10 510 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
482     const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
483 ph10 501 eptrblock *eptrb, int flags, unsigned int rdepth)
484 nigel 77 {
485     /* These variables do not need to be preserved over recursion in this function,
486 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
487     "register" because they are used a lot in loops. */
488 nigel 77
489 nigel 91 register int rrc; /* Returns from recursive calls */
490     register int i; /* Used for loops not involving calls to RMATCH() */
491 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
492 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
493 nigel 77
494 nigel 93 BOOL minimize, possessive; /* Quantifier options */
495 ph10 403 int condcode;
496 nigel 93
497 nigel 77 /* When recursion is not being used, all "local" variables that have to be
498     preserved over calls to RMATCH() are part of a "frame" which is obtained from
499     heap storage. Set up the top-level frame here; others are obtained from the
500     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
501    
502     #ifdef NO_RECURSE
503 ph10 563 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
504 ph10 531 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
505 nigel 77 frame->Xprevframe = NULL; /* Marks the top level */
506    
507     /* Copy in the original argument variables */
508    
509     frame->Xeptr = eptr;
510     frame->Xecode = ecode;
511 ph10 168 frame->Xmstart = mstart;
512 ph10 501 frame->Xmarkptr = markptr;
513 nigel 77 frame->Xoffset_top = offset_top;
514     frame->Xims = ims;
515     frame->Xeptrb = eptrb;
516     frame->Xflags = flags;
517 nigel 87 frame->Xrdepth = rdepth;
518 nigel 77
519     /* This is where control jumps back to to effect "recursion" */
520    
521     HEAP_RECURSE:
522    
523     /* Macros make the argument variables come from the current frame */
524    
525     #define eptr frame->Xeptr
526     #define ecode frame->Xecode
527 ph10 168 #define mstart frame->Xmstart
528 ph10 501 #define markptr frame->Xmarkptr
529 nigel 77 #define offset_top frame->Xoffset_top
530     #define ims frame->Xims
531     #define eptrb frame->Xeptrb
532     #define flags frame->Xflags
533 nigel 87 #define rdepth frame->Xrdepth
534 nigel 77
535     /* Ditto for the local variables */
536    
537     #ifdef SUPPORT_UTF8
538     #define charptr frame->Xcharptr
539     #endif
540     #define callpat frame->Xcallpat
541 ph10 403 #define codelink frame->Xcodelink
542 nigel 77 #define data frame->Xdata
543     #define next frame->Xnext
544     #define pp frame->Xpp
545     #define prev frame->Xprev
546     #define saved_eptr frame->Xsaved_eptr
547    
548     #define new_recursive frame->Xnew_recursive
549    
550     #define cur_is_word frame->Xcur_is_word
551     #define condition frame->Xcondition
552     #define prev_is_word frame->Xprev_is_word
553    
554     #define original_ims frame->Xoriginal_ims
555    
556     #ifdef SUPPORT_UCP
557     #define prop_type frame->Xprop_type
558 nigel 87 #define prop_value frame->Xprop_value
559 nigel 77 #define prop_fail_result frame->Xprop_fail_result
560     #define prop_category frame->Xprop_category
561     #define prop_chartype frame->Xprop_chartype
562 nigel 87 #define prop_script frame->Xprop_script
563 ph10 115 #define oclength frame->Xoclength
564     #define occhars frame->Xocchars
565 nigel 77 #endif
566    
567     #define ctype frame->Xctype
568     #define fc frame->Xfc
569     #define fi frame->Xfi
570     #define length frame->Xlength
571     #define max frame->Xmax
572     #define min frame->Xmin
573     #define number frame->Xnumber
574     #define offset frame->Xoffset
575     #define op frame->Xop
576     #define save_capture_last frame->Xsave_capture_last
577     #define save_offset1 frame->Xsave_offset1
578     #define save_offset2 frame->Xsave_offset2
579     #define save_offset3 frame->Xsave_offset3
580     #define stacksave frame->Xstacksave
581    
582     #define newptrb frame->Xnewptrb
583    
584     /* When recursion is being used, local variables are allocated on the stack and
585     get preserved during recursion in the normal way. In this environment, fi and
586     i, and fc and c, can be the same variables. */
587    
588 nigel 93 #else /* NO_RECURSE not defined */
589 nigel 77 #define fi i
590     #define fc c
591    
592    
593 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
594     const uschar *charptr; /* in small blocks of the code. My normal */
595     #endif /* style of coding would have declared */
596     const uschar *callpat; /* them within each of those blocks. */
597     const uschar *data; /* However, in order to accommodate the */
598     const uschar *next; /* version of this code that uses an */
599     USPTR pp; /* external "stack" implemented on the */
600     const uschar *prev; /* heap, it is easier to declare them all */
601     USPTR saved_eptr; /* here, so the declarations can be cut */
602     /* out in a block. The only declarations */
603     recursion_info new_recursive; /* within blocks below are for variables */
604     /* that do not have to be preserved over */
605     BOOL cur_is_word; /* a recursive call to RMATCH(). */
606     BOOL condition;
607 nigel 77 BOOL prev_is_word;
608    
609     unsigned long int original_ims;
610    
611     #ifdef SUPPORT_UCP
612     int prop_type;
613 nigel 87 int prop_value;
614 nigel 77 int prop_fail_result;
615     int prop_category;
616     int prop_chartype;
617 nigel 87 int prop_script;
618 ph10 115 int oclength;
619     uschar occhars[8];
620 nigel 77 #endif
621    
622 ph10 399 int codelink;
623 nigel 77 int ctype;
624     int length;
625     int max;
626     int min;
627     int number;
628     int offset;
629     int op;
630     int save_capture_last;
631     int save_offset1, save_offset2, save_offset3;
632     int stacksave[REC_STACK_SAVE_MAX];
633    
634     eptrblock newptrb;
635 nigel 93 #endif /* NO_RECURSE */
636 nigel 77
637     /* These statements are here to stop the compiler complaining about uninitialized
638     variables. */
639    
640     #ifdef SUPPORT_UCP
641 nigel 87 prop_value = 0;
642 nigel 77 prop_fail_result = 0;
643     #endif
644    
645 nigel 93
646 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
647     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
648     used. Thanks to Ian Taylor for noticing this possibility and sending the
649     original patch. */
650    
651     TAIL_RECURSE:
652    
653 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
654     are specified by the macro RMATCH and RRETURN is used to return. When
655     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
656 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
657 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
658     complicated macro. It has to be used in one particular way. This shouldn't,
659     however, impact performance when true recursion is being used. */
660 nigel 77
661 ph10 164 #ifdef SUPPORT_UTF8
662     utf8 = md->utf8; /* Local copy of the flag */
663     #else
664     utf8 = FALSE;
665     #endif
666    
667 nigel 87 /* First check that we haven't called match() too many times, or that we
668     haven't exceeded the recursive call limit. */
669    
670 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
671 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
672 nigel 77
673     original_ims = ims; /* Save for resetting on ')' */
674 nigel 91
675 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
676     string, the match_cbegroup flag is set. When this is the case, add the current
677     subject pointer to the chain of such remembered pointers, to be checked when we
678     hit the closing ket, in order to break infinite loops that match no characters.
679 ph10 197 When match() is called in other circumstances, don't add to the chain. The
680     match_cbegroup flag must NOT be used with tail recursion, because the memory
681     block that is used is on the stack, so a new one may be required for each
682     match(). */
683 nigel 77
684 nigel 93 if ((flags & match_cbegroup) != 0)
685 nigel 77 {
686 ph10 197 newptrb.epb_saved_eptr = eptr;
687     newptrb.epb_prev = eptrb;
688     eptrb = &newptrb;
689 nigel 77 }
690    
691 nigel 93 /* Now start processing the opcodes. */
692 nigel 77
693     for (;;)
694     {
695 nigel 93 minimize = possessive = FALSE;
696 nigel 77 op = *ecode;
697 ph10 443
698 nigel 93 switch(op)
699     {
700 ph10 510 case OP_MARK:
701     markptr = ecode + 2;
702     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
703 ph10 512 ims, eptrb, flags, RM55);
704    
705     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
706     argument, and we must check whether that argument matches this MARK's
707     argument. It is passed back in md->start_match_ptr (an overloading of that
708     variable). If it does match, we reset that variable to the current subject
709     position and return MATCH_SKIP. Otherwise, pass back the return code
710 ph10 510 unaltered. */
711 ph10 512
712     if (rrc == MATCH_SKIP_ARG &&
713 ph10 510 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
714     {
715     md->start_match_ptr = eptr;
716     RRETURN(MATCH_SKIP);
717     }
718    
719 ph10 512 if (md->mark == NULL) md->mark = markptr;
720 ph10 510 RRETURN(rrc);
721    
722 ph10 210 case OP_FAIL:
723 ph10 510 MRRETURN(MATCH_NOMATCH);
724 ph10 211
725 ph10 551 /* COMMIT overrides PRUNE, SKIP, and THEN */
726 ph10 553
727 ph10 510 case OP_COMMIT:
728     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729     ims, eptrb, flags, RM52);
730 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
731 ph10 553 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
732     rrc != MATCH_THEN)
733 ph10 551 RRETURN(rrc);
734 ph10 510 MRRETURN(MATCH_COMMIT);
735    
736 ph10 551 /* PRUNE overrides THEN */
737 ph10 553
738 ph10 210 case OP_PRUNE:
739     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
740     ims, eptrb, flags, RM51);
741 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
742 ph10 510 MRRETURN(MATCH_PRUNE);
743 ph10 211
744 ph10 510 case OP_PRUNE_ARG:
745     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
746 ph10 512 ims, eptrb, flags, RM56);
747 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748 ph10 510 md->mark = ecode + 2;
749     RRETURN(MATCH_PRUNE);
750 ph10 211
751 ph10 551 /* SKIP overrides PRUNE and THEN */
752 ph10 553
753 ph10 210 case OP_SKIP:
754     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
755     ims, eptrb, flags, RM53);
756 ph10 553 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
757 ph10 551 RRETURN(rrc);
758 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
759 ph10 510 MRRETURN(MATCH_SKIP);
760 ph10 211
761 ph10 510 case OP_SKIP_ARG:
762     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
763 ph10 512 ims, eptrb, flags, RM57);
764 ph10 553 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
765 ph10 551 RRETURN(rrc);
766 ph10 512
767     /* Pass back the current skip name by overloading md->start_match_ptr and
768     returning the special MATCH_SKIP_ARG return code. This will either be
769     caught by a matching MARK, or get to the top, where it is treated the same
770 ph10 510 as PRUNE. */
771 ph10 512
772 ph10 510 md->start_match_ptr = ecode + 2;
773 ph10 512 RRETURN(MATCH_SKIP_ARG);
774 ph10 553
775 ph10 550 /* For THEN (and THEN_ARG) we pass back the address of the bracket or
776 ph10 553 the alt that is at the start of the current branch. This makes it possible
777     to skip back past alternatives that precede the THEN within the current
778     branch. */
779 ph10 512
780 ph10 210 case OP_THEN:
781     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
782 ph10 212 ims, eptrb, flags, RM54);
783 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
784 ph10 550 md->start_match_ptr = ecode - GET(ecode, 1);
785 ph10 510 MRRETURN(MATCH_THEN);
786    
787     case OP_THEN_ARG:
788 ph10 553 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
789 ph10 550 offset_top, md, ims, eptrb, flags, RM58);
790 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
791 ph10 550 md->start_match_ptr = ecode - GET(ecode, 1);
792     md->mark = ecode + LINK_SIZE + 2;
793 ph10 212 RRETURN(MATCH_THEN);
794 ph10 211
795 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
796     the current subject position in the working slot at the top of the vector.
797     We mustn't change the current values of the data slot, because they may be
798     set from a previous iteration of this group, and be referred to by a
799     reference inside the group.
800 nigel 77
801 nigel 93 If the bracket fails to match, we need to restore this value and also the
802     values of the final offsets, in case they were set by a previous iteration
803     of the same bracket.
804 nigel 77
805 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
806     a non-capturing bracket. Don't worry about setting the flag for the error
807     case here; that is handled in the code for KET. */
808 nigel 77
809 nigel 93 case OP_CBRA:
810     case OP_SCBRA:
811     number = GET2(ecode, 1+LINK_SIZE);
812 nigel 77 offset = number << 1;
813    
814 ph10 475 #ifdef PCRE_DEBUG
815 nigel 93 printf("start bracket %d\n", number);
816     printf("subject=");
817 nigel 77 pchars(eptr, 16, TRUE, md);
818     printf("\n");
819     #endif
820    
821     if (offset < md->offset_max)
822     {
823     save_offset1 = md->offset_vector[offset];
824     save_offset2 = md->offset_vector[offset+1];
825     save_offset3 = md->offset_vector[md->offset_end - number];
826     save_capture_last = md->capture_last;
827    
828     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
829 ph10 531 md->offset_vector[md->offset_end - number] =
830 ph10 530 (int)(eptr - md->start_subject);
831 nigel 77
832 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
833 nigel 77 do
834     {
835 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
836     ims, eptrb, flags, RM1);
837 ph10 550 if (rrc != MATCH_NOMATCH &&
838     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
839     RRETURN(rrc);
840 nigel 77 md->capture_last = save_capture_last;
841     ecode += GET(ecode, 1);
842     }
843     while (*ecode == OP_ALT);
844    
845     DPRINTF(("bracket %d failed\n", number));
846    
847     md->offset_vector[offset] = save_offset1;
848     md->offset_vector[offset+1] = save_offset2;
849     md->offset_vector[md->offset_end - number] = save_offset3;
850    
851 ph10 510 if (rrc != MATCH_THEN) md->mark = markptr;
852 nigel 77 RRETURN(MATCH_NOMATCH);
853     }
854    
855 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
856     as a non-capturing bracket. */
857 nigel 77
858 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
859     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
860    
861 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
862 nigel 77
863 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
864     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
865    
866 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
867     final alternative within the brackets, we would return the result of a
868     recursive call to match() whatever happened. We can reduce stack usage by
869 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
870     is set.*/
871 nigel 77
872 nigel 93 case OP_BRA:
873     case OP_SBRA:
874     DPRINTF(("start non-capturing bracket\n"));
875     flags = (op >= OP_SBRA)? match_cbegroup : 0;
876 nigel 91 for (;;)
877 nigel 77 {
878 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
879 nigel 93 {
880 ph10 197 if (flags == 0) /* Not a possibly empty group */
881     {
882     ecode += _pcre_OP_lengths[*ecode];
883     DPRINTF(("bracket 0 tail recursion\n"));
884     goto TAIL_RECURSE;
885     }
886    
887     /* Possibly empty group; can't use tail recursion. */
888    
889     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
890     eptrb, flags, RM48);
891 ph10 512 if (rrc == MATCH_NOMATCH) md->mark = markptr;
892     RRETURN(rrc);
893 nigel 93 }
894 nigel 91
895     /* For non-final alternatives, continue the loop for a NOMATCH result;
896     otherwise return. */
897    
898 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
899     eptrb, flags, RM2);
900 ph10 550 if (rrc != MATCH_NOMATCH &&
901     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
902     RRETURN(rrc);
903 nigel 77 ecode += GET(ecode, 1);
904     }
905 nigel 91 /* Control never reaches here. */
906 nigel 77
907     /* Conditional group: compilation checked that there are no more than
908     two branches. If the condition is false, skipping the first branch takes us
909     past the end if there is only one branch, but that's OK because that is
910 nigel 91 exactly what going to the ket would do. As there is only one branch to be
911     obeyed, we can use tail recursion to avoid using another stack frame. */
912 nigel 77
913     case OP_COND:
914 nigel 93 case OP_SCOND:
915 ph10 399 codelink= GET(ecode, 1);
916 ph10 406
917 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
918     inserted between OP_COND and an assertion condition. */
919 ph10 392
920 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
921     {
922     if (pcre_callout != NULL)
923     {
924     pcre_callout_block cb;
925     cb.version = 1; /* Version 1 of the callout block */
926     cb.callout_number = ecode[LINK_SIZE+2];
927     cb.offset_vector = md->offset_vector;
928     cb.subject = (PCRE_SPTR)md->start_subject;
929 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
930     cb.start_match = (int)(mstart - md->start_subject);
931     cb.current_position = (int)(eptr - md->start_subject);
932 ph10 381 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
933     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
934     cb.capture_top = offset_top/2;
935     cb.capture_last = md->capture_last;
936     cb.callout_data = md->callout_data;
937 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
938 ph10 381 if (rrc < 0) RRETURN(rrc);
939     }
940     ecode += _pcre_OP_lengths[OP_CALLOUT];
941     }
942 ph10 392
943 ph10 399 condcode = ecode[LINK_SIZE+1];
944 ph10 406
945 ph10 381 /* Now see what the actual condition is */
946 ph10 392
947 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
948 nigel 77 {
949 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
950     {
951 ph10 461 condition = FALSE;
952     ecode += GET(ecode, 1);
953     }
954 ph10 459 else
955 ph10 461 {
956 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
957     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
958 ph10 461
959 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
960     false, but the test was set up by name, scan the table to see if the
961     name refers to any other numbers, and test them. The condition is true
962     if any one is set. */
963 ph10 461
964 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
965     {
966     uschar *slotA = md->name_table;
967     for (i = 0; i < md->name_count; i++)
968 ph10 461 {
969     if (GET2(slotA, 0) == recno) break;
970 ph10 459 slotA += md->name_entry_size;
971     }
972 ph10 461
973 ph10 459 /* Found a name for the number - there can be only one; duplicate
974     names for different numbers are allowed, but not vice versa. First
975     scan down for duplicates. */
976 ph10 461
977 ph10 459 if (i < md->name_count)
978 ph10 461 {
979 ph10 459 uschar *slotB = slotA;
980     while (slotB > md->name_table)
981     {
982     slotB -= md->name_entry_size;
983     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
984     {
985     condition = GET2(slotB, 0) == md->recursive->group_num;
986 ph10 461 if (condition) break;
987     }
988 ph10 459 else break;
989 ph10 461 }
990    
991 ph10 459 /* Scan up for duplicates */
992 ph10 461
993 ph10 459 if (!condition)
994 ph10 461 {
995 ph10 459 slotB = slotA;
996     for (i++; i < md->name_count; i++)
997     {
998     slotB += md->name_entry_size;
999     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1000     {
1001     condition = GET2(slotB, 0) == md->recursive->group_num;
1002     if (condition) break;
1003 ph10 461 }
1004 ph10 459 else break;
1005 ph10 461 }
1006     }
1007 ph10 459 }
1008 ph10 461 }
1009    
1010 ph10 459 /* Choose branch according to the condition */
1011 ph10 461
1012 ph10 459 ecode += condition? 3 : GET(ecode, 1);
1013     }
1014 ph10 461 }
1015 nigel 93
1016 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1017 nigel 93 {
1018 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1019 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1020 ph10 461
1021 ph10 459 /* If the numbered capture is unset, but the reference was by name,
1022 ph10 461 scan the table to see if the name refers to any other numbers, and test
1023     them. The condition is true if any one is set. This is tediously similar
1024     to the code above, but not close enough to try to amalgamate. */
1025    
1026 ph10 459 if (!condition && condcode == OP_NCREF)
1027     {
1028 ph10 461 int refno = offset >> 1;
1029 ph10 459 uschar *slotA = md->name_table;
1030 ph10 461
1031 ph10 459 for (i = 0; i < md->name_count; i++)
1032 ph10 461 {
1033     if (GET2(slotA, 0) == refno) break;
1034 ph10 459 slotA += md->name_entry_size;
1035     }
1036 ph10 461
1037     /* Found a name for the number - there can be only one; duplicate names
1038     for different numbers are allowed, but not vice versa. First scan down
1039 ph10 459 for duplicates. */
1040 ph10 461
1041 ph10 459 if (i < md->name_count)
1042 ph10 461 {
1043 ph10 459 uschar *slotB = slotA;
1044     while (slotB > md->name_table)
1045     {
1046     slotB -= md->name_entry_size;
1047     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1048     {
1049     offset = GET2(slotB, 0) << 1;
1050 ph10 461 condition = offset < offset_top &&
1051 ph10 459 md->offset_vector[offset] >= 0;
1052 ph10 461 if (condition) break;
1053     }
1054 ph10 459 else break;
1055 ph10 461 }
1056    
1057 ph10 459 /* Scan up for duplicates */
1058 ph10 461
1059 ph10 459 if (!condition)
1060 ph10 461 {
1061 ph10 459 slotB = slotA;
1062     for (i++; i < md->name_count; i++)
1063     {
1064     slotB += md->name_entry_size;
1065     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1066     {
1067     offset = GET2(slotB, 0) << 1;
1068 ph10 461 condition = offset < offset_top &&
1069 ph10 459 md->offset_vector[offset] >= 0;
1070 ph10 461 if (condition) break;
1071     }
1072 ph10 459 else break;
1073 ph10 461 }
1074     }
1075 ph10 459 }
1076 ph10 461 }
1077    
1078 ph10 459 /* Choose branch according to the condition */
1079    
1080 nigel 93 ecode += condition? 3 : GET(ecode, 1);
1081 nigel 77 }
1082    
1083 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
1084 nigel 93 {
1085     condition = FALSE;
1086     ecode += GET(ecode, 1);
1087     }
1088    
1089 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
1090 nigel 93 the final argument match_condassert causes it to stop at the end of an
1091     assertion. */
1092 nigel 77
1093     else
1094     {
1095 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1096     match_condassert, RM3);
1097 nigel 77 if (rrc == MATCH_MATCH)
1098     {
1099 nigel 93 condition = TRUE;
1100     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1101 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1102     }
1103 ph10 550 else if (rrc != MATCH_NOMATCH &&
1104     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1105 nigel 77 {
1106     RRETURN(rrc); /* Need braces because of following else */
1107     }
1108 nigel 93 else
1109     {
1110     condition = FALSE;
1111 ph10 399 ecode += codelink;
1112 nigel 93 }
1113     }
1114 nigel 91
1115 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1116 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1117     match_cbegroup is required for an unlimited repeat of a possibly empty
1118     group. If the second alternative doesn't exist, we can just plough on. */
1119 nigel 91
1120 nigel 93 if (condition || *ecode == OP_ALT)
1121     {
1122 nigel 91 ecode += 1 + LINK_SIZE;
1123 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1124     {
1125     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1126     RRETURN(rrc);
1127     }
1128     else /* Group must match something */
1129     {
1130     flags = 0;
1131     goto TAIL_RECURSE;
1132     }
1133 nigel 77 }
1134 ph10 395 else /* Condition false & no alternative */
1135 nigel 93 {
1136     ecode += 1 + LINK_SIZE;
1137     }
1138     break;
1139 nigel 77
1140 ph10 461
1141 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1142     to close any currently open capturing brackets. */
1143 ph10 461
1144 ph10 447 case OP_CLOSE:
1145 ph10 461 number = GET2(ecode, 1);
1146 ph10 447 offset = number << 1;
1147 ph10 461
1148 ph10 475 #ifdef PCRE_DEBUG
1149 ph10 447 printf("end bracket %d at *ACCEPT", number);
1150     printf("\n");
1151     #endif
1152 nigel 77
1153 ph10 447 md->capture_last = number;
1154     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1155     {
1156     md->offset_vector[offset] =
1157     md->offset_vector[md->offset_end - number];
1158 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1159 ph10 447 if (offset_top <= offset) offset_top = offset + 2;
1160     }
1161     ecode += 3;
1162 ph10 461 break;
1163 ph10 447
1164    
1165 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1166     recursion, we should restore the offsets appropriately and continue from
1167     after the call. */
1168 nigel 77
1169 ph10 210 case OP_ACCEPT:
1170 nigel 77 case OP_END:
1171     if (md->recursive != NULL && md->recursive->group_num == 0)
1172     {
1173     recursion_info *rec = md->recursive;
1174 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1175 nigel 77 md->recursive = rec->prevrec;
1176     memmove(md->offset_vector, rec->offset_save,
1177     rec->saved_max * sizeof(int));
1178 ph10 461 offset_top = rec->save_offset_top;
1179 nigel 77 ims = original_ims;
1180     ecode = rec->after_call;
1181     break;
1182     }
1183    
1184 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1185     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1186     the subject. In both cases, backtracking will then try other alternatives,
1187     if any. */
1188 ph10 443
1189 ph10 442 if (eptr == mstart &&
1190     (md->notempty ||
1191 ph10 443 (md->notempty_atstart &&
1192 ph10 442 mstart == md->start_subject + md->start_offset)))
1193 ph10 510 MRRETURN(MATCH_NOMATCH);
1194 ph10 443
1195 ph10 442 /* Otherwise, we have a match. */
1196 nigel 77
1197 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1198     md->end_offset_top = offset_top; /* and how many extracts were taken */
1199 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1200 nigel 77
1201 ph10 512 /* For some reason, the macros don't work properly if an expression is
1202     given as the argument to MRRETURN when the heap is in use. */
1203    
1204     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1205     MRRETURN(rrc);
1206    
1207 nigel 77 /* Change option settings */
1208    
1209     case OP_OPT:
1210     ims = ecode[1];
1211     ecode += 2;
1212     DPRINTF(("ims set to %02lx\n", ims));
1213     break;
1214    
1215     /* Assertion brackets. Check the alternative branches in turn - the
1216     matching won't pass the KET for an assertion. If any one branch matches,
1217     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1218     start of each branch to move the current point backwards, so the code at
1219     this level is identical to the lookahead case. */
1220    
1221     case OP_ASSERT:
1222     case OP_ASSERTBACK:
1223     do
1224     {
1225 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1226     RM4);
1227 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1228 ph10 500 {
1229     mstart = md->start_match_ptr; /* In case \K reset it */
1230     break;
1231 ph10 501 }
1232 ph10 550 if (rrc != MATCH_NOMATCH &&
1233     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1234     RRETURN(rrc);
1235 nigel 77 ecode += GET(ecode, 1);
1236     }
1237     while (*ecode == OP_ALT);
1238 ph10 510 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1239 nigel 77
1240     /* If checking an assertion for a condition, return MATCH_MATCH. */
1241    
1242     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1243    
1244     /* Continue from after the assertion, updating the offsets high water
1245     mark, since extracts may have been taken during the assertion. */
1246    
1247     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1248     ecode += 1 + LINK_SIZE;
1249     offset_top = md->end_offset_top;
1250     continue;
1251    
1252 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1253 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1254 ph10 473 branches. */
1255 nigel 77
1256     case OP_ASSERT_NOT:
1257     case OP_ASSERTBACK_NOT:
1258     do
1259     {
1260 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1261     RM5);
1262 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1263 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1264     {
1265     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1266 ph10 482 break;
1267     }
1268 ph10 550 if (rrc != MATCH_NOMATCH &&
1269     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1270     RRETURN(rrc);
1271 nigel 77 ecode += GET(ecode,1);
1272     }
1273     while (*ecode == OP_ALT);
1274    
1275     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1276    
1277     ecode += 1 + LINK_SIZE;
1278     continue;
1279    
1280     /* Move the subject pointer back. This occurs only at the start of
1281     each branch of a lookbehind assertion. If we are too close to the start to
1282     move back, this match function fails. When working with UTF-8 we move
1283     back a number of characters, not bytes. */
1284    
1285     case OP_REVERSE:
1286     #ifdef SUPPORT_UTF8
1287     if (utf8)
1288     {
1289 nigel 93 i = GET(ecode, 1);
1290     while (i-- > 0)
1291 nigel 77 {
1292     eptr--;
1293 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1294 ph10 207 BACKCHAR(eptr);
1295 nigel 77 }
1296     }
1297     else
1298     #endif
1299    
1300     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1301    
1302     {
1303 nigel 93 eptr -= GET(ecode, 1);
1304 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1305 nigel 77 }
1306    
1307 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1308 nigel 77
1309 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1310 nigel 77 ecode += 1 + LINK_SIZE;
1311     break;
1312    
1313     /* The callout item calls an external function, if one is provided, passing
1314     details of the match so far. This is mainly for debugging, though the
1315     function is able to force a failure. */
1316    
1317     case OP_CALLOUT:
1318     if (pcre_callout != NULL)
1319     {
1320     pcre_callout_block cb;
1321     cb.version = 1; /* Version 1 of the callout block */
1322     cb.callout_number = ecode[1];
1323     cb.offset_vector = md->offset_vector;
1324 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1325 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
1326     cb.start_match = (int)(mstart - md->start_subject);
1327     cb.current_position = (int)(eptr - md->start_subject);
1328 nigel 77 cb.pattern_position = GET(ecode, 2);
1329     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1330     cb.capture_top = offset_top/2;
1331     cb.capture_last = md->capture_last;
1332     cb.callout_data = md->callout_data;
1333 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1334 nigel 77 if (rrc < 0) RRETURN(rrc);
1335     }
1336     ecode += 2 + 2*LINK_SIZE;
1337     break;
1338    
1339     /* Recursion either matches the current regex, or some subexpression. The
1340     offset data is the offset to the starting bracket from the start of the
1341     whole pattern. (This is so that it works from duplicated subpatterns.)
1342    
1343     If there are any capturing brackets started but not finished, we have to
1344     save their starting points and reinstate them after the recursion. However,
1345     we don't know how many such there are (offset_top records the completed
1346     total) so we just have to save all the potential data. There may be up to
1347     65535 such values, which is too large to put on the stack, but using malloc
1348     for small numbers seems expensive. As a compromise, the stack is used when
1349     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1350     is used. A problem is what to do if the malloc fails ... there is no way of
1351     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
1352     values on the stack, and accept that the rest may be wrong.
1353    
1354     There are also other values that have to be saved. We use a chained
1355     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1356     for the original version of this logic. */
1357    
1358     case OP_RECURSE:
1359     {
1360     callpat = md->start_code + GET(ecode, 1);
1361 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1362     GET2(callpat, 1 + LINK_SIZE);
1363 nigel 77
1364     /* Add to "recursing stack" */
1365    
1366     new_recursive.prevrec = md->recursive;
1367     md->recursive = &new_recursive;
1368    
1369     /* Find where to continue from afterwards */
1370    
1371     ecode += 1 + LINK_SIZE;
1372     new_recursive.after_call = ecode;
1373    
1374     /* Now save the offset data. */
1375    
1376     new_recursive.saved_max = md->offset_end;
1377     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1378     new_recursive.offset_save = stacksave;
1379     else
1380     {
1381     new_recursive.offset_save =
1382     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1383     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1384     }
1385    
1386     memcpy(new_recursive.offset_save, md->offset_vector,
1387     new_recursive.saved_max * sizeof(int));
1388 ph10 461 new_recursive.save_offset_top = offset_top;
1389 nigel 77
1390     /* OK, now we can do the recursion. For each top-level alternative we
1391     restore the offset and recursion data. */
1392    
1393     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1394 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1395 nigel 77 do
1396     {
1397 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1398     md, ims, eptrb, flags, RM6);
1399 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1400 nigel 77 {
1401 nigel 87 DPRINTF(("Recursion matched\n"));
1402 nigel 77 md->recursive = new_recursive.prevrec;
1403     if (new_recursive.offset_save != stacksave)
1404     (pcre_free)(new_recursive.offset_save);
1405 ph10 510 MRRETURN(MATCH_MATCH);
1406 nigel 77 }
1407 ph10 550 else if (rrc != MATCH_NOMATCH &&
1408     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1409 nigel 87 {
1410     DPRINTF(("Recursion gave error %d\n", rrc));
1411 ph10 400 if (new_recursive.offset_save != stacksave)
1412     (pcre_free)(new_recursive.offset_save);
1413 nigel 87 RRETURN(rrc);
1414     }
1415 nigel 77
1416     md->recursive = &new_recursive;
1417     memcpy(md->offset_vector, new_recursive.offset_save,
1418     new_recursive.saved_max * sizeof(int));
1419     callpat += GET(callpat, 1);
1420     }
1421     while (*callpat == OP_ALT);
1422    
1423     DPRINTF(("Recursion didn't match\n"));
1424     md->recursive = new_recursive.prevrec;
1425     if (new_recursive.offset_save != stacksave)
1426     (pcre_free)(new_recursive.offset_save);
1427 ph10 510 MRRETURN(MATCH_NOMATCH);
1428 nigel 77 }
1429     /* Control never reaches here */
1430    
1431     /* "Once" brackets are like assertion brackets except that after a match,
1432     the point in the subject string is not moved back. Thus there can never be
1433     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1434     Check the alternative branches in turn - the matching won't pass the KET
1435     for this kind of subpattern. If any one branch matches, we carry on as at
1436 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1437     the start-of-match value in case it was changed by \K. */
1438 nigel 77
1439     case OP_ONCE:
1440 nigel 91 prev = ecode;
1441     saved_eptr = eptr;
1442    
1443     do
1444 nigel 77 {
1445 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1446 ph10 511 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1447 ph10 500 {
1448     mstart = md->start_match_ptr;
1449     break;
1450 ph10 501 }
1451 ph10 550 if (rrc != MATCH_NOMATCH &&
1452     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1453     RRETURN(rrc);
1454 nigel 91 ecode += GET(ecode,1);
1455     }
1456     while (*ecode == OP_ALT);
1457 nigel 77
1458 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1459 nigel 77
1460 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1461 nigel 77
1462 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1463     mark, since extracts may have been taken. */
1464 nigel 77
1465 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1466 nigel 77
1467 nigel 91 offset_top = md->end_offset_top;
1468     eptr = md->end_match_ptr;
1469 nigel 77
1470 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1471     happens for a repeating ket if no characters were matched in the group.
1472     This is the forcible breaking of infinite loops as implemented in Perl
1473     5.005. If there is an options reset, it will get obeyed in the normal
1474     course of events. */
1475 nigel 77
1476 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1477     {
1478     ecode += 1+LINK_SIZE;
1479     break;
1480     }
1481 nigel 77
1482 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1483     preceding bracket, in the appropriate order. The second "call" of match()
1484     uses tail recursion, to avoid using another stack frame. We need to reset
1485     any options that changed within the bracket before re-running it, so
1486     check the next opcode. */
1487 nigel 77
1488 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1489     {
1490     ims = (ims & ~PCRE_IMS) | ecode[4];
1491     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1492     }
1493 nigel 77
1494 nigel 91 if (*ecode == OP_KETRMIN)
1495     {
1496 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1497 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1498     ecode = prev;
1499 ph10 197 flags = 0;
1500 nigel 91 goto TAIL_RECURSE;
1501 nigel 77 }
1502 nigel 91 else /* OP_KETRMAX */
1503     {
1504 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1505 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1506     ecode += 1 + LINK_SIZE;
1507 ph10 197 flags = 0;
1508 nigel 91 goto TAIL_RECURSE;
1509     }
1510     /* Control never gets here */
1511 nigel 77
1512     /* An alternation is the end of a branch; scan along to find the end of the
1513     bracketed group and go to there. */
1514    
1515     case OP_ALT:
1516     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1517     break;
1518    
1519 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1520     indicating that it may occur zero times. It may repeat infinitely, or not
1521     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1522     with fixed upper repeat limits are compiled as a number of copies, with the
1523     optional ones preceded by BRAZERO or BRAMINZERO. */
1524 nigel 77
1525     case OP_BRAZERO:
1526     {
1527     next = ecode+1;
1528 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1529 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1530     do next += GET(next,1); while (*next == OP_ALT);
1531 nigel 93 ecode = next + 1 + LINK_SIZE;
1532 nigel 77 }
1533     break;
1534    
1535     case OP_BRAMINZERO:
1536     {
1537     next = ecode+1;
1538 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1539 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1540 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1541     ecode++;
1542     }
1543     break;
1544    
1545 ph10 335 case OP_SKIPZERO:
1546     {
1547     next = ecode+1;
1548     do next += GET(next,1); while (*next == OP_ALT);
1549     ecode = next + 1 + LINK_SIZE;
1550     }
1551     break;
1552    
1553 nigel 93 /* End of a group, repeated or non-repeating. */
1554 nigel 77
1555     case OP_KET:
1556     case OP_KETRMIN:
1557     case OP_KETRMAX:
1558 nigel 91 prev = ecode - GET(ecode, 1);
1559 nigel 77
1560 nigel 93 /* If this was a group that remembered the subject start, in order to break
1561     infinite repeats of empty string matches, retrieve the subject start from
1562     the chain. Otherwise, set it NULL. */
1563 nigel 77
1564 nigel 93 if (*prev >= OP_SBRA)
1565     {
1566     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1567     eptrb = eptrb->epb_prev; /* Backup to previous group */
1568     }
1569     else saved_eptr = NULL;
1570 nigel 77
1571 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1572     matching and return MATCH_MATCH, but record the current high water mark for
1573     use by positive assertions. We also need to record the match start in case
1574     it was changed by \K. */
1575 nigel 93
1576 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1577     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1578     *prev == OP_ONCE)
1579     {
1580     md->end_match_ptr = eptr; /* For ONCE */
1581     md->end_offset_top = offset_top;
1582 ph10 500 md->start_match_ptr = mstart;
1583 ph10 510 MRRETURN(MATCH_MATCH);
1584 nigel 91 }
1585 nigel 77
1586 nigel 93 /* For capturing groups we have to check the group number back at the start
1587     and if necessary complete handling an extraction by setting the offsets and
1588     bumping the high water mark. Note that whole-pattern recursion is coded as
1589     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1590     when the OP_END is reached. Other recursion is handled here. */
1591 nigel 77
1592 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1593 nigel 91 {
1594 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1595 nigel 91 offset = number << 1;
1596 ph10 461
1597 ph10 475 #ifdef PCRE_DEBUG
1598 nigel 91 printf("end bracket %d", number);
1599     printf("\n");
1600 nigel 77 #endif
1601    
1602 nigel 93 md->capture_last = number;
1603     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1604 nigel 91 {
1605 nigel 93 md->offset_vector[offset] =
1606     md->offset_vector[md->offset_end - number];
1607 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1608 nigel 93 if (offset_top <= offset) offset_top = offset + 2;
1609     }
1610 nigel 77
1611 nigel 93 /* Handle a recursively called group. Restore the offsets
1612     appropriately and continue from after the call. */
1613 nigel 77
1614 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1615     {
1616     recursion_info *rec = md->recursive;
1617     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1618     md->recursive = rec->prevrec;
1619     memcpy(md->offset_vector, rec->offset_save,
1620     rec->saved_max * sizeof(int));
1621 ph10 461 offset_top = rec->save_offset_top;
1622 nigel 93 ecode = rec->after_call;
1623     ims = original_ims;
1624     break;
1625 nigel 77 }
1626 nigel 91 }
1627 nigel 77
1628 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1629     flags, in case they got changed during the group. */
1630 nigel 77
1631 nigel 91 ims = original_ims;
1632     DPRINTF(("ims reset to %02lx\n", ims));
1633 nigel 77
1634 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1635     happens for a repeating ket if no characters were matched in the group.
1636     This is the forcible breaking of infinite loops as implemented in Perl
1637     5.005. If there is an options reset, it will get obeyed in the normal
1638     course of events. */
1639 nigel 77
1640 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1641     {
1642     ecode += 1 + LINK_SIZE;
1643     break;
1644     }
1645 nigel 77
1646 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1647     preceding bracket, in the appropriate order. In the second case, we can use
1648 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1649     unlimited repeat of a group that can match an empty string. */
1650 nigel 77
1651 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1652    
1653 nigel 91 if (*ecode == OP_KETRMIN)
1654     {
1655 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1656 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1657 ph10 197 if (flags != 0) /* Could match an empty string */
1658     {
1659     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1660     RRETURN(rrc);
1661     }
1662 nigel 91 ecode = prev;
1663     goto TAIL_RECURSE;
1664 nigel 77 }
1665 nigel 91 else /* OP_KETRMAX */
1666     {
1667 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1668 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1669     ecode += 1 + LINK_SIZE;
1670 ph10 197 flags = 0;
1671 nigel 91 goto TAIL_RECURSE;
1672     }
1673     /* Control never gets here */
1674 nigel 77
1675     /* Start of subject unless notbol, or after internal newline if multiline */
1676    
1677     case OP_CIRC:
1678 ph10 510 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1679 nigel 77 if ((ims & PCRE_MULTILINE) != 0)
1680     {
1681 nigel 91 if (eptr != md->start_subject &&
1682 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1683 ph10 510 MRRETURN(MATCH_NOMATCH);
1684 nigel 77 ecode++;
1685     break;
1686     }
1687     /* ... else fall through */
1688    
1689     /* Start of subject assertion */
1690    
1691     case OP_SOD:
1692 ph10 510 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1693 nigel 77 ecode++;
1694     break;
1695    
1696     /* Start of match assertion */
1697    
1698     case OP_SOM:
1699 ph10 510 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1700 nigel 77 ecode++;
1701     break;
1702 ph10 172
1703 ph10 168 /* Reset the start of match point */
1704 ph10 172
1705 ph10 168 case OP_SET_SOM:
1706     mstart = eptr;
1707 ph10 172 ecode++;
1708     break;
1709 nigel 77
1710     /* Assert before internal newline if multiline, or before a terminating
1711     newline unless endonly is set, else end of subject unless noteol is set. */
1712    
1713     case OP_DOLL:
1714     if ((ims & PCRE_MULTILINE) != 0)
1715     {
1716     if (eptr < md->end_subject)
1717 ph10 510 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1718 nigel 77 else
1719 ph10 579 {
1720     if (md->noteol) MRRETURN(MATCH_NOMATCH);
1721 ph10 553 SCHECK_PARTIAL();
1722     }
1723 nigel 77 ecode++;
1724     break;
1725     }
1726 ph10 553 else /* Not multiline */
1727 nigel 77 {
1728 ph10 510 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1729 ph10 553 if (!md->endonly) goto ASSERT_NL_OR_EOS;
1730 nigel 77 }
1731 ph10 579
1732 nigel 91 /* ... else fall through for endonly */
1733 nigel 77
1734     /* End of subject assertion (\z) */
1735    
1736     case OP_EOD:
1737 ph10 510 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1738 ph10 553 SCHECK_PARTIAL();
1739 nigel 77 ecode++;
1740     break;
1741    
1742     /* End of subject or ending \n assertion (\Z) */
1743    
1744     case OP_EODN:
1745 ph10 553 ASSERT_NL_OR_EOS:
1746     if (eptr < md->end_subject &&
1747 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1748 ph10 510 MRRETURN(MATCH_NOMATCH);
1749 ph10 579
1750 ph10 553 /* Either at end of string or \n before end. */
1751 ph10 579
1752 ph10 553 SCHECK_PARTIAL();
1753 nigel 77 ecode++;
1754     break;
1755    
1756     /* Word boundary assertions */
1757    
1758     case OP_NOT_WORD_BOUNDARY:
1759     case OP_WORD_BOUNDARY:
1760     {
1761    
1762     /* Find out if the previous and current characters are "word" characters.
1763     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1764 ph10 443 be "non-word" characters unless PCRE_UCP is in force, in which case Unicode
1765 ph10 435 properties are used. Remember the earliest consulted character for partial matching. */
1766 nigel 77
1767     #ifdef SUPPORT_UTF8
1768     if (utf8)
1769     {
1770 ph10 518 /* Get status of previous character */
1771 ph10 527
1772 nigel 77 if (eptr == md->start_subject) prev_is_word = FALSE; else
1773     {
1774 ph10 409 USPTR lastptr = eptr - 1;
1775 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1776 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1777 nigel 77 GETCHAR(c, lastptr);
1778 ph10 527 #ifdef SUPPORT_UCP
1779 ph10 518 if (md->use_ucp)
1780     {
1781     if (c == '_') prev_is_word = TRUE; else
1782 ph10 527 {
1783 ph10 518 int cat = UCD_CATEGORY(c);
1784     prev_is_word = (cat == ucp_L || cat == ucp_N);
1785 ph10 527 }
1786     }
1787     else
1788     #endif
1789 nigel 77 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1790     }
1791 ph10 527
1792 ph10 518 /* Get status of next character */
1793 ph10 527
1794 ph10 443 if (eptr >= md->end_subject)
1795 nigel 77 {
1796 ph10 443 SCHECK_PARTIAL();
1797     cur_is_word = FALSE;
1798 ph10 428 }
1799     else
1800     {
1801 nigel 77 GETCHAR(c, eptr);
1802 ph10 527 #ifdef SUPPORT_UCP
1803 ph10 518 if (md->use_ucp)
1804     {
1805     if (c == '_') cur_is_word = TRUE; else
1806 ph10 527 {
1807 ph10 518 int cat = UCD_CATEGORY(c);
1808     cur_is_word = (cat == ucp_L || cat == ucp_N);
1809 ph10 527 }
1810     }
1811     else
1812     #endif
1813 nigel 77 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1814     }
1815     }
1816     else
1817     #endif
1818    
1819 ph10 527 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1820 ph10 518 consistency with the behaviour of \w we do use it in this case. */
1821 nigel 77
1822     {
1823 ph10 518 /* Get status of previous character */
1824 ph10 527
1825 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1826     {
1827 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1828 ph10 527 #ifdef SUPPORT_UCP
1829 ph10 518 if (md->use_ucp)
1830     {
1831 ph10 527 c = eptr[-1];
1832 ph10 518 if (c == '_') prev_is_word = TRUE; else
1833 ph10 527 {
1834 ph10 518 int cat = UCD_CATEGORY(c);
1835     prev_is_word = (cat == ucp_L || cat == ucp_N);
1836 ph10 527 }
1837     }
1838     else
1839     #endif
1840 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1841     }
1842 ph10 527
1843 ph10 518 /* Get status of next character */
1844 ph10 527
1845 ph10 443 if (eptr >= md->end_subject)
1846 ph10 428 {
1847 ph10 443 SCHECK_PARTIAL();
1848     cur_is_word = FALSE;
1849 ph10 428 }
1850 ph10 527 else
1851     #ifdef SUPPORT_UCP
1852 ph10 518 if (md->use_ucp)
1853     {
1854 ph10 527 c = *eptr;
1855 ph10 518 if (c == '_') cur_is_word = TRUE; else
1856 ph10 527 {
1857 ph10 518 int cat = UCD_CATEGORY(c);
1858     cur_is_word = (cat == ucp_L || cat == ucp_N);
1859 ph10 527 }
1860     }
1861     else
1862     #endif
1863 ph10 518 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1864 nigel 77 }
1865    
1866     /* Now see if the situation is what we want */
1867    
1868     if ((*ecode++ == OP_WORD_BOUNDARY)?
1869     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1870 ph10 510 MRRETURN(MATCH_NOMATCH);
1871 nigel 77 }
1872     break;
1873    
1874     /* Match a single character type; inline for speed */
1875    
1876     case OP_ANY:
1877 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1878 ph10 345 /* Fall through */
1879    
1880 ph10 341 case OP_ALLANY:
1881 ph10 443 if (eptr++ >= md->end_subject)
1882 ph10 428 {
1883 ph10 443 SCHECK_PARTIAL();
1884 ph10 510 MRRETURN(MATCH_NOMATCH);
1885 ph10 443 }
1886 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1887 nigel 77 ecode++;
1888     break;
1889    
1890     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1891     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1892    
1893     case OP_ANYBYTE:
1894 ph10 443 if (eptr++ >= md->end_subject)
1895 ph10 428 {
1896 ph10 443 SCHECK_PARTIAL();
1897 ph10 510 MRRETURN(MATCH_NOMATCH);
1898 ph10 443 }
1899 nigel 77 ecode++;
1900     break;
1901    
1902     case OP_NOT_DIGIT:
1903 ph10 443 if (eptr >= md->end_subject)
1904 ph10 428 {
1905 ph10 443 SCHECK_PARTIAL();
1906 ph10 510 MRRETURN(MATCH_NOMATCH);
1907 ph10 443 }
1908 nigel 77 GETCHARINCTEST(c, eptr);
1909     if (
1910     #ifdef SUPPORT_UTF8
1911     c < 256 &&
1912     #endif
1913     (md->ctypes[c] & ctype_digit) != 0
1914     )
1915 ph10 510 MRRETURN(MATCH_NOMATCH);
1916 nigel 77 ecode++;
1917     break;
1918    
1919     case OP_DIGIT:
1920 ph10 443 if (eptr >= md->end_subject)
1921 ph10 428 {
1922 ph10 443 SCHECK_PARTIAL();
1923 ph10 510 MRRETURN(MATCH_NOMATCH);
1924 ph10 443 }
1925 nigel 77 GETCHARINCTEST(c, eptr);
1926     if (
1927     #ifdef SUPPORT_UTF8
1928     c >= 256 ||
1929     #endif
1930     (md->ctypes[c] & ctype_digit) == 0
1931     )
1932 ph10 510 MRRETURN(MATCH_NOMATCH);
1933 nigel 77 ecode++;
1934     break;
1935    
1936     case OP_NOT_WHITESPACE:
1937 ph10 443 if (eptr >= md->end_subject)
1938 ph10 428 {
1939 ph10 443 SCHECK_PARTIAL();
1940 ph10 510 MRRETURN(MATCH_NOMATCH);
1941 ph10 443 }
1942 nigel 77 GETCHARINCTEST(c, eptr);
1943     if (
1944     #ifdef SUPPORT_UTF8
1945     c < 256 &&
1946     #endif
1947     (md->ctypes[c] & ctype_space) != 0
1948     )
1949 ph10 510 MRRETURN(MATCH_NOMATCH);
1950 nigel 77 ecode++;
1951     break;
1952    
1953     case OP_WHITESPACE:
1954 ph10 443 if (eptr >= md->end_subject)
1955 ph10 428 {
1956 ph10 443 SCHECK_PARTIAL();
1957 ph10 510 MRRETURN(MATCH_NOMATCH);
1958 ph10 443 }
1959 nigel 77 GETCHARINCTEST(c, eptr);
1960     if (
1961     #ifdef SUPPORT_UTF8
1962     c >= 256 ||
1963     #endif
1964     (md->ctypes[c] & ctype_space) == 0
1965     )
1966 ph10 510 MRRETURN(MATCH_NOMATCH);
1967 nigel 77 ecode++;
1968     break;
1969    
1970     case OP_NOT_WORDCHAR:
1971 ph10 443 if (eptr >= md->end_subject)
1972 ph10 428 {
1973 ph10 443 SCHECK_PARTIAL();
1974 ph10 510 MRRETURN(MATCH_NOMATCH);
1975 ph10 443 }
1976 nigel 77 GETCHARINCTEST(c, eptr);
1977     if (
1978     #ifdef SUPPORT_UTF8
1979     c < 256 &&
1980     #endif
1981     (md->ctypes[c] & ctype_word) != 0
1982     )
1983 ph10 510 MRRETURN(MATCH_NOMATCH);
1984 nigel 77 ecode++;
1985     break;
1986    
1987     case OP_WORDCHAR:
1988 ph10 443 if (eptr >= md->end_subject)
1989 ph10 428 {
1990 ph10 443 SCHECK_PARTIAL();
1991 ph10 510 MRRETURN(MATCH_NOMATCH);
1992 ph10 443 }
1993 nigel 77 GETCHARINCTEST(c, eptr);
1994     if (
1995     #ifdef SUPPORT_UTF8
1996     c >= 256 ||
1997     #endif
1998     (md->ctypes[c] & ctype_word) == 0
1999     )
2000 ph10 510 MRRETURN(MATCH_NOMATCH);
2001 nigel 77 ecode++;
2002     break;
2003    
2004 nigel 93 case OP_ANYNL:
2005 ph10 443 if (eptr >= md->end_subject)
2006 ph10 428 {
2007 ph10 443 SCHECK_PARTIAL();
2008 ph10 510 MRRETURN(MATCH_NOMATCH);
2009 ph10 443 }
2010 nigel 93 GETCHARINCTEST(c, eptr);
2011     switch(c)
2012     {
2013 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2014 nigel 93 case 0x000d:
2015     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2016     break;
2017 ph10 231
2018 nigel 93 case 0x000a:
2019 ph10 231 break;
2020    
2021 nigel 93 case 0x000b:
2022     case 0x000c:
2023     case 0x0085:
2024     case 0x2028:
2025     case 0x2029:
2026 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2027 nigel 93 break;
2028     }
2029     ecode++;
2030     break;
2031    
2032 ph10 178 case OP_NOT_HSPACE:
2033 ph10 443 if (eptr >= md->end_subject)
2034 ph10 428 {
2035 ph10 443 SCHECK_PARTIAL();
2036 ph10 510 MRRETURN(MATCH_NOMATCH);
2037 ph10 443 }
2038 ph10 178 GETCHARINCTEST(c, eptr);
2039     switch(c)
2040     {
2041     default: break;
2042     case 0x09: /* HT */
2043     case 0x20: /* SPACE */
2044     case 0xa0: /* NBSP */
2045     case 0x1680: /* OGHAM SPACE MARK */
2046     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2047     case 0x2000: /* EN QUAD */
2048     case 0x2001: /* EM QUAD */
2049     case 0x2002: /* EN SPACE */
2050     case 0x2003: /* EM SPACE */
2051     case 0x2004: /* THREE-PER-EM SPACE */
2052     case 0x2005: /* FOUR-PER-EM SPACE */
2053     case 0x2006: /* SIX-PER-EM SPACE */
2054     case 0x2007: /* FIGURE SPACE */
2055     case 0x2008: /* PUNCTUATION SPACE */
2056     case 0x2009: /* THIN SPACE */
2057     case 0x200A: /* HAIR SPACE */
2058     case 0x202f: /* NARROW NO-BREAK SPACE */
2059     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2060     case 0x3000: /* IDEOGRAPHIC SPACE */
2061 ph10 510 MRRETURN(MATCH_NOMATCH);
2062 ph10 178 }
2063     ecode++;
2064     break;
2065    
2066     case OP_HSPACE:
2067 ph10 443 if (eptr >= md->end_subject)
2068 ph10 428 {
2069 ph10 443 SCHECK_PARTIAL();
2070 ph10 510 MRRETURN(MATCH_NOMATCH);
2071 ph10 443 }
2072 ph10 178 GETCHARINCTEST(c, eptr);
2073     switch(c)
2074     {
2075 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2076 ph10 178 case 0x09: /* HT */
2077     case 0x20: /* SPACE */
2078     case 0xa0: /* NBSP */
2079     case 0x1680: /* OGHAM SPACE MARK */
2080     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2081     case 0x2000: /* EN QUAD */
2082     case 0x2001: /* EM QUAD */
2083     case 0x2002: /* EN SPACE */
2084     case 0x2003: /* EM SPACE */
2085     case 0x2004: /* THREE-PER-EM SPACE */
2086     case 0x2005: /* FOUR-PER-EM SPACE */
2087     case 0x2006: /* SIX-PER-EM SPACE */
2088     case 0x2007: /* FIGURE SPACE */
2089     case 0x2008: /* PUNCTUATION SPACE */
2090     case 0x2009: /* THIN SPACE */
2091     case 0x200A: /* HAIR SPACE */
2092     case 0x202f: /* NARROW NO-BREAK SPACE */
2093     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2094     case 0x3000: /* IDEOGRAPHIC SPACE */
2095     break;
2096     }
2097     ecode++;
2098     break;
2099    
2100     case OP_NOT_VSPACE:
2101 ph10 443 if (eptr >= md->end_subject)
2102 ph10 428 {
2103 ph10 443 SCHECK_PARTIAL();
2104 ph10 510 MRRETURN(MATCH_NOMATCH);
2105 ph10 443 }
2106 ph10 178 GETCHARINCTEST(c, eptr);
2107     switch(c)
2108     {
2109     default: break;
2110     case 0x0a: /* LF */
2111     case 0x0b: /* VT */
2112     case 0x0c: /* FF */
2113     case 0x0d: /* CR */
2114     case 0x85: /* NEL */
2115     case 0x2028: /* LINE SEPARATOR */
2116     case 0x2029: /* PARAGRAPH SEPARATOR */
2117 ph10 510 MRRETURN(MATCH_NOMATCH);
2118 ph10 178 }
2119     ecode++;
2120     break;
2121    
2122     case OP_VSPACE:
2123 ph10 443 if (eptr >= md->end_subject)
2124 ph10 428 {
2125 ph10 443 SCHECK_PARTIAL();
2126 ph10 510 MRRETURN(MATCH_NOMATCH);
2127 ph10 443 }
2128 ph10 178 GETCHARINCTEST(c, eptr);
2129     switch(c)
2130     {
2131 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2132 ph10 178 case 0x0a: /* LF */
2133     case 0x0b: /* VT */
2134     case 0x0c: /* FF */
2135     case 0x0d: /* CR */
2136     case 0x85: /* NEL */
2137     case 0x2028: /* LINE SEPARATOR */
2138     case 0x2029: /* PARAGRAPH SEPARATOR */
2139     break;
2140     }
2141     ecode++;
2142     break;
2143    
2144 nigel 77 #ifdef SUPPORT_UCP
2145     /* Check the next character by Unicode property. We will get here only
2146     if the support is in the binary; otherwise a compile-time error occurs. */
2147    
2148     case OP_PROP:
2149     case OP_NOTPROP:
2150 ph10 443 if (eptr >= md->end_subject)
2151 ph10 428 {
2152 ph10 443 SCHECK_PARTIAL();
2153 ph10 510 MRRETURN(MATCH_NOMATCH);
2154 ph10 443 }
2155 nigel 77 GETCHARINCTEST(c, eptr);
2156     {
2157 ph10 384 const ucd_record *prop = GET_UCD(c);
2158 nigel 77
2159 nigel 87 switch(ecode[1])
2160     {
2161     case PT_ANY:
2162 ph10 510 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2163 nigel 87 break;
2164 nigel 77
2165 nigel 87 case PT_LAMP:
2166 ph10 349 if ((prop->chartype == ucp_Lu ||
2167     prop->chartype == ucp_Ll ||
2168     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2169 ph10 510 MRRETURN(MATCH_NOMATCH);
2170 ph10 517 break;
2171 nigel 87
2172     case PT_GC:
2173 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2174 ph10 510 MRRETURN(MATCH_NOMATCH);
2175 nigel 87 break;
2176    
2177     case PT_PC:
2178 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2179 ph10 510 MRRETURN(MATCH_NOMATCH);
2180 nigel 87 break;
2181    
2182     case PT_SC:
2183 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2184 ph10 510 MRRETURN(MATCH_NOMATCH);
2185 nigel 87 break;
2186 ph10 527
2187 ph10 517 /* These are specials */
2188 ph10 527
2189 ph10 517 case PT_ALNUM:
2190     if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2191     _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2192     MRRETURN(MATCH_NOMATCH);
2193 ph10 527 break;
2194    
2195 ph10 517 case PT_SPACE: /* Perl space */
2196     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2197     c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2198     == (op == OP_NOTPROP))
2199     MRRETURN(MATCH_NOMATCH);
2200 ph10 527 break;
2201    
2202 ph10 517 case PT_PXSPACE: /* POSIX space */
2203     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2204 ph10 527 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2205 ph10 517 c == CHAR_FF || c == CHAR_CR)
2206     == (op == OP_NOTPROP))
2207     MRRETURN(MATCH_NOMATCH);
2208 ph10 527 break;
2209 nigel 87
2210 ph10 527 case PT_WORD:
2211 ph10 517 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2212 ph10 527 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2213 ph10 517 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2214     MRRETURN(MATCH_NOMATCH);
2215 ph10 527 break;
2216    
2217 ph10 517 /* This should never occur */
2218    
2219 nigel 87 default:
2220     RRETURN(PCRE_ERROR_INTERNAL);
2221 nigel 77 }
2222 nigel 87
2223     ecode += 3;
2224 nigel 77 }
2225     break;
2226    
2227     /* Match an extended Unicode sequence. We will get here only if the support
2228     is in the binary; otherwise a compile-time error occurs. */
2229    
2230     case OP_EXTUNI:
2231 ph10 443 if (eptr >= md->end_subject)
2232 ph10 428 {
2233 ph10 443 SCHECK_PARTIAL();
2234 ph10 510 MRRETURN(MATCH_NOMATCH);
2235 ph10 443 }
2236 nigel 77 GETCHARINCTEST(c, eptr);
2237     {
2238 ph10 349 int category = UCD_CATEGORY(c);
2239 ph10 510 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2240 nigel 77 while (eptr < md->end_subject)
2241     {
2242     int len = 1;
2243     if (!utf8) c = *eptr; else
2244     {
2245     GETCHARLEN(c, eptr, len);
2246     }
2247 ph10 349 category = UCD_CATEGORY(c);
2248 nigel 77 if (category != ucp_M) break;
2249     eptr += len;
2250     }
2251     }
2252     ecode++;
2253     break;
2254     #endif
2255    
2256    
2257     /* Match a back reference, possibly repeatedly. Look past the end of the
2258     item to see if there is repeat information following. The code is similar
2259     to that for character classes, but repeated for efficiency. Then obey
2260     similar code to character type repeats - written out again for speed.
2261     However, if the referenced string is the empty string, always treat
2262     it as matched, any number of times (otherwise there could be infinite
2263     loops). */
2264    
2265     case OP_REF:
2266 ph10 595 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2267     ecode += 3;
2268 ph10 345
2269 ph10 595 /* If the reference is unset, there are two possibilities:
2270 ph10 345
2271 ph10 595 (a) In the default, Perl-compatible state, set the length negative;
2272     this ensures that every attempt at a match fails. We can't just fail
2273     here, because of the possibility of quantifiers with zero minima.
2274 ph10 345
2275 ph10 595 (b) If the JavaScript compatibility flag is set, set the length to zero
2276     so that the back reference matches an empty string.
2277 ph10 345
2278 ph10 595 Otherwise, set the length to the length of what was matched by the
2279     referenced subpattern. */
2280 ph10 345
2281 ph10 595 if (offset >= offset_top || md->offset_vector[offset] < 0)
2282     length = (md->jscript_compat)? 0 : -1;
2283     else
2284     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2285 nigel 77
2286 ph10 595 /* Set up for repetition, or handle the non-repeated case */
2287 nigel 77
2288 ph10 595 switch (*ecode)
2289     {
2290     case OP_CRSTAR:
2291     case OP_CRMINSTAR:
2292     case OP_CRPLUS:
2293     case OP_CRMINPLUS:
2294     case OP_CRQUERY:
2295     case OP_CRMINQUERY:
2296     c = *ecode++ - OP_CRSTAR;
2297     minimize = (c & 1) != 0;
2298     min = rep_min[c]; /* Pick up values from tables; */
2299     max = rep_max[c]; /* zero for max => infinity */
2300     if (max == 0) max = INT_MAX;
2301     break;
2302 nigel 77
2303 ph10 595 case OP_CRRANGE:
2304     case OP_CRMINRANGE:
2305     minimize = (*ecode == OP_CRMINRANGE);
2306     min = GET2(ecode, 1);
2307     max = GET2(ecode, 3);
2308     if (max == 0) max = INT_MAX;
2309     ecode += 5;
2310     break;
2311 nigel 77
2312 ph10 595 default: /* No repeat follows */
2313     if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2314     {
2315     CHECK_PARTIAL();
2316     MRRETURN(MATCH_NOMATCH);
2317 nigel 77 }
2318 ph10 595 eptr += length;
2319     continue; /* With the main loop */
2320     }
2321 nigel 77
2322 ph10 595 /* Handle repeated back references. If the length of the reference is
2323     zero, just continue with the main loop. */
2324 ph10 443
2325 ph10 595 if (length == 0) continue;
2326 nigel 77
2327 ph10 595 /* First, ensure the minimum number of matches are present. We get back
2328     the length of the reference string explicitly rather than passing the
2329     address of eptr, so that eptr can be a register variable. */
2330 nigel 77
2331 ph10 595 for (i = 1; i <= min; i++)
2332     {
2333     int slength;
2334     if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2335 nigel 77 {
2336 ph10 595 CHECK_PARTIAL();
2337     MRRETURN(MATCH_NOMATCH);
2338 nigel 77 }
2339 ph10 595 eptr += slength;
2340     }
2341 nigel 77
2342 ph10 595 /* If min = max, continue at the same level without recursion.
2343     They are not both allowed to be zero. */
2344 nigel 77
2345 ph10 595 if (min == max) continue;
2346 nigel 77
2347 ph10 595 /* If minimizing, keep trying and advancing the pointer */
2348 nigel 77
2349 ph10 595 if (minimize)
2350     {
2351     for (fi = min;; fi++)
2352 nigel 77 {
2353 ph10 595 int slength;
2354     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2355     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2356     if (fi >= max) MRRETURN(MATCH_NOMATCH);
2357     if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2358 nigel 77 {
2359 ph10 595 CHECK_PARTIAL();
2360     MRRETURN(MATCH_NOMATCH);
2361 nigel 77 }
2362 ph10 595 eptr += slength;
2363 nigel 77 }
2364 ph10 595 /* Control never gets here */
2365     }
2366 nigel 77
2367 ph10 595 /* If maximizing, find the longest string and work backwards */
2368 nigel 77
2369 ph10 595 else
2370     {
2371     pp = eptr;
2372     for (i = min; i < max; i++)
2373 nigel 77 {
2374 ph10 595 int slength;
2375     if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2376 nigel 77 {
2377 ph10 595 CHECK_PARTIAL();
2378     break;
2379 nigel 77 }
2380 ph10 595 eptr += slength;
2381 nigel 77 }
2382 ph10 595 while (eptr >= pp)
2383     {
2384     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2385     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2386     eptr -= length;
2387     }
2388     MRRETURN(MATCH_NOMATCH);
2389 nigel 77 }
2390     /* Control never gets here */
2391    
2392     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2393     used when all the characters in the class have values in the range 0-255,
2394     and either the matching is caseful, or the characters are in the range
2395     0-127 when UTF-8 processing is enabled. The only difference between
2396     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2397     encountered.
2398    
2399     First, look past the end of the item to see if there is repeat information
2400     following. Then obey similar code to character type repeats - written out
2401     again for speed. */
2402    
2403     case OP_NCLASS:
2404     case OP_CLASS:
2405     {
2406     data = ecode + 1; /* Save for matching */
2407     ecode += 33; /* Advance past the item */
2408    
2409     switch (*ecode)
2410     {
2411     case OP_CRSTAR:
2412     case OP_CRMINSTAR:
2413     case OP_CRPLUS:
2414     case OP_CRMINPLUS:
2415     case OP_CRQUERY:
2416     case OP_CRMINQUERY:
2417     c = *ecode++ - OP_CRSTAR;
2418     minimize = (c & 1) != 0;
2419     min = rep_min[c]; /* Pick up values from tables; */
2420     max = rep_max[c]; /* zero for max => infinity */
2421     if (max == 0) max = INT_MAX;
2422     break;
2423    
2424     case OP_CRRANGE:
2425     case OP_CRMINRANGE:
2426     minimize = (*ecode == OP_CRMINRANGE);
2427     min = GET2(ecode, 1);
2428     max = GET2(ecode, 3);
2429     if (max == 0) max = INT_MAX;
2430     ecode += 5;
2431     break;
2432    
2433     default: /* No repeat follows */
2434     min = max = 1;
2435     break;
2436     }
2437    
2438     /* First, ensure the minimum number of matches are present. */
2439    
2440     #ifdef SUPPORT_UTF8
2441     /* UTF-8 mode */
2442     if (utf8)
2443     {
2444     for (i = 1; i <= min; i++)
2445     {
2446 ph10 427 if (eptr >= md->end_subject)
2447 ph10 426 {
2448 ph10 428 SCHECK_PARTIAL();
2449 ph10 510 MRRETURN(MATCH_NOMATCH);
2450 ph10 427 }
2451 nigel 77 GETCHARINC(c, eptr);
2452     if (c > 255)
2453     {
2454 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2455 nigel 77 }
2456     else
2457     {
2458 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2459 nigel 77 }
2460     }
2461     }
2462     else
2463     #endif
2464     /* Not UTF-8 mode */
2465     {
2466     for (i = 1; i <= min; i++)
2467     {
2468 ph10 427 if (eptr >= md->end_subject)
2469 ph10 426 {
2470 ph10 428 SCHECK_PARTIAL();
2471 ph10 510 MRRETURN(MATCH_NOMATCH);
2472 ph10 427 }
2473 nigel 77 c = *eptr++;
2474 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2475 nigel 77 }
2476     }
2477    
2478     /* If max == min we can continue with the main loop without the
2479     need to recurse. */
2480    
2481     if (min == max) continue;
2482    
2483     /* If minimizing, keep testing the rest of the expression and advancing
2484     the pointer while it matches the class. */
2485    
2486     if (minimize)
2487     {
2488     #ifdef SUPPORT_UTF8
2489     /* UTF-8 mode */
2490     if (utf8)
2491     {
2492     for (fi = min;; fi++)
2493     {
2494 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2495 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2496 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2497 ph10 427 if (eptr >= md->end_subject)
2498 ph10 426 {
2499 ph10 427 SCHECK_PARTIAL();
2500 ph10 510 MRRETURN(MATCH_NOMATCH);
2501 ph10 427 }
2502 nigel 77 GETCHARINC(c, eptr);
2503     if (c > 255)
2504     {
2505 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2506 nigel 77 }
2507     else
2508     {
2509 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2510 nigel 77 }
2511     }
2512     }
2513     else
2514     #endif
2515     /* Not UTF-8 mode */
2516     {
2517     for (fi = min;; fi++)
2518     {
2519 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2520 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2521 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2522 ph10 427 if (eptr >= md->end_subject)
2523 ph10 426 {
2524 ph10 427 SCHECK_PARTIAL();
2525 ph10 510 MRRETURN(MATCH_NOMATCH);
2526 ph10 427 }
2527 nigel 77 c = *eptr++;
2528 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2529 nigel 77 }
2530     }
2531     /* Control never gets here */
2532     }
2533    
2534     /* If maximizing, find the longest possible run, then work backwards. */
2535    
2536     else
2537     {
2538     pp = eptr;
2539    
2540     #ifdef SUPPORT_UTF8
2541     /* UTF-8 mode */
2542     if (utf8)
2543     {
2544     for (i = min; i < max; i++)
2545     {
2546     int len = 1;
2547 ph10 463 if (eptr >= md->end_subject)
2548 ph10 462 {
2549 ph10 463 SCHECK_PARTIAL();
2550 ph10 462 break;
2551 ph10 463 }
2552 nigel 77 GETCHARLEN(c, eptr, len);
2553     if (c > 255)
2554     {
2555     if (op == OP_CLASS) break;
2556     }
2557     else
2558     {
2559     if ((data[c/8] & (1 << (c&7))) == 0) break;
2560     }
2561     eptr += len;
2562     }
2563     for (;;)
2564     {
2565 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2566 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2567     if (eptr-- == pp) break; /* Stop if tried at original pos */
2568     BACKCHAR(eptr);
2569     }
2570     }
2571     else
2572     #endif
2573     /* Not UTF-8 mode */
2574     {
2575     for (i = min; i < max; i++)
2576     {
2577 ph10 463 if (eptr >= md->end_subject)
2578 ph10 462 {
2579 ph10 463 SCHECK_PARTIAL();
2580 ph10 462 break;
2581 ph10 463 }
2582 nigel 77 c = *eptr;
2583     if ((data[c/8] & (1 << (c&7))) == 0) break;
2584     eptr++;
2585     }
2586     while (eptr >= pp)
2587     {
2588 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2589 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2590 nigel 77 eptr--;
2591     }
2592     }
2593    
2594 ph10 510 MRRETURN(MATCH_NOMATCH);
2595 nigel 77 }
2596     }
2597     /* Control never gets here */
2598    
2599    
2600     /* Match an extended character class. This opcode is encountered only
2601 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2602     mode, because Unicode properties are supported in non-UTF-8 mode. */
2603 nigel 77
2604     #ifdef SUPPORT_UTF8
2605     case OP_XCLASS:
2606     {
2607     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2608     ecode += GET(ecode, 1); /* Advance past the item */
2609    
2610     switch (*ecode)
2611     {
2612     case OP_CRSTAR:
2613     case OP_CRMINSTAR:
2614     case OP_CRPLUS:
2615     case OP_CRMINPLUS:
2616     case OP_CRQUERY:
2617     case OP_CRMINQUERY:
2618     c = *ecode++ - OP_CRSTAR;
2619     minimize = (c & 1) != 0;
2620     min = rep_min[c]; /* Pick up values from tables; */
2621     max = rep_max[c]; /* zero for max => infinity */
2622     if (max == 0) max = INT_MAX;
2623     break;
2624    
2625     case OP_CRRANGE:
2626     case OP_CRMINRANGE:
2627     minimize = (*ecode == OP_CRMINRANGE);
2628     min = GET2(ecode, 1);
2629     max = GET2(ecode, 3);
2630     if (max == 0) max = INT_MAX;
2631     ecode += 5;
2632     break;
2633    
2634     default: /* No repeat follows */
2635     min = max = 1;
2636     break;
2637     }
2638    
2639     /* First, ensure the minimum number of matches are present. */
2640    
2641     for (i = 1; i <= min; i++)
2642     {
2643 ph10 427 if (eptr >= md->end_subject)
2644 ph10 426 {
2645     SCHECK_PARTIAL();
2646 ph10 510 MRRETURN(MATCH_NOMATCH);
2647 ph10 427 }
2648 ph10 384 GETCHARINCTEST(c, eptr);
2649 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2650 nigel 77 }
2651    
2652     /* If max == min we can continue with the main loop without the
2653     need to recurse. */
2654    
2655     if (min == max) continue;
2656    
2657     /* If minimizing, keep testing the rest of the expression and advancing
2658     the pointer while it matches the class. */
2659    
2660     if (minimize)
2661     {
2662     for (fi = min;; fi++)
2663     {
2664 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2665 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2666 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2667 ph10 427 if (eptr >= md->end_subject)
2668 ph10 426 {
2669 ph10 427 SCHECK_PARTIAL();
2670 ph10 510 MRRETURN(MATCH_NOMATCH);
2671 ph10 427 }
2672 ph10 384 GETCHARINCTEST(c, eptr);
2673 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2674 nigel 77 }
2675     /* Control never gets here */
2676     }
2677    
2678     /* If maximizing, find the longest possible run, then work backwards. */
2679    
2680     else
2681     {
2682     pp = eptr;
2683     for (i = min; i < max; i++)
2684     {
2685     int len = 1;
2686 ph10 463 if (eptr >= md->end_subject)
2687 ph10 462 {
2688 ph10 463 SCHECK_PARTIAL();
2689 ph10 462 break;
2690 ph10 463 }
2691 ph10 384 GETCHARLENTEST(c, eptr, len);
2692 nigel 77 if (!_pcre_xclass(c, data)) break;
2693     eptr += len;
2694     }
2695     for(;;)
2696     {
2697 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2698 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2699     if (eptr-- == pp) break; /* Stop if tried at original pos */
2700 ph10 214 if (utf8) BACKCHAR(eptr);
2701 nigel 77 }
2702 ph10 510 MRRETURN(MATCH_NOMATCH);
2703 nigel 77 }
2704    
2705     /* Control never gets here */
2706     }
2707     #endif /* End of XCLASS */
2708    
2709     /* Match a single character, casefully */
2710    
2711     case OP_CHAR:
2712     #ifdef SUPPORT_UTF8
2713     if (utf8)
2714     {
2715     length = 1;
2716     ecode++;
2717     GETCHARLEN(fc, ecode, length);
2718 ph10 443 if (length > md->end_subject - eptr)
2719 ph10 428 {
2720     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2721 ph10 510 MRRETURN(MATCH_NOMATCH);
2722 ph10 443 }
2723 ph10 510 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2724 nigel 77 }
2725     else
2726     #endif
2727    
2728     /* Non-UTF-8 mode */
2729     {
2730 ph10 443 if (md->end_subject - eptr < 1)
2731 ph10 428 {
2732     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2733 ph10 510 MRRETURN(MATCH_NOMATCH);
2734 ph10 443 }
2735 ph10 510 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2736 nigel 77 ecode += 2;
2737     }
2738     break;
2739    
2740     /* Match a single character, caselessly */
2741    
2742     case OP_CHARNC:
2743     #ifdef SUPPORT_UTF8
2744     if (utf8)
2745     {
2746     length = 1;
2747     ecode++;
2748     GETCHARLEN(fc, ecode, length);
2749    
2750 ph10 443 if (length > md->end_subject - eptr)
2751 ph10 428 {
2752     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2753 ph10 510 MRRETURN(MATCH_NOMATCH);
2754 ph10 443 }
2755 nigel 77
2756     /* If the pattern character's value is < 128, we have only one byte, and
2757     can use the fast lookup table. */
2758    
2759     if (fc < 128)
2760     {
2761 ph10 510 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2762 nigel 77 }
2763    
2764     /* Otherwise we must pick up the subject character */
2765    
2766     else
2767     {
2768 nigel 93 unsigned int dc;
2769 nigel 77 GETCHARINC(dc, eptr);
2770     ecode += length;
2771    
2772     /* If we have Unicode property support, we can use it to test the other
2773 nigel 87 case of the character, if there is one. */
2774 nigel 77
2775     if (fc != dc)
2776     {
2777     #ifdef SUPPORT_UCP
2778 ph10 349 if (dc != UCD_OTHERCASE(fc))
2779 nigel 77 #endif
2780 ph10 510 MRRETURN(MATCH_NOMATCH);
2781 nigel 77 }
2782     }
2783     }
2784     else
2785     #endif /* SUPPORT_UTF8 */
2786    
2787     /* Non-UTF-8 mode */
2788     {
2789 ph10 443 if (md->end_subject - eptr < 1)
2790 ph10 428 {
2791 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2792 ph10 510 MRRETURN(MATCH_NOMATCH);
2793 ph10 443 }
2794 ph10 510 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2795 nigel 77 ecode += 2;
2796     }
2797     break;
2798    
2799 nigel 93 /* Match a single character repeatedly. */
2800 nigel 77
2801     case OP_EXACT:
2802     min = max = GET2(ecode, 1);
2803     ecode += 3;
2804     goto REPEATCHAR;
2805    
2806 nigel 93 case OP_POSUPTO:
2807     possessive = TRUE;
2808     /* Fall through */
2809    
2810 nigel 77 case OP_UPTO:
2811     case OP_MINUPTO:
2812     min = 0;
2813     max = GET2(ecode, 1);
2814     minimize = *ecode == OP_MINUPTO;
2815     ecode += 3;
2816     goto REPEATCHAR;
2817    
2818 nigel 93 case OP_POSSTAR:
2819     possessive = TRUE;
2820     min = 0;
2821     max = INT_MAX;
2822     ecode++;
2823     goto REPEATCHAR;
2824    
2825     case OP_POSPLUS:
2826     possessive = TRUE;
2827     min = 1;
2828     max = INT_MAX;
2829     ecode++;
2830     goto REPEATCHAR;
2831    
2832     case OP_POSQUERY:
2833     possessive = TRUE;
2834     min = 0;
2835     max = 1;
2836     ecode++;
2837     goto REPEATCHAR;
2838    
2839 nigel 77 case OP_STAR:
2840     case OP_MINSTAR:
2841     case OP_PLUS:
2842     case OP_MINPLUS:
2843     case OP_QUERY:
2844     case OP_MINQUERY:
2845     c = *ecode++ - OP_STAR;
2846     minimize = (c & 1) != 0;
2847 ph10 443
2848 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2849     max = rep_max[c]; /* zero for max => infinity */
2850     if (max == 0) max = INT_MAX;
2851    
2852 ph10 426 /* Common code for all repeated single-character matches. */
2853 nigel 77
2854     REPEATCHAR:
2855     #ifdef SUPPORT_UTF8
2856     if (utf8)
2857     {
2858     length = 1;
2859     charptr = ecode;
2860     GETCHARLEN(fc, ecode, length);
2861     ecode += length;
2862    
2863     /* Handle multibyte character matching specially here. There is
2864     support for caseless matching if UCP support is present. */
2865    
2866     if (length > 1)
2867     {
2868     #ifdef SUPPORT_UCP
2869 nigel 93 unsigned int othercase;
2870 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2871 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2872 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2873 ph10 115 else oclength = 0;
2874 nigel 77 #endif /* SUPPORT_UCP */
2875    
2876     for (i = 1; i <= min; i++)
2877     {
2878 ph10 426 if (eptr <= md->end_subject - length &&
2879     memcmp(eptr, charptr, length) == 0) eptr += length;
2880 ph10 123 #ifdef SUPPORT_UCP
2881 ph10 426 else if (oclength > 0 &&
2882     eptr <= md->end_subject - oclength &&
2883     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2884     #endif /* SUPPORT_UCP */
2885 nigel 77 else
2886     {
2887 ph10 426 CHECK_PARTIAL();
2888 ph10 510 MRRETURN(MATCH_NOMATCH);
2889 nigel 77 }
2890     }
2891    
2892     if (min == max) continue;
2893    
2894     if (minimize)
2895     {
2896     for (fi = min;; fi++)
2897     {
2898 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2899 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2900 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2901 ph10 426 if (eptr <= md->end_subject - length &&
2902     memcmp(eptr, charptr, length) == 0) eptr += length;
2903 ph10 123 #ifdef SUPPORT_UCP
2904 ph10 426 else if (oclength > 0 &&
2905     eptr <= md->end_subject - oclength &&
2906     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2907     #endif /* SUPPORT_UCP */
2908 nigel 77 else
2909     {
2910 ph10 426 CHECK_PARTIAL();
2911 ph10 510 MRRETURN(MATCH_NOMATCH);
2912 nigel 77 }
2913     }
2914     /* Control never gets here */
2915     }
2916 nigel 93
2917     else /* Maximize */
2918 nigel 77 {
2919     pp = eptr;
2920     for (i = min; i < max; i++)
2921     {
2922 ph10 426 if (eptr <= md->end_subject - length &&
2923     memcmp(eptr, charptr, length) == 0) eptr += length;
2924 ph10 123 #ifdef SUPPORT_UCP
2925 ph10 426 else if (oclength > 0 &&
2926     eptr <= md->end_subject - oclength &&
2927     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2928     #endif /* SUPPORT_UCP */
2929 ph10 463 else
2930 ph10 462 {
2931 ph10 463 CHECK_PARTIAL();
2932 ph10 462 break;
2933 ph10 463 }
2934 nigel 77 }
2935 nigel 93
2936     if (possessive) continue;
2937 ph10 427
2938 ph10 120 for(;;)
2939 ph10 426 {
2940     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2941     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2942 ph10 510 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2943 ph10 115 #ifdef SUPPORT_UCP
2944 ph10 426 eptr--;
2945     BACKCHAR(eptr);
2946 ph10 123 #else /* without SUPPORT_UCP */
2947 ph10 426 eptr -= length;
2948 ph10 123 #endif /* SUPPORT_UCP */
2949 ph10 426 }
2950 nigel 77 }
2951     /* Control never gets here */
2952     }
2953    
2954     /* If the length of a UTF-8 character is 1, we fall through here, and
2955     obey the code as for non-UTF-8 characters below, though in this case the
2956     value of fc will always be < 128. */
2957     }
2958     else
2959     #endif /* SUPPORT_UTF8 */
2960    
2961     /* When not in UTF-8 mode, load a single-byte character. */
2962    
2963 ph10 426 fc = *ecode++;
2964 ph10 443
2965 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2966     may not be in UTF-8 mode. The code is duplicated for the caseless and
2967     caseful cases, for speed, since matching characters is likely to be quite
2968     common. First, ensure the minimum number of matches are present. If min =
2969     max, continue at the same level without recursing. Otherwise, if
2970     minimizing, keep trying the rest of the expression and advancing one
2971     matching character if failing, up to the maximum. Alternatively, if
2972     maximizing, find the maximum number of characters and work backwards. */
2973    
2974     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2975     max, eptr));
2976    
2977     if ((ims & PCRE_CASELESS) != 0)
2978     {
2979     fc = md->lcc[fc];
2980     for (i = 1; i <= min; i++)
2981 ph10 426 {
2982     if (eptr >= md->end_subject)
2983     {
2984     SCHECK_PARTIAL();
2985 ph10 510 MRRETURN(MATCH_NOMATCH);
2986 ph10 426 }
2987 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2988 ph10 426 }
2989 nigel 77 if (min == max) continue;
2990     if (minimize)
2991     {
2992     for (fi = min;; fi++)
2993     {
2994 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2995 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2996 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2997 ph10 426 if (eptr >= md->end_subject)
2998     {
2999 ph10 427 SCHECK_PARTIAL();
3000 ph10 510 MRRETURN(MATCH_NOMATCH);
3001 ph10 426 }
3002 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3003 nigel 77 }
3004     /* Control never gets here */
3005     }
3006 nigel 93 else /* Maximize */
3007 nigel 77 {
3008     pp = eptr;
3009     for (i = min; i < max; i++)
3010     {
3011 ph10 463 if (eptr >= md->end_subject)
3012 ph10 462 {
3013     SCHECK_PARTIAL();
3014     break;
3015 ph10 463 }
3016 ph10 462 if (fc != md->lcc[*eptr]) break;
3017 nigel 77 eptr++;
3018     }
3019 ph10 427
3020 nigel 93 if (possessive) continue;
3021 ph10 427
3022 nigel 77 while (eptr >= pp)
3023     {
3024 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3025 nigel 77 eptr--;
3026     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3027     }
3028 ph10 510 MRRETURN(MATCH_NOMATCH);
3029 nigel 77 }
3030     /* Control never gets here */
3031     }
3032    
3033     /* Caseful comparisons (includes all multi-byte characters) */
3034    
3035     else
3036     {
3037 ph10 427 for (i = 1; i <= min; i++)
3038 ph10 426 {
3039     if (eptr >= md->end_subject)
3040     {
3041     SCHECK_PARTIAL();
3042 ph10 510 MRRETURN(MATCH_NOMATCH);
3043 ph10 426 }
3044 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3045 ph10 427 }
3046 ph10 443
3047 nigel 77 if (min == max) continue;
3048 ph10 443
3049 nigel 77 if (minimize)
3050     {
3051     for (fi = min;; fi++)
3052     {
3053 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3054 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3055 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3056 ph10 426 if (eptr >= md->end_subject)
3057 ph10 427 {
3058 ph10 426 SCHECK_PARTIAL();
3059 ph10 510 MRRETURN(MATCH_NOMATCH);
3060 ph10 427 }
3061 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3062 nigel 77 }
3063     /* Control never gets here */
3064     }
3065 nigel 93 else /* Maximize */
3066 nigel 77 {
3067     pp = eptr;
3068     for (i = min; i < max; i++)
3069     {
3070 ph10 463 if (eptr >= md->end_subject)
3071 ph10 462 {
3072 ph10 463 SCHECK_PARTIAL();
3073 ph10 462 break;
3074 ph10 463 }
3075 ph10 462 if (fc != *eptr) break;
3076 nigel 77 eptr++;
3077     }
3078 nigel 93 if (possessive) continue;
3079 ph10 443
3080 nigel 77 while (eptr >= pp)
3081     {
3082 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3083 nigel 77 eptr--;
3084     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3085     }
3086 ph10 510 MRRETURN(MATCH_NOMATCH);
3087 nigel 77 }
3088     }
3089     /* Control never gets here */
3090    
3091     /* Match a negated single one-byte character. The character we are
3092     checking can be multibyte. */
3093    
3094     case OP_NOT:
3095 ph10 443 if (eptr >= md->end_subject)
3096 ph10 428 {
3097 ph10 443 SCHECK_PARTIAL();
3098 ph10 510 MRRETURN(MATCH_NOMATCH);
3099 ph10 443 }
3100 nigel 77 ecode++;
3101     GETCHARINCTEST(c, eptr);
3102     if ((ims & PCRE_CASELESS) != 0)
3103     {
3104     #ifdef SUPPORT_UTF8
3105     if (c < 256)
3106     #endif
3107     c = md->lcc[c];
3108 ph10 510 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3109 nigel 77 }
3110     else
3111     {
3112 ph10 510 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3113 nigel 77 }
3114     break;
3115    
3116     /* Match a negated single one-byte character repeatedly. This is almost a
3117     repeat of the code for a repeated single character, but I haven't found a
3118     nice way of commoning these up that doesn't require a test of the
3119     positive/negative option for each character match. Maybe that wouldn't add
3120     very much to the time taken, but character matching *is* what this is all
3121     about... */
3122    
3123     case OP_NOTEXACT:
3124     min = max = GET2(ecode, 1);
3125     ecode += 3;
3126     goto REPEATNOTCHAR;
3127    
3128     case OP_NOTUPTO:
3129     case OP_NOTMINUPTO:
3130     min = 0;
3131     max = GET2(ecode, 1);
3132     minimize = *ecode == OP_NOTMINUPTO;
3133     ecode += 3;
3134     goto REPEATNOTCHAR;
3135    
3136 nigel 93 case OP_NOTPOSSTAR:
3137     possessive = TRUE;
3138     min = 0;
3139     max = INT_MAX;
3140     ecode++;
3141     goto REPEATNOTCHAR;
3142    
3143     case OP_NOTPOSPLUS:
3144     possessive = TRUE;
3145     min = 1;
3146     max = INT_MAX;
3147     ecode++;
3148     goto REPEATNOTCHAR;
3149    
3150     case OP_NOTPOSQUERY:
3151     possessive = TRUE;
3152     min = 0;
3153     max = 1;
3154     ecode++;
3155     goto REPEATNOTCHAR;
3156    
3157     case OP_NOTPOSUPTO:
3158     possessive = TRUE;
3159     min = 0;
3160     max = GET2(ecode, 1);
3161     ecode += 3;
3162     goto REPEATNOTCHAR;
3163    
3164 nigel 77 case OP_NOTSTAR:
3165     case OP_NOTMINSTAR:
3166     case OP_NOTPLUS:
3167     case OP_NOTMINPLUS:
3168     case OP_NOTQUERY:
3169     case OP_NOTMINQUERY:
3170     c = *ecode++ - OP_NOTSTAR;
3171     minimize = (c & 1) != 0;
3172     min = rep_min[c]; /* Pick up values from tables; */
3173     max = rep_max[c]; /* zero for max => infinity */
3174     if (max == 0) max = INT_MAX;
3175    
3176 ph10 426 /* Common code for all repeated single-byte matches. */
3177 nigel 77
3178     REPEATNOTCHAR:
3179     fc = *ecode++;
3180    
3181     /* The code is duplicated for the caseless and caseful cases, for speed,
3182     since matching characters is likely to be quite common. First, ensure the
3183     minimum number of matches are present. If min = max, continue at the same
3184     level without recursing. Otherwise, if minimizing, keep trying the rest of
3185     the expression and advancing one matching character if failing, up to the
3186     maximum. Alternatively, if maximizing, find the maximum number of
3187     characters and work backwards. */
3188    
3189     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3190     max, eptr));
3191    
3192     if ((ims & PCRE_CASELESS) != 0)
3193     {
3194     fc = md->lcc[fc];
3195    
3196     #ifdef SUPPORT_UTF8
3197     /* UTF-8 mode */
3198     if (utf8)
3199     {
3200 nigel 93 register unsigned int d;
3201 nigel 77 for (i = 1; i <= min; i++)
3202     {
3203 ph10 426 if (eptr >= md->end_subject)
3204     {
3205     SCHECK_PARTIAL();
3206 ph10 510 MRRETURN(MATCH_NOMATCH);
3207 ph10 427 }
3208 nigel 77 GETCHARINC(d, eptr);
3209     if (d < 256) d = md->lcc[d];
3210 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3211 nigel 77 }
3212     }
3213     else
3214     #endif
3215    
3216     /* Not UTF-8 mode */
3217     {
3218     for (i = 1; i <= min; i++)
3219 ph10 426 {
3220     if (eptr >= md->end_subject)
3221     {
3222     SCHECK_PARTIAL();
3223 ph10 510 MRRETURN(MATCH_NOMATCH);
3224 ph10 427 }
3225 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3226 ph10 427 }
3227 nigel 77 }
3228    
3229     if (min == max) continue;
3230    
3231     if (minimize)
3232     {
3233     #ifdef SUPPORT_UTF8
3234     /* UTF-8 mode */
3235     if (utf8)
3236     {
3237 nigel 93 register unsigned int d;
3238 nigel 77 for (fi = min;; fi++)
3239     {
3240 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3241 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3242 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3243 ph10 427 if (eptr >= md->end_subject)
3244 ph10 426 {
3245 ph10 427 SCHECK_PARTIAL();
3246 ph10 510 MRRETURN(MATCH_NOMATCH);
3247 ph10 427 }
3248 nigel 77 GETCHARINC(d, eptr);
3249     if (d < 256) d = md->lcc[d];
3250 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3251 nigel 77 }
3252     }
3253     else
3254     #endif
3255     /* Not UTF-8 mode */
3256     {
3257     for (fi = min;; fi++)
3258     {
3259 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3260 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3261 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3262 ph10 426 if (eptr >= md->end_subject)
3263     {
3264     SCHECK_PARTIAL();
3265 ph10 510 MRRETURN(MATCH_NOMATCH);
3266 ph10 426 }
3267 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3268 nigel 77 }
3269     }
3270     /* Control never gets here */
3271     }
3272    
3273     /* Maximize case */
3274    
3275     else
3276     {
3277     pp = eptr;
3278    
3279     #ifdef SUPPORT_UTF8
3280     /* UTF-8 mode */
3281     if (utf8)
3282     {
3283 nigel 93 register unsigned int d;
3284 nigel 77 for (i = min; i < max; i++)
3285     {
3286     int len = 1;
3287 ph10 463 if (eptr >= md->end_subject)
3288 ph10 462 {
3289 ph10 463 SCHECK_PARTIAL();
3290 ph10 462 break;
3291 ph10 463 }
3292 nigel 77 GETCHARLEN(d, eptr, len);
3293     if (d < 256) d = md->lcc[d];
3294     if (fc == d) break;
3295     eptr += len;
3296     }
3297 nigel 93 if (possessive) continue;
3298     for(;;)
3299 nigel 77 {
3300 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3301 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3302     if (eptr-- == pp) break; /* Stop if tried at original pos */
3303     BACKCHAR(eptr);
3304     }
3305     }
3306     else
3307     #endif
3308     /* Not UTF-8 mode */
3309     {
3310     for (i = min; i < max; i++)
3311     {
3312 ph10 463 if (eptr >= md->end_subject)
3313 ph10 462 {
3314     SCHECK_PARTIAL();
3315     break;
3316 ph10 463 }
3317 ph10 462 if (fc == md->lcc[*eptr]) break;
3318 nigel 77 eptr++;
3319     }
3320 nigel 93 if (possessive) continue;
3321 nigel 77 while (eptr >= pp)
3322     {
3323 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3324 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3325     eptr--;
3326     }
3327     }
3328    
3329 ph10 510 MRRETURN(MATCH_NOMATCH);
3330 nigel 77 }
3331     /* Control never gets here */
3332     }
3333    
3334     /* Caseful comparisons */
3335    
3336     else
3337     {
3338     #ifdef SUPPORT_UTF8
3339     /* UTF-8 mode */
3340     if (utf8)
3341     {
3342 nigel 93 register unsigned int d;
3343 nigel 77 for (i = 1; i <= min; i++)
3344     {
3345 ph10 426 if (eptr >= md->end_subject)
3346     {
3347     SCHECK_PARTIAL();
3348 ph10 510 MRRETURN(MATCH_NOMATCH);
3349 ph10 427 }
3350 nigel 77 GETCHARINC(d, eptr);
3351 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3352 nigel 77 }
3353     }
3354     else
3355     #endif
3356     /* Not UTF-8 mode */
3357     {
3358     for (i = 1; i <= min; i++)
3359 ph10 426 {
3360     if (eptr >= md->end_subject)
3361     {
3362     SCHECK_PARTIAL();
3363 ph10 510 MRRETURN(MATCH_NOMATCH);
3364 ph10 427 }
3365 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3366 ph10 427 }
3367 nigel 77 }
3368    
3369     if (min == max) continue;
3370    
3371     if (minimize)
3372     {
3373     #ifdef SUPPORT_UTF8
3374     /* UTF-8 mode */
3375     if (utf8)
3376     {
3377 nigel 93 register unsigned int d;
3378 nigel 77 for (fi = min;; fi++)
3379     {
3380 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3381 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3382 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3383 ph10 427 if (eptr >= md->end_subject)
3384 ph10 426 {
3385 ph10 427 SCHECK_PARTIAL();
3386 ph10 510 MRRETURN(MATCH_NOMATCH);
3387 ph10 427 }
3388 nigel 77 GETCHARINC(d, eptr);
3389 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3390 nigel 77 }
3391     }
3392     else
3393     #endif
3394     /* Not UTF-8 mode */
3395     {
3396     for (fi = min;; fi++)
3397     {
3398 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3399 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3400 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3401 ph10 426 if (eptr >= md->end_subject)
3402     {
3403     SCHECK_PARTIAL();
3404 ph10 510 MRRETURN(MATCH_NOMATCH);
3405 ph10 427 }
3406 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3407 nigel 77 }
3408     }
3409     /* Control never gets here */
3410     }
3411    
3412     /* Maximize case */
3413    
3414     else
3415     {
3416     pp = eptr;
3417    
3418     #ifdef SUPPORT_UTF8
3419     /* UTF-8 mode */
3420     if (utf8)
3421     {
3422 nigel 93 register unsigned int d;
3423 nigel 77 for (i = min; i < max; i++)
3424     {
3425     int len = 1;
3426 ph10 463 if (eptr >= md->end_subject)
3427 ph10 462 {
3428 ph10 463 SCHECK_PARTIAL();
3429 ph10 462 break;
3430 ph10 463 }
3431 nigel 77 GETCHARLEN(d, eptr, len);
3432     if (fc == d) break;
3433     eptr += len;
3434     }
3435 nigel 93 if (possessive) continue;
3436 nigel 77 for(;;)
3437     {
3438 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3439 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3440     if (eptr-- == pp) break; /* Stop if tried at original pos */
3441     BACKCHAR(eptr);
3442     }
3443     }
3444     else
3445     #endif
3446     /* Not UTF-8 mode */
3447     {
3448     for (i = min; i < max; i++)
3449     {
3450 ph10 463 if (eptr >= md->end_subject)
3451 ph10 462 {
3452 ph10 463 SCHECK_PARTIAL();
3453 ph10 462 break;
3454 ph10 463 }
3455 ph10 462 if (fc == *eptr) break;
3456 nigel 77 eptr++;
3457     }
3458 nigel 93 if (possessive) continue;
3459 nigel 77 while (eptr >= pp)
3460     {
3461 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3462 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3463     eptr--;
3464     }
3465     }
3466    
3467 ph10 510 MRRETURN(MATCH_NOMATCH);
3468 nigel 77 }
3469     }
3470     /* Control never gets here */
3471    
3472     /* Match a single character type repeatedly; several different opcodes
3473     share code. This is very similar to the code for single characters, but we
3474     repeat it in the interests of efficiency. */
3475    
3476     case OP_TYPEEXACT:
3477     min = max = GET2(ecode, 1);
3478     minimize = TRUE;
3479     ecode += 3;
3480     goto REPEATTYPE;
3481    
3482     case OP_TYPEUPTO:
3483     case OP_TYPEMINUPTO:
3484     min = 0;
3485     max = GET2(ecode, 1);
3486     minimize = *ecode == OP_TYPEMINUPTO;
3487     ecode += 3;
3488     goto REPEATTYPE;
3489    
3490 nigel 93 case OP_TYPEPOSSTAR:
3491     possessive = TRUE;
3492     min = 0;
3493     max = INT_MAX;
3494     ecode++;
3495     goto REPEATTYPE;
3496    
3497     case OP_TYPEPOSPLUS:
3498     possessive = TRUE;
3499     min = 1;
3500     max = INT_MAX;
3501     ecode++;
3502     goto REPEATTYPE;
3503    
3504     case OP_TYPEPOSQUERY:
3505     possessive = TRUE;
3506     min = 0;
3507     max = 1;
3508     ecode++;
3509     goto REPEATTYPE;
3510    
3511     case OP_TYPEPOSUPTO:
3512     possessive = TRUE;
3513     min = 0;
3514     max = GET2(ecode, 1);
3515     ecode += 3;
3516     goto REPEATTYPE;
3517    
3518 nigel 77 case OP_TYPESTAR:
3519     case OP_TYPEMINSTAR:
3520     case OP_TYPEPLUS:
3521     case OP_TYPEMINPLUS:
3522     case OP_TYPEQUERY:
3523     case OP_TYPEMINQUERY:
3524     c = *ecode++ - OP_TYPESTAR;
3525     minimize = (c & 1) != 0;
3526     min = rep_min[c]; /* Pick up values from tables; */
3527     max = rep_max[c]; /* zero for max => infinity */
3528     if (max == 0) max = INT_MAX;
3529    
3530     /* Common code for all repeated single character type matches. Note that
3531     in UTF-8 mode, '.' matches a character of any length, but for the other
3532     character types, the valid characters are all one-byte long. */
3533    
3534     REPEATTYPE:
3535     ctype = *ecode++; /* Code for the character type */
3536    
3537     #ifdef SUPPORT_UCP
3538     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3539     {
3540     prop_fail_result = ctype == OP_NOTPROP;
3541     prop_type = *ecode++;
3542 nigel 87 prop_value = *ecode++;
3543 nigel 77 }
3544     else prop_type = -1;
3545     #endif
3546    
3547     /* First, ensure the minimum number of matches are present. Use inline
3548     code for maximizing the speed, and do the type test once at the start
3549 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3550 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3551     and single-bytes. */
3552    
3553     if (min > 0)
3554     {
3555     #ifdef SUPPORT_UCP
3556 nigel 87 if (prop_type >= 0)
3557 nigel 77 {
3558 nigel 87 switch(prop_type)
3559 nigel 77 {
3560 nigel 87 case PT_ANY:
3561 ph10 510 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3562 nigel 87 for (i = 1; i <= min; i++)
3563     {
3564 ph10 427 if (eptr >= md->end_subject)
3565 ph10 426 {
3566 ph10 427 SCHECK_PARTIAL();
3567 ph10 510 MRRETURN(MATCH_NOMATCH);
3568 ph10 427 }
3569 ph10 184 GETCHARINCTEST(c, eptr);
3570 nigel 87 }
3571     break;
3572    
3573     case PT_LAMP:
3574     for (i = 1; i <= min; i++)
3575     {
3576 ph10 427 if (eptr >= md->end_subject)
3577 ph10 426 {
3578 ph10 427 SCHECK_PARTIAL();
3579 ph10 510 MRRETURN(MATCH_NOMATCH);
3580 ph10 427 }
3581 ph10 184 GETCHARINCTEST(c, eptr);
3582 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3583 nigel 87 if ((prop_chartype == ucp_Lu ||
3584     prop_chartype == ucp_Ll ||
3585     prop_chartype == ucp_Lt) == prop_fail_result)
3586 ph10 510 MRRETURN(MATCH_NOMATCH);
3587 nigel 87 }
3588     break;
3589    
3590     case PT_GC:
3591     for (i = 1; i <= min; i++)
3592     {
3593 ph10 427 if (eptr >= md->end_subject)
3594 ph10 426 {
3595 ph10 427 SCHECK_PARTIAL();
3596 ph10 510 MRRETURN(MATCH_NOMATCH);
3597 ph10 427 }
3598 ph10 184 GETCHARINCTEST(c, eptr);
3599 ph10 349 prop_category = UCD_CATEGORY(c);
3600 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3601 ph10 510 MRRETURN(MATCH_NOMATCH);
3602 nigel 87 }
3603     break;
3604    
3605     case PT_PC:
3606     for (i = 1; i <= min; i++)
3607     {
3608 ph10 427 if (eptr >= md->end_subject)
3609 ph10 426 {
3610 ph10 427 SCHECK_PARTIAL();
3611 ph10 510 MRRETURN(MATCH_NOMATCH);
3612 ph10 427 }
3613 ph10 184 GETCHARINCTEST(c, eptr);
3614 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3615 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3616 ph10 510 MRRETURN(MATCH_NOMATCH);
3617 nigel 87 }
3618     break;
3619    
3620     case PT_SC:
3621     for (i = 1; i <= min; i++)
3622     {
3623 ph10 427 if (eptr >= md->end_subject)
3624 ph10 426 {
3625 ph10 427 SCHECK_PARTIAL();
3626 ph10 510 MRRETURN(MATCH_NOMATCH);
3627 ph10 427 }
3628 ph10 184 GETCHARINCTEST(c, eptr);
3629 ph10 349 prop_script = UCD_SCRIPT(c);
3630 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3631 ph10 510 MRRETURN(MATCH_NOMATCH);
3632 nigel 87 }
3633     break;
3634 ph10 527
3635 ph10 517 case PT_ALNUM:
3636     for (i = 1; i <= min; i++)
3637     {
3638     if (eptr >= md->end_subject)
3639     {
3640     SCHECK_PARTIAL();
3641     MRRETURN(MATCH_NOMATCH);
3642     }
3643     GETCHARINCTEST(c, eptr);
3644 ph10 527 prop_category = UCD_CATEGORY(c);
3645     if ((prop_category == ucp_L || prop_category == ucp_N)
3646 ph10 517 == prop_fail_result)
3647     MRRETURN(MATCH_NOMATCH);
3648     }
3649     break;
3650 ph10 527
3651 ph10 517 case PT_SPACE: /* Perl space */
3652     for (i = 1; i <= min; i++)
3653     {
3654     if (eptr >= md->end_subject)
3655     {
3656     SCHECK_PARTIAL();
3657     MRRETURN(MATCH_NOMATCH);
3658     }
3659     GETCHARINCTEST(c, eptr);
3660 ph10 527 prop_category = UCD_CATEGORY(c);
3661     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3662     c == CHAR_FF || c == CHAR_CR)
3663 ph10 517 == prop_fail_result)
3664     MRRETURN(MATCH_NOMATCH);
3665     }
3666     break;
3667 ph10 527
3668 ph10 517 case PT_PXSPACE: /* POSIX space */
3669     for (i = 1; i <= min; i++)
3670     {
3671     if (eptr >= md->end_subject)
3672     {
3673     SCHECK_PARTIAL();
3674     MRRETURN(MATCH_NOMATCH);
3675     }
3676     GETCHARINCTEST(c, eptr);
3677 ph10 527 prop_category = UCD_CATEGORY(c);
3678     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3679     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3680 ph10 517 == prop_fail_result)
3681     MRRETURN(MATCH_NOMATCH);
3682     }
3683     break;
3684 ph10 527
3685     case PT_WORD:
3686 ph10 517 for (i = 1; i <= min; i++)
3687     {
3688     if (eptr >= md->end_subject)
3689     {
3690     SCHECK_PARTIAL();
3691     MRRETURN(MATCH_NOMATCH);
3692     }
3693     GETCHARINCTEST(c, eptr);
3694 ph10 527 prop_category = UCD_CATEGORY(c);
3695 ph10 517 if ((prop_category == ucp_L || prop_category == ucp_N ||
3696 ph10 527 c == CHAR_UNDERSCORE)
3697 ph10 517 == prop_fail_result)
3698     MRRETURN(MATCH_NOMATCH);
3699     }
3700     break;
3701 ph10 527
3702 ph10 517 /* This should not occur */
3703 nigel 87
3704     default:
3705     RRETURN(PCRE_ERROR_INTERNAL);
3706 nigel 77 }
3707     }
3708    
3709     /* Match extended Unicode sequences. We will get here only if the
3710     support is in the binary; otherwise a compile-time error occurs. */
3711    
3712     else if (ctype == OP_EXTUNI)
3713     {
3714     for (i = 1; i <= min; i++)
3715     {
3716 ph10 427 if (eptr >= md->end_subject)
3717 ph10 426 {
3718 ph10 427 SCHECK_PARTIAL();
3719 ph10 510 MRRETURN(MATCH_NOMATCH);
3720 ph10 427 }
3721 nigel 77 GETCHARINCTEST(c, eptr);
3722 ph10 349 prop_category = UCD_CATEGORY(c);
3723 ph10 510 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3724 nigel 77 while (eptr < md->end_subject)
3725     {
3726     int len = 1;
3727 ph10 426 if (!utf8) c = *eptr;
3728     else { GETCHARLEN(c, eptr, len); }
3729 ph10 349 prop_category = UCD_CATEGORY(c);
3730 nigel 77 if (prop_category != ucp_M) break;
3731     eptr += len;
3732     }
3733     }
3734     }
3735    
3736     else
3737     #endif /* SUPPORT_UCP */
3738    
3739     /* Handle all other cases when the coding is UTF-8 */
3740    
3741     #ifdef SUPPORT_UTF8
3742     if (utf8) switch(ctype)
3743     {
3744     case OP_ANY:
3745     for (i = 1; i <= min; i++)
3746     {
3747 ph10 426 if (eptr >= md->end_subject)
3748     {
3749 ph10 427 SCHECK_PARTIAL();
3750 ph10 510 MRRETURN(MATCH_NOMATCH);
3751 ph10 427 }
3752 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3753 nigel 91 eptr++;
3754 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3755     }
3756     break;
3757    
3758 ph10 341 case OP_ALLANY:
3759     for (i = 1; i <= min; i++)
3760     {
3761 ph10 427 if (eptr >= md->end_subject)
3762 ph10 426 {
3763     SCHECK_PARTIAL();
3764 ph10 510 MRRETURN(MATCH_NOMATCH);
3765 ph10 427 }
3766 ph10 341 eptr++;
3767     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3768     }
3769     break;
3770    
3771 nigel 77 case OP_ANYBYTE:
3772 ph10 510 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3773 nigel 77 eptr += min;
3774     break;
3775    
3776 nigel 93 case OP_ANYNL:
3777     for (i = 1; i <= min; i++)
3778     {
3779 ph10 427 if (eptr >= md->end_subject)
3780 ph10 426 {
3781     SCHECK_PARTIAL();
3782 ph10 510 MRRETURN(MATCH_NOMATCH);
3783 ph10 427 }
3784 nigel 93 GETCHARINC(c, eptr);
3785     switch(c)
3786     {
3787 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3788 nigel 93 case 0x000d:
3789     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3790     break;
3791 ph10 231
3792 nigel 93 case 0x000a:
3793 ph10 231 break;
3794    
3795 nigel 93 case 0x000b:
3796     case 0x000c:
3797     case 0x0085:
3798     case 0x2028:
3799     case 0x2029:
3800 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3801 nigel 93 break;
3802     }
3803     }
3804     break;
3805    
3806 ph10 178 case OP_NOT_HSPACE:
3807     for (i = 1; i <= min; i++)
3808     {
3809 ph10 427 if (eptr >= md->end_subject)
3810 ph10 426 {
3811     SCHECK_PARTIAL();
3812 ph10 510 MRRETURN(MATCH_NOMATCH);
3813 ph10 427 }
3814 ph10 178 GETCHARINC(c, eptr);
3815     switch(c)
3816     {
3817     default: break;
3818     case 0x09: /* HT */
3819     case 0x20: /* SPACE */
3820     case 0xa0: /* NBSP */
3821     case 0x1680: /* OGHAM SPACE MARK */
3822     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3823     case 0x2000: /* EN QUAD */
3824     case 0x2001: /* EM QUAD */
3825     case 0x2002: /* EN SPACE */
3826