/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 534 - (hide annotations) (download)
Thu Jun 3 18:26:05 2010 UTC (3 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 185174 byte(s)
Correct typo in recent malloc check fix.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
/* Identify the block that carries the newline information and the fields in it
that bound the processed string. These definitions come ahead of the
pcre_internal.h include. */

#define NLBLOCK  md             /* block holding the newline information */
#define PSSTART  start_subject  /* field: start of the processed string */
#define PSEND    end_subject    /* field: end of the processed string */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
#define match_condassert  0x01  /* The call is checking a condition assertion */
#define match_cbegroup    0x02  /* Unlimited repeat group that could be empty */

/* Ordinary (non-error) results of match(). Errors are reported with the
externally defined PCRE_ERROR_xxx codes, every one of which is negative. */

#define MATCH_MATCH    1
#define MATCH_NOMATCH  0

/* Results of match() that are for internal use only. They are set far enough
below zero to stay clear of the external error codes. */

#define MATCH_ACCEPT    (-999)
#define MATCH_COMMIT    (-998)
#define MATCH_PRUNE     (-997)
#define MATCH_SKIP      (-996)
#define MATCH_SKIP_ARG  (-995)
#define MATCH_THEN      (-994)
80 ph10 210
/* Shorthand for a sequence that is needed in many places: copy the current
mark pointer into the match data block, then leave via RRETURN with the given
result. */

#define MRRETURN(ra) \
  { \
  md->mark = markptr; \
  RRETURN(ra); \
  }
88    
/* Limit on how many ints of the offset vector are saved on the stack for a
recursive call; a bigger offset vector is saved using malloc instead. The
offset vector's length is always a multiple of 3, so this value should be one
as well. */

#define REC_STACK_SAVE_MAX 30

/* Minimum and maximum counts for the common repeats. In the table of maxima, a
value of 0 stands for infinity. */

static const char rep_min[] = {
  0, 0,
  1, 1,
  0, 0 };

static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1 };
99    
100    
101    
102 ph10 475 #ifdef PCRE_DEBUG
103 nigel 77 /*************************************************
104     * Debugging function to print chars *
105     *************************************************/
106    
107     /* Print a sequence of chars in printable format, stopping at the end of the
108     subject if the requested.
109    
110     Arguments:
111     p points to characters
112     length number to print
113     is_subject TRUE if printing from within md->start_subject
114     md pointer to matching data block, if is_subject is TRUE
115    
116     Returns: nothing
117     */
118    
119     static void
120     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121     {
122 nigel 93 unsigned int c;
123 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124     while (length-- > 0)
125     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126     }
127     #endif
128    
129    
130    
131     /*************************************************
132     * Match a back-reference *
133     *************************************************/
134    
135     /* If a back reference hasn't been set, the length that is passed is greater
136     than the number of characters left in the string, so the match fails.
137    
138     Arguments:
139     offset index into the offset vector
140     eptr points into the subject
141     length length to be matched
142     md points to match data block
143     ims the ims flags
144    
145     Returns: TRUE if matched
146     */
147    
148     static BOOL
149 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
150 nigel 77 unsigned long int ims)
151     {
152 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
153 nigel 77
154 ph10 475 #ifdef PCRE_DEBUG
155 nigel 77 if (eptr >= md->end_subject)
156     printf("matching subject <null>");
157     else
158     {
159     printf("matching subject ");
160     pchars(eptr, length, TRUE, md);
161     }
162     printf(" against backref ");
163     pchars(p, length, FALSE, md);
164     printf("\n");
165     #endif
166    
167     /* Always fail if not enough characters left */
168    
169     if (length > md->end_subject - eptr) return FALSE;
170    
171 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172     properly if Unicode properties are supported. Otherwise, we can check only
173     ASCII characters. */
174 nigel 77
175     if ((ims & PCRE_CASELESS) != 0)
176     {
177 ph10 354 #ifdef SUPPORT_UTF8
178     #ifdef SUPPORT_UCP
179     if (md->utf8)
180     {
181 ph10 358 USPTR endptr = eptr + length;
182 ph10 354 while (eptr < endptr)
183     {
184 ph10 358 int c, d;
185 ph10 354 GETCHARINC(c, eptr);
186     GETCHARINC(d, p);
187     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188 ph10 358 }
189     }
190 ph10 354 else
191     #endif
192     #endif
193    
194     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195     is no UCP support. */
196 ph10 358
197 nigel 77 while (length-- > 0)
198 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199 nigel 77 }
200 ph10 358
201 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
202     are in UTF-8 mode. */
203 ph10 358
204 nigel 77 else
205     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
207     return TRUE;
208     }
209    
210    
211    
212     /***************************************************************************
213     ****************************************************************************
214     RECURSION IN THE match() FUNCTION
215    
216 nigel 87 The match() function is highly recursive, though not every recursive call
217     increases the recursive depth. Nevertheless, some regular expressions can cause
218     it to recurse to a great depth. I was writing for Unix, so I just let it call
219     itself recursively. This uses the stack for saving everything that has to be
220     saved for a recursive call. On Unix, the stack can be large, and this works
221     fine.
222 nigel 77
223 nigel 87 It turns out that on some non-Unix-like systems there are problems with
224     programs that use a lot of stack. (This despite the fact that every last chip
225     has oodles of memory these days, and techniques for extending the stack have
226     been known for decades.) So....
227 nigel 77
228     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229     calls by keeping local variables that need to be preserved in blocks of memory
230 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
231 nigel 77 achieve this so that the actual code doesn't look very different to what it
232     always used to.
233 ph10 164
234 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
235 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
236     Switzer, the use of longjmp() has been abolished, at the cost of having to
237     provide a unique number for each call to RMATCH. There is no way of generating
238     a sequence of numbers at compile time in C. I have given them names, to make
239     them stand out more clearly.
240    
241     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
244     don't have indeterminate values; this has meant that the frame size can be
245 ph10 164 reduced because the result can be "passed back" by straight setting of the
246     variable instead of being passed in the frame.
247 nigel 77 ****************************************************************************
248     ***************************************************************************/
249    
/* Identifying numbers for the RMATCH calls. The code at HEAP_RETURN below must
be kept in step with any change to this list. */

enum {
  RM1 = 1,
         RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,
  RM9,   RM10, RM11, RM12, RM13, RM14, RM15, RM16,
  RM17,  RM18, RM19, RM20, RM21, RM22, RM23, RM24,
  RM25,  RM26, RM27, RM28, RM29, RM30, RM31, RM32,
  RM33,  RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48,
  RM49,  RM50, RM51, RM52, RM53, RM54, RM55, RM56,
  RM57,  RM58, RM59, RM60, RM61, RM62
};
260 ph10 164
261 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
262 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
263 ph10 501 actually used in this definition. */
264 nigel 77
265     #ifndef NO_RECURSE
#define REGISTER register

#ifndef PCRE_DEBUG

/* Production versions: a direct recursive call to match(), whose result is
left in rrc, and a direct return. The "rw" argument is ignored. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
#define RRETURN(ra) return ra

#else

/* Debugging versions: the same call and return, with the source line numbers
reported on stdout around them. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }

#endif
285    
286 nigel 77 #else
287    
288    
/* In these versions of the macros the "stack" is a private chain of frames
kept on the heap. The "rd" argument of RMATCH is not used in this definition:
it is the md argument of match(), which never changes.

RMATCH obtains a new frame, fills in the argument fields for the next level,
notes in the current frame (Xwhere) where to resume, makes the new frame
current, and jumps to HEAP_RECURSE. The L_<rw> label is the resumption point
for this particular call. */

#define REGISTER

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  newframe->Xprevframe = frame;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xmarkptr = markptr;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  frame->Xwhere = rw;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
316    
/* Return from the current "recursion" level. The current frame is unlinked
and freed; if it had a parent, the result is passed back in rrc and control
resumes at HEAP_RETURN, otherwise the value is returned from match() itself.

RRETURN is also used when the allocation of the very first frame fails, at
which point frame is NULL, so the frame must be checked before it is
dereferenced. In that case there is nothing to free and the value is simply
returned. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  if (oldframe != NULL)\
    {\
    frame = oldframe->Xprevframe;\
    (pcre_stack_free)(oldframe);\
    }\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
329    
330    
331     /* Structure for remembering the local variables in a private frame */
332    
333     typedef struct heapframe {
334     struct heapframe *Xprevframe;
335    
336     /* Function arguments that may change */
337    
338 ph10 409 USPTR Xeptr;
339 nigel 77 const uschar *Xecode;
340 ph10 409 USPTR Xmstart;
341 ph10 501 USPTR Xmarkptr;
342 nigel 77 int Xoffset_top;
343     long int Xims;
344     eptrblock *Xeptrb;
345     int Xflags;
346 nigel 91 unsigned int Xrdepth;
347 nigel 77
348     /* Function local variables */
349    
350 ph10 409 USPTR Xcallpat;
351 ph10 406 #ifdef SUPPORT_UTF8
352 ph10 409 USPTR Xcharptr;
353 ph10 406 #endif
354 ph10 409 USPTR Xdata;
355     USPTR Xnext;
356     USPTR Xpp;
357     USPTR Xprev;
358     USPTR Xsaved_eptr;
359 nigel 77
360     recursion_info Xnew_recursive;
361    
362     BOOL Xcur_is_word;
363     BOOL Xcondition;
364     BOOL Xprev_is_word;
365    
366     unsigned long int Xoriginal_ims;
367    
368     #ifdef SUPPORT_UCP
369     int Xprop_type;
370 nigel 87 int Xprop_value;
371 nigel 77 int Xprop_fail_result;
372     int Xprop_category;
373     int Xprop_chartype;
374 nigel 87 int Xprop_script;
375 ph10 123 int Xoclength;
376     uschar Xocchars[8];
377 nigel 77 #endif
378    
379 ph10 403 int Xcodelink;
380 nigel 77 int Xctype;
381 nigel 93 unsigned int Xfc;
382 nigel 77 int Xfi;
383     int Xlength;
384     int Xmax;
385     int Xmin;
386     int Xnumber;
387     int Xoffset;
388     int Xop;
389     int Xsave_capture_last;
390     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
391     int Xstacksave[REC_STACK_SAVE_MAX];
392    
393     eptrblock Xnewptrb;
394    
395 ph10 164 /* Where to jump back to */
396 nigel 77
397 ph10 164 int Xwhere;
398 ph10 165
399 nigel 77 } heapframe;
400    
401     #endif
402    
403    
404     /***************************************************************************
405     ***************************************************************************/
406    
407    
408    
409     /*************************************************
410     * Match from current position *
411     *************************************************/
412    
413 nigel 93 /* This function is called recursively in many circumstances. Whenever it
414 nigel 77 returns a negative (error) response, the outer incarnation must also return the
415 ph10 426 same response. */
416 nigel 77
/* These macros pack up tests that are used for partial matching, and which
appear several times in the code. When partial matching is enabled and the
subject pointer has moved beyond the current match start (i.e. something has
been matched), the "hit end" flag is set; for hard partial matching
(md->partial greater than 1) there is then an immediate return of
PCRE_ERROR_PARTIAL. CHECK_PARTIAL also requires the pointer to be at the end
of the subject. SCHECK_PARTIAL omits that test, for use where the end of the
subject is already known to have been reached. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
    {\
    md->hitend = TRUE;\
    if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
    }

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > mstart)\
    {\
    md->hitend = TRUE;\
    if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
    }
437 ph10 426
438 ph10 427
439 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
440     the md structure (e.g. utf8, end_subject) into individual variables to improve
441 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
442     made performance worse.
443    
444     Arguments:
445 nigel 93 eptr pointer to current character in subject
446     ecode pointer to current position in compiled code
447 ph10 168 mstart pointer to the current match start position (can be modified
448 ph10 172 by encountering \K)
449 ph10 501 markptr pointer to the most recent MARK name, or NULL
450 nigel 77 offset_top current top pointer
451     md pointer to "static" info for the match
452     ims current /i, /m, and /s options
453     eptrb pointer to chain of blocks containing eptr at start of
454     brackets - for testing for empty matches
455     flags can contain
456     match_condassert - this is an assertion condition
457 nigel 93 match_cbegroup - this is the start of an unlimited repeat
458     group that can match an empty string
459 nigel 87 rdepth the recursion depth
460 nigel 77
461     Returns: MATCH_MATCH if matched ) these values are >= 0
462     MATCH_NOMATCH if failed to match )
463 ph10 510 a negative MATCH_xxx value for PRUNE, SKIP, etc
464 nigel 77 a negative PCRE_ERROR_xxx value if aborted by an error condition
465 nigel 87 (e.g. stopped by repeated call or recursion limit)
466 nigel 77 */
467    
468     static int
469 ph10 510 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
470     const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
471 ph10 501 eptrblock *eptrb, int flags, unsigned int rdepth)
472 nigel 77 {
473     /* These variables do not need to be preserved over recursion in this function,
474 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
475     "register" because they are used a lot in loops. */
476 nigel 77
477 nigel 91 register int rrc; /* Returns from recursive calls */
478     register int i; /* Used for loops not involving calls to RMATCH() */
479 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
480 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
481 nigel 77
482 nigel 93 BOOL minimize, possessive; /* Quantifier options */
483 ph10 403 int condcode;
484 nigel 93
485 nigel 77 /* When recursion is not being used, all "local" variables that have to be
486     preserved over calls to RMATCH() are part of a "frame" which is obtained from
487     heap storage. Set up the top-level frame here; others are obtained from the
488     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
489    
490     #ifdef NO_RECURSE
491     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
492 ph10 531 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
493 nigel 77 frame->Xprevframe = NULL; /* Marks the top level */
494    
495     /* Copy in the original argument variables */
496    
497     frame->Xeptr = eptr;
498     frame->Xecode = ecode;
499 ph10 168 frame->Xmstart = mstart;
500 ph10 501 frame->Xmarkptr = markptr;
501 nigel 77 frame->Xoffset_top = offset_top;
502     frame->Xims = ims;
503     frame->Xeptrb = eptrb;
504     frame->Xflags = flags;
505 nigel 87 frame->Xrdepth = rdepth;
506 nigel 77
507     /* This is where control jumps back to to effect "recursion" */
508    
509     HEAP_RECURSE:
510    
511     /* Macros make the argument variables come from the current frame */
512    
513     #define eptr frame->Xeptr
514     #define ecode frame->Xecode
515 ph10 168 #define mstart frame->Xmstart
516 ph10 501 #define markptr frame->Xmarkptr
517 nigel 77 #define offset_top frame->Xoffset_top
518     #define ims frame->Xims
519     #define eptrb frame->Xeptrb
520     #define flags frame->Xflags
521 nigel 87 #define rdepth frame->Xrdepth
522 nigel 77
523     /* Ditto for the local variables */
524    
525     #ifdef SUPPORT_UTF8
526     #define charptr frame->Xcharptr
527     #endif
528     #define callpat frame->Xcallpat
529 ph10 403 #define codelink frame->Xcodelink
530 nigel 77 #define data frame->Xdata
531     #define next frame->Xnext
532     #define pp frame->Xpp
533     #define prev frame->Xprev
534     #define saved_eptr frame->Xsaved_eptr
535    
536     #define new_recursive frame->Xnew_recursive
537    
538     #define cur_is_word frame->Xcur_is_word
539     #define condition frame->Xcondition
540     #define prev_is_word frame->Xprev_is_word
541    
542     #define original_ims frame->Xoriginal_ims
543    
544     #ifdef SUPPORT_UCP
545     #define prop_type frame->Xprop_type
546 nigel 87 #define prop_value frame->Xprop_value
547 nigel 77 #define prop_fail_result frame->Xprop_fail_result
548     #define prop_category frame->Xprop_category
549     #define prop_chartype frame->Xprop_chartype
550 nigel 87 #define prop_script frame->Xprop_script
551 ph10 115 #define oclength frame->Xoclength
552     #define occhars frame->Xocchars
553 nigel 77 #endif
554    
555     #define ctype frame->Xctype
556     #define fc frame->Xfc
557     #define fi frame->Xfi
558     #define length frame->Xlength
559     #define max frame->Xmax
560     #define min frame->Xmin
561     #define number frame->Xnumber
562     #define offset frame->Xoffset
563     #define op frame->Xop
564     #define save_capture_last frame->Xsave_capture_last
565     #define save_offset1 frame->Xsave_offset1
566     #define save_offset2 frame->Xsave_offset2
567     #define save_offset3 frame->Xsave_offset3
568     #define stacksave frame->Xstacksave
569    
570     #define newptrb frame->Xnewptrb
571    
572     /* When recursion is being used, local variables are allocated on the stack and
573     get preserved during recursion in the normal way. In this environment, fi and
574     i, and fc and c, can be the same variables. */
575    
576 nigel 93 #else /* NO_RECURSE not defined */
577 nigel 77 #define fi i
578     #define fc c
579    
580    
581 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
582     const uschar *charptr; /* in small blocks of the code. My normal */
583     #endif /* style of coding would have declared */
584     const uschar *callpat; /* them within each of those blocks. */
585     const uschar *data; /* However, in order to accommodate the */
586     const uschar *next; /* version of this code that uses an */
587     USPTR pp; /* external "stack" implemented on the */
588     const uschar *prev; /* heap, it is easier to declare them all */
589     USPTR saved_eptr; /* here, so the declarations can be cut */
590     /* out in a block. The only declarations */
591     recursion_info new_recursive; /* within blocks below are for variables */
592     /* that do not have to be preserved over */
593     BOOL cur_is_word; /* a recursive call to RMATCH(). */
594     BOOL condition;
595 nigel 77 BOOL prev_is_word;
596    
597     unsigned long int original_ims;
598    
599     #ifdef SUPPORT_UCP
600     int prop_type;
601 nigel 87 int prop_value;
602 nigel 77 int prop_fail_result;
603     int prop_category;
604     int prop_chartype;
605 nigel 87 int prop_script;
606 ph10 115 int oclength;
607     uschar occhars[8];
608 nigel 77 #endif
609    
610 ph10 399 int codelink;
611 nigel 77 int ctype;
612     int length;
613     int max;
614     int min;
615     int number;
616     int offset;
617     int op;
618     int save_capture_last;
619     int save_offset1, save_offset2, save_offset3;
620     int stacksave[REC_STACK_SAVE_MAX];
621    
622     eptrblock newptrb;
623 nigel 93 #endif /* NO_RECURSE */
624 nigel 77
625     /* These statements are here to stop the compiler complaining about uninitialized
626     variables. */
627    
628     #ifdef SUPPORT_UCP
629 nigel 87 prop_value = 0;
630 nigel 77 prop_fail_result = 0;
631     #endif
632    
633 nigel 93
634 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
635     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
636     used. Thanks to Ian Taylor for noticing this possibility and sending the
637     original patch. */
638    
639     TAIL_RECURSE:
640    
641 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
642     are specified by the macro RMATCH and RRETURN is used to return. When
643     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
644 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
645 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
646     complicated macro. It has to be used in one particular way. This shouldn't,
647     however, impact performance when true recursion is being used. */
648 nigel 77
649 ph10 164 #ifdef SUPPORT_UTF8
650     utf8 = md->utf8; /* Local copy of the flag */
651     #else
652     utf8 = FALSE;
653     #endif
654    
655 nigel 87 /* First check that we haven't called match() too many times, or that we
656     haven't exceeded the recursive call limit. */
657    
658 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
659 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
660 nigel 77
661     original_ims = ims; /* Save for resetting on ')' */
662 nigel 91
663 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
664     string, the match_cbegroup flag is set. When this is the case, add the current
665     subject pointer to the chain of such remembered pointers, to be checked when we
666     hit the closing ket, in order to break infinite loops that match no characters.
667 ph10 197 When match() is called in other circumstances, don't add to the chain. The
668     match_cbegroup flag must NOT be used with tail recursion, because the memory
669     block that is used is on the stack, so a new one may be required for each
670     match(). */
671 nigel 77
672 nigel 93 if ((flags & match_cbegroup) != 0)
673 nigel 77 {
674 ph10 197 newptrb.epb_saved_eptr = eptr;
675     newptrb.epb_prev = eptrb;
676     eptrb = &newptrb;
677 nigel 77 }
678    
679 nigel 93 /* Now start processing the opcodes. */
680 nigel 77
681     for (;;)
682     {
683 nigel 93 minimize = possessive = FALSE;
684 nigel 77 op = *ecode;
685 ph10 443
686 nigel 93 switch(op)
687     {
688 ph10 510 case OP_MARK:
689     markptr = ecode + 2;
690     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
691 ph10 512 ims, eptrb, flags, RM55);
692    
693     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
694     argument, and we must check whether that argument matches this MARK's
695     argument. It is passed back in md->start_match_ptr (an overloading of that
696     variable). If it does match, we reset that variable to the current subject
697     position and return MATCH_SKIP. Otherwise, pass back the return code
698 ph10 510 unaltered. */
699 ph10 512
700     if (rrc == MATCH_SKIP_ARG &&
701 ph10 510 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
702     {
703     md->start_match_ptr = eptr;
704     RRETURN(MATCH_SKIP);
705     }
706    
707 ph10 512 if (md->mark == NULL) md->mark = markptr;
708 ph10 510 RRETURN(rrc);
709    
710 ph10 210 case OP_FAIL:
711 ph10 510 MRRETURN(MATCH_NOMATCH);
712 ph10 211
713 ph10 510 case OP_COMMIT:
714     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
715     ims, eptrb, flags, RM52);
716     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
717     MRRETURN(MATCH_COMMIT);
718    
719 ph10 210 case OP_PRUNE:
720     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
721     ims, eptrb, flags, RM51);
722     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
723 ph10 510 MRRETURN(MATCH_PRUNE);
724 ph10 211
725 ph10 510 case OP_PRUNE_ARG:
726     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
727 ph10 512 ims, eptrb, flags, RM56);
728 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
729 ph10 510 md->mark = ecode + 2;
730     RRETURN(MATCH_PRUNE);
731 ph10 211
732 ph10 210 case OP_SKIP:
733     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
734     ims, eptrb, flags, RM53);
735     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
736 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
737 ph10 510 MRRETURN(MATCH_SKIP);
738 ph10 211
739 ph10 510 case OP_SKIP_ARG:
740     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
741 ph10 512 ims, eptrb, flags, RM57);
742 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
743 ph10 512
744     /* Pass back the current skip name by overloading md->start_match_ptr and
745     returning the special MATCH_SKIP_ARG return code. This will either be
746     caught by a matching MARK, or get to the top, where it is treated the same
747 ph10 510 as PRUNE. */
748 ph10 512
749 ph10 510 md->start_match_ptr = ecode + 2;
750 ph10 512 RRETURN(MATCH_SKIP_ARG);
751    
752 ph10 210 case OP_THEN:
753     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
754 ph10 212 ims, eptrb, flags, RM54);
755 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
756 ph10 510 MRRETURN(MATCH_THEN);
757    
758     case OP_THEN_ARG:
759     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
760 ph10 512 ims, eptrb, flags, RM58);
761 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
762     md->mark = ecode + 2;
763 ph10 212 RRETURN(MATCH_THEN);
764 ph10 211
765 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
766     the current subject position in the working slot at the top of the vector.
767     We mustn't change the current values of the data slot, because they may be
768     set from a previous iteration of this group, and be referred to by a
769     reference inside the group.
770 nigel 77
771 nigel 93 If the bracket fails to match, we need to restore this value and also the
772     values of the final offsets, in case they were set by a previous iteration
773     of the same bracket.
774 nigel 77
775 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
776     a non-capturing bracket. Don't worry about setting the flag for the error
777     case here; that is handled in the code for KET. */
778 nigel 77
779 nigel 93 case OP_CBRA:
780     case OP_SCBRA:
781     number = GET2(ecode, 1+LINK_SIZE);
782 nigel 77 offset = number << 1;
783    
784 ph10 475 #ifdef PCRE_DEBUG
785 nigel 93 printf("start bracket %d\n", number);
786     printf("subject=");
787 nigel 77 pchars(eptr, 16, TRUE, md);
788     printf("\n");
789     #endif
790    
791     if (offset < md->offset_max)
792     {
793     save_offset1 = md->offset_vector[offset];
794     save_offset2 = md->offset_vector[offset+1];
795     save_offset3 = md->offset_vector[md->offset_end - number];
796     save_capture_last = md->capture_last;
797    
798     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
799 ph10 531 md->offset_vector[md->offset_end - number] =
800 ph10 530 (int)(eptr - md->start_subject);
801 nigel 77
802 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
803 nigel 77 do
804     {
805 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
806     ims, eptrb, flags, RM1);
807 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
808 nigel 77 md->capture_last = save_capture_last;
809     ecode += GET(ecode, 1);
810     }
811     while (*ecode == OP_ALT);
812    
813     DPRINTF(("bracket %d failed\n", number));
814    
815     md->offset_vector[offset] = save_offset1;
816     md->offset_vector[offset+1] = save_offset2;
817     md->offset_vector[md->offset_end - number] = save_offset3;
818    
819 ph10 510 if (rrc != MATCH_THEN) md->mark = markptr;
820 nigel 77 RRETURN(MATCH_NOMATCH);
821     }
822    
823 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
824     as a non-capturing bracket. */
825 nigel 77
826 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
827     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828    
829 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
830 nigel 77
831 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
832     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
833    
834 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
835     final alternative within the brackets, we would return the result of a
836     recursive call to match() whatever happened. We can reduce stack usage by
837 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
838     is set.*/
839 nigel 77
840 nigel 93 case OP_BRA:
841     case OP_SBRA:
842     DPRINTF(("start non-capturing bracket\n"));
843     flags = (op >= OP_SBRA)? match_cbegroup : 0;
844 nigel 91 for (;;)
845 nigel 77 {
846 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
847 nigel 93 {
848 ph10 197 if (flags == 0) /* Not a possibly empty group */
849     {
850     ecode += _pcre_OP_lengths[*ecode];
851     DPRINTF(("bracket 0 tail recursion\n"));
852     goto TAIL_RECURSE;
853     }
854    
855     /* Possibly empty group; can't use tail recursion. */
856    
857     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
858     eptrb, flags, RM48);
859 ph10 512 if (rrc == MATCH_NOMATCH) md->mark = markptr;
860     RRETURN(rrc);
861 nigel 93 }
862 nigel 91
863     /* For non-final alternatives, continue the loop for a NOMATCH result;
864     otherwise return. */
865    
866 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
867     eptrb, flags, RM2);
868 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
869 nigel 77 ecode += GET(ecode, 1);
870     }
871 nigel 91 /* Control never reaches here. */
872 nigel 77
873     /* Conditional group: compilation checked that there are no more than
874     two branches. If the condition is false, skipping the first branch takes us
875     past the end if there is only one branch, but that's OK because that is
876 nigel 91 exactly what going to the ket would do. As there is only one branch to be
877     obeyed, we can use tail recursion to avoid using another stack frame. */
878 nigel 77
879     case OP_COND:
880 nigel 93 case OP_SCOND:
881 ph10 399 codelink= GET(ecode, 1);
882 ph10 406
883 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
884     inserted between OP_COND and an assertion condition. */
885 ph10 392
886 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
887     {
888     if (pcre_callout != NULL)
889     {
890     pcre_callout_block cb;
891     cb.version = 1; /* Version 1 of the callout block */
892     cb.callout_number = ecode[LINK_SIZE+2];
893     cb.offset_vector = md->offset_vector;
894     cb.subject = (PCRE_SPTR)md->start_subject;
895 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
896     cb.start_match = (int)(mstart - md->start_subject);
897     cb.current_position = (int)(eptr - md->start_subject);
898 ph10 381 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
899     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
900     cb.capture_top = offset_top/2;
901     cb.capture_last = md->capture_last;
902     cb.callout_data = md->callout_data;
903 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
904 ph10 381 if (rrc < 0) RRETURN(rrc);
905     }
906     ecode += _pcre_OP_lengths[OP_CALLOUT];
907     }
908 ph10 392
909 ph10 399 condcode = ecode[LINK_SIZE+1];
910 ph10 406
911 ph10 381 /* Now see what the actual condition is */
912 ph10 392
913 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
914 nigel 77 {
915 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
916     {
917 ph10 461 condition = FALSE;
918     ecode += GET(ecode, 1);
919     }
920 ph10 459 else
921 ph10 461 {
922 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
923     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
924 ph10 461
925 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
926     false, but the test was set up by name, scan the table to see if the
927     name refers to any other numbers, and test them. The condition is true
928     if any one is set. */
929 ph10 461
930 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
931     {
932     uschar *slotA = md->name_table;
933     for (i = 0; i < md->name_count; i++)
934 ph10 461 {
935     if (GET2(slotA, 0) == recno) break;
936 ph10 459 slotA += md->name_entry_size;
937     }
938 ph10 461
939 ph10 459 /* Found a name for the number - there can be only one; duplicate
940     names for different numbers are allowed, but not vice versa. First
941     scan down for duplicates. */
942 ph10 461
943 ph10 459 if (i < md->name_count)
944 ph10 461 {
945 ph10 459 uschar *slotB = slotA;
946     while (slotB > md->name_table)
947     {
948     slotB -= md->name_entry_size;
949     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
950     {
951     condition = GET2(slotB, 0) == md->recursive->group_num;
952 ph10 461 if (condition) break;
953     }
954 ph10 459 else break;
955 ph10 461 }
956    
957 ph10 459 /* Scan up for duplicates */
958 ph10 461
959 ph10 459 if (!condition)
960 ph10 461 {
961 ph10 459 slotB = slotA;
962     for (i++; i < md->name_count; i++)
963     {
964     slotB += md->name_entry_size;
965     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
966     {
967     condition = GET2(slotB, 0) == md->recursive->group_num;
968     if (condition) break;
969 ph10 461 }
970 ph10 459 else break;
971 ph10 461 }
972     }
973 ph10 459 }
974 ph10 461 }
975    
976 ph10 459 /* Choose branch according to the condition */
977 ph10 461
978 ph10 459 ecode += condition? 3 : GET(ecode, 1);
979     }
980 ph10 461 }
981 nigel 93
982 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
983 nigel 93 {
984 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
985 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
986 ph10 461
987 ph10 459 /* If the numbered capture is unset, but the reference was by name,
988 ph10 461 scan the table to see if the name refers to any other numbers, and test
989     them. The condition is true if any one is set. This is tediously similar
990     to the code above, but not close enough to try to amalgamate. */
991    
992 ph10 459 if (!condition && condcode == OP_NCREF)
993     {
994 ph10 461 int refno = offset >> 1;
995 ph10 459 uschar *slotA = md->name_table;
996 ph10 461
997 ph10 459 for (i = 0; i < md->name_count; i++)
998 ph10 461 {
999     if (GET2(slotA, 0) == refno) break;
1000 ph10 459 slotA += md->name_entry_size;
1001     }
1002 ph10 461
1003     /* Found a name for the number - there can be only one; duplicate names
1004     for different numbers are allowed, but not vice versa. First scan down
1005 ph10 459 for duplicates. */
1006 ph10 461
1007 ph10 459 if (i < md->name_count)
1008 ph10 461 {
1009 ph10 459 uschar *slotB = slotA;
1010     while (slotB > md->name_table)
1011     {
1012     slotB -= md->name_entry_size;
1013     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1014     {
1015     offset = GET2(slotB, 0) << 1;
1016 ph10 461 condition = offset < offset_top &&
1017 ph10 459 md->offset_vector[offset] >= 0;
1018 ph10 461 if (condition) break;
1019     }
1020 ph10 459 else break;
1021 ph10 461 }
1022    
1023 ph10 459 /* Scan up for duplicates */
1024 ph10 461
1025 ph10 459 if (!condition)
1026 ph10 461 {
1027 ph10 459 slotB = slotA;
1028     for (i++; i < md->name_count; i++)
1029     {
1030     slotB += md->name_entry_size;
1031     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1032     {
1033     offset = GET2(slotB, 0) << 1;
1034 ph10 461 condition = offset < offset_top &&
1035 ph10 459 md->offset_vector[offset] >= 0;
1036 ph10 461 if (condition) break;
1037     }
1038 ph10 459 else break;
1039 ph10 461 }
1040     }
1041 ph10 459 }
1042 ph10 461 }
1043    
1044 ph10 459 /* Choose branch according to the condition */
1045    
1046 nigel 93 ecode += condition? 3 : GET(ecode, 1);
1047 nigel 77 }
1048    
1049 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
1050 nigel 93 {
1051     condition = FALSE;
1052     ecode += GET(ecode, 1);
1053     }
1054    
1055 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
1056 nigel 93 the final argument match_condassert causes it to stop at the end of an
1057     assertion. */
1058 nigel 77
1059     else
1060     {
1061 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1062     match_condassert, RM3);
1063 nigel 77 if (rrc == MATCH_MATCH)
1064     {
1065 nigel 93 condition = TRUE;
1066     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1067 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1068     }
1069 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1070 nigel 77 {
1071     RRETURN(rrc); /* Need braces because of following else */
1072     }
1073 nigel 93 else
1074     {
1075     condition = FALSE;
1076 ph10 399 ecode += codelink;
1077 nigel 93 }
1078     }
1079 nigel 91
1080 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1081 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1082     match_cbegroup is required for an unlimited repeat of a possibly empty
1083     group. If the second alternative doesn't exist, we can just plough on. */
1084 nigel 91
1085 nigel 93 if (condition || *ecode == OP_ALT)
1086     {
1087 nigel 91 ecode += 1 + LINK_SIZE;
1088 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1089     {
1090     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1091     RRETURN(rrc);
1092     }
1093     else /* Group must match something */
1094     {
1095     flags = 0;
1096     goto TAIL_RECURSE;
1097     }
1098 nigel 77 }
1099 ph10 395 else /* Condition false & no alternative */
1100 nigel 93 {
1101     ecode += 1 + LINK_SIZE;
1102     }
1103     break;
1104 nigel 77
1105 ph10 461
1106 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1107     to close any currently open capturing brackets. */
1108 ph10 461
1109 ph10 447 case OP_CLOSE:
1110 ph10 461 number = GET2(ecode, 1);
1111 ph10 447 offset = number << 1;
1112 ph10 461
1113 ph10 475 #ifdef PCRE_DEBUG
1114 ph10 447 printf("end bracket %d at *ACCEPT", number);
1115     printf("\n");
1116     #endif
1117 nigel 77
1118 ph10 447 md->capture_last = number;
1119     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1120     {
1121     md->offset_vector[offset] =
1122     md->offset_vector[md->offset_end - number];
1123 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1124 ph10 447 if (offset_top <= offset) offset_top = offset + 2;
1125     }
1126     ecode += 3;
1127 ph10 461 break;
1128 ph10 447
1129    
1130 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1131     recursion, we should restore the offsets appropriately and continue from
1132     after the call. */
1133 nigel 77
1134 ph10 210 case OP_ACCEPT:
1135 nigel 77 case OP_END:
1136     if (md->recursive != NULL && md->recursive->group_num == 0)
1137     {
1138     recursion_info *rec = md->recursive;
1139 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1140 nigel 77 md->recursive = rec->prevrec;
1141     memmove(md->offset_vector, rec->offset_save,
1142     rec->saved_max * sizeof(int));
1143 ph10 461 offset_top = rec->save_offset_top;
1144 nigel 77 ims = original_ims;
1145     ecode = rec->after_call;
1146     break;
1147     }
1148    
1149 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1150     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1151     the subject. In both cases, backtracking will then try other alternatives,
1152     if any. */
1153 ph10 443
1154 ph10 442 if (eptr == mstart &&
1155     (md->notempty ||
1156 ph10 443 (md->notempty_atstart &&
1157 ph10 442 mstart == md->start_subject + md->start_offset)))
1158 ph10 510 MRRETURN(MATCH_NOMATCH);
1159 ph10 443
1160 ph10 442 /* Otherwise, we have a match. */
1161 nigel 77
1162 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1163     md->end_offset_top = offset_top; /* and how many extracts were taken */
1164 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1165 nigel 77
1166 ph10 512 /* For some reason, the macros don't work properly if an expression is
1167     given as the argument to MRRETURN when the heap is in use. */
1168    
1169     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1170     MRRETURN(rrc);
1171    
1172 nigel 77 /* Change option settings */
1173    
1174     case OP_OPT:
1175     ims = ecode[1];
1176     ecode += 2;
1177     DPRINTF(("ims set to %02lx\n", ims));
1178     break;
1179    
1180     /* Assertion brackets. Check the alternative branches in turn - the
1181     matching won't pass the KET for an assertion. If any one branch matches,
1182     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1183     start of each branch to move the current point backwards, so the code at
1184     this level is identical to the lookahead case. */
1185    
1186     case OP_ASSERT:
1187     case OP_ASSERTBACK:
1188     do
1189     {
1190 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1191     RM4);
1192 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1193 ph10 500 {
1194     mstart = md->start_match_ptr; /* In case \K reset it */
1195     break;
1196 ph10 501 }
1197 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1198 nigel 77 ecode += GET(ecode, 1);
1199     }
1200     while (*ecode == OP_ALT);
1201 ph10 510 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1202 nigel 77
1203     /* If checking an assertion for a condition, return MATCH_MATCH. */
1204    
1205     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1206    
1207     /* Continue from after the assertion, updating the offsets high water
1208     mark, since extracts may have been taken during the assertion. */
1209    
1210     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1211     ecode += 1 + LINK_SIZE;
1212     offset_top = md->end_offset_top;
1213     continue;
1214    
1215 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1216 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1217 ph10 473 branches. */
1218 nigel 77
1219     case OP_ASSERT_NOT:
1220     case OP_ASSERTBACK_NOT:
1221     do
1222     {
1223 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1224     RM5);
1225 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1226 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1227     {
1228     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1229 ph10 482 break;
1230     }
1231 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1232 nigel 77 ecode += GET(ecode,1);
1233     }
1234     while (*ecode == OP_ALT);
1235    
1236     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1237    
1238     ecode += 1 + LINK_SIZE;
1239     continue;
1240    
1241     /* Move the subject pointer back. This occurs only at the start of
1242     each branch of a lookbehind assertion. If we are too close to the start to
1243     move back, this match function fails. When working with UTF-8 we move
1244     back a number of characters, not bytes. */
1245    
1246     case OP_REVERSE:
1247     #ifdef SUPPORT_UTF8
1248     if (utf8)
1249     {
1250 nigel 93 i = GET(ecode, 1);
1251     while (i-- > 0)
1252 nigel 77 {
1253     eptr--;
1254 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1255 ph10 207 BACKCHAR(eptr);
1256 nigel 77 }
1257     }
1258     else
1259     #endif
1260    
1261     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1262    
1263     {
1264 nigel 93 eptr -= GET(ecode, 1);
1265 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1266 nigel 77 }
1267    
1268 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1269 nigel 77
1270 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1271 nigel 77 ecode += 1 + LINK_SIZE;
1272     break;
1273    
1274     /* The callout item calls an external function, if one is provided, passing
1275     details of the match so far. This is mainly for debugging, though the
1276     function is able to force a failure. */
1277    
1278     case OP_CALLOUT:
1279     if (pcre_callout != NULL)
1280     {
1281     pcre_callout_block cb;
1282     cb.version = 1; /* Version 1 of the callout block */
1283     cb.callout_number = ecode[1];
1284     cb.offset_vector = md->offset_vector;
1285 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1286 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
1287     cb.start_match = (int)(mstart - md->start_subject);
1288     cb.current_position = (int)(eptr - md->start_subject);
1289 nigel 77 cb.pattern_position = GET(ecode, 2);
1290     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1291     cb.capture_top = offset_top/2;
1292     cb.capture_last = md->capture_last;
1293     cb.callout_data = md->callout_data;
1294 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1295 nigel 77 if (rrc < 0) RRETURN(rrc);
1296     }
1297     ecode += 2 + 2*LINK_SIZE;
1298     break;
1299    
1300     /* Recursion either matches the current regex, or some subexpression. The
1301     offset data is the offset to the starting bracket from the start of the
1302     whole pattern. (This is so that it works from duplicated subpatterns.)
1303    
1304     If there are any capturing brackets started but not finished, we have to
1305     save their starting points and reinstate them after the recursion. However,
1306     we don't know how many such there are (offset_top records the completed
1307     total) so we just have to save all the potential data. There may be up to
1308     65535 such values, which is too large to put on the stack, but using malloc
1309     for small numbers seems expensive. As a compromise, the stack is used when
1310     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1311     is used. If the malloc fails, the saved offsets cannot be stored, so the
1312     match attempt is abandoned at this point by returning PCRE_ERROR_NOMEMORY
1313     instead of carrying on with incomplete saved data.
1314    
1315     There are also other values that have to be saved. We use a chained
1316     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1317     for the original version of this logic. */
1318    
1319     case OP_RECURSE:
1320     {
1321     callpat = md->start_code + GET(ecode, 1);
1322 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1323     GET2(callpat, 1 + LINK_SIZE);
1324 nigel 77
1325     /* Add to "recursing stack" */
1326    
1327     new_recursive.prevrec = md->recursive;
1328     md->recursive = &new_recursive;
1329    
1330     /* Find where to continue from afterwards */
1331    
1332     ecode += 1 + LINK_SIZE;
1333     new_recursive.after_call = ecode;
1334    
1335     /* Now save the offset data. */
1336    
1337     new_recursive.saved_max = md->offset_end;
1338     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1339     new_recursive.offset_save = stacksave;
1340     else
1341     {
1342     new_recursive.offset_save =
1343     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1344     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1345     }
1346    
1347     memcpy(new_recursive.offset_save, md->offset_vector,
1348     new_recursive.saved_max * sizeof(int));
1349 ph10 461 new_recursive.save_offset_top = offset_top;
1350 nigel 77
1351     /* OK, now we can do the recursion. For each top-level alternative we
1352     restore the offset and recursion data. */
1353    
1354     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1355 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1356 nigel 77 do
1357     {
1358 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1359     md, ims, eptrb, flags, RM6);
1360 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1361 nigel 77 {
1362 nigel 87 DPRINTF(("Recursion matched\n"));
1363 nigel 77 md->recursive = new_recursive.prevrec;
1364     if (new_recursive.offset_save != stacksave)
1365     (pcre_free)(new_recursive.offset_save);
1366 ph10 510 MRRETURN(MATCH_MATCH);
1367 nigel 77 }
1368 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1369 nigel 87 {
1370     DPRINTF(("Recursion gave error %d\n", rrc));
1371 ph10 400 if (new_recursive.offset_save != stacksave)
1372     (pcre_free)(new_recursive.offset_save);
1373 nigel 87 RRETURN(rrc);
1374     }
1375 nigel 77
1376     md->recursive = &new_recursive;
1377     memcpy(md->offset_vector, new_recursive.offset_save,
1378     new_recursive.saved_max * sizeof(int));
1379     callpat += GET(callpat, 1);
1380     }
1381     while (*callpat == OP_ALT);
1382    
1383     DPRINTF(("Recursion didn't match\n"));
1384     md->recursive = new_recursive.prevrec;
1385     if (new_recursive.offset_save != stacksave)
1386     (pcre_free)(new_recursive.offset_save);
1387 ph10 510 MRRETURN(MATCH_NOMATCH);
1388 nigel 77 }
1389     /* Control never reaches here */
1390    
1391     /* "Once" brackets are like assertion brackets except that after a match,
1392     the point in the subject string is not moved back. Thus there can never be
1393     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1394     Check the alternative branches in turn - the matching won't pass the KET
1395     for this kind of subpattern. If any one branch matches, we carry on as at
1396 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1397     the start-of-match value in case it was changed by \K. */
1398 nigel 77
1399     case OP_ONCE:
1400 nigel 91 prev = ecode;
1401     saved_eptr = eptr;
1402    
1403     do
1404 nigel 77 {
1405 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1406 ph10 511 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1407 ph10 500 {
1408     mstart = md->start_match_ptr;
1409     break;
1410 ph10 501 }
1411 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1412 nigel 91 ecode += GET(ecode,1);
1413     }
1414     while (*ecode == OP_ALT);
1415 nigel 77
1416 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1417 nigel 77
1418 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1419 nigel 77
1420 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1421     mark, since extracts may have been taken. */
1422 nigel 77
1423 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1424 nigel 77
1425 nigel 91 offset_top = md->end_offset_top;
1426     eptr = md->end_match_ptr;
1427 nigel 77
1428 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1429     happens for a repeating ket if no characters were matched in the group.
1430     This is the forcible breaking of infinite loops as implemented in Perl
1431     5.005. If there is an options reset, it will get obeyed in the normal
1432     course of events. */
1433 nigel 77
1434 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1435     {
1436     ecode += 1+LINK_SIZE;
1437     break;
1438     }
1439 nigel 77
1440 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1441     preceding bracket, in the appropriate order. The second "call" of match()
1442     uses tail recursion, to avoid using another stack frame. We need to reset
1443     any options that changed within the bracket before re-running it, so
1444     check the next opcode. */
1445 nigel 77
1446 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1447     {
1448     ims = (ims & ~PCRE_IMS) | ecode[4];
1449     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1450     }
1451 nigel 77
1452 nigel 91 if (*ecode == OP_KETRMIN)
1453     {
1454 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1455 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1456     ecode = prev;
1457 ph10 197 flags = 0;
1458 nigel 91 goto TAIL_RECURSE;
1459 nigel 77 }
1460 nigel 91 else /* OP_KETRMAX */
1461     {
1462 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1463 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1464     ecode += 1 + LINK_SIZE;
1465 ph10 197 flags = 0;
1466 nigel 91 goto TAIL_RECURSE;
1467     }
1468     /* Control never gets here */
1469 nigel 77
1470     /* An alternation is the end of a branch; scan along to find the end of the
1471     bracketed group and go to there. */
1472    
1473     case OP_ALT:
1474     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1475     break;
1476    
1477 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1478     indicating that it may occur zero times. It may repeat infinitely, or not
1479     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1480     with fixed upper repeat limits are compiled as a number of copies, with the
1481     optional ones preceded by BRAZERO or BRAMINZERO. */
1482 nigel 77
1483     case OP_BRAZERO:
1484     {
1485     next = ecode+1;
1486 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1487 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1488     do next += GET(next,1); while (*next == OP_ALT);
1489 nigel 93 ecode = next + 1 + LINK_SIZE;
1490 nigel 77 }
1491     break;
1492    
1493     case OP_BRAMINZERO:
1494     {
1495     next = ecode+1;
1496 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1497 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1498 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1499     ecode++;
1500     }
1501     break;
1502    
1503 ph10 335 case OP_SKIPZERO:
1504     {
1505     next = ecode+1;
1506     do next += GET(next,1); while (*next == OP_ALT);
1507     ecode = next + 1 + LINK_SIZE;
1508     }
1509     break;
1510    
1511 nigel 93 /* End of a group, repeated or non-repeating. */
1512 nigel 77
1513     case OP_KET:
1514     case OP_KETRMIN:
1515     case OP_KETRMAX:
1516 nigel 91 prev = ecode - GET(ecode, 1);
1517 nigel 77
1518 nigel 93 /* If this was a group that remembered the subject start, in order to break
1519     infinite repeats of empty string matches, retrieve the subject start from
1520     the chain. Otherwise, set it NULL. */
1521 nigel 77
1522 nigel 93 if (*prev >= OP_SBRA)
1523     {
1524     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1525     eptrb = eptrb->epb_prev; /* Backup to previous group */
1526     }
1527     else saved_eptr = NULL;
1528 nigel 77
1529 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1530     matching and return MATCH_MATCH, but record the current high water mark for
1531     use by positive assertions. We also need to record the match start in case
1532     it was changed by \K. */
1533 nigel 93
1534 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1535     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1536     *prev == OP_ONCE)
1537     {
1538     md->end_match_ptr = eptr; /* For ONCE */
1539     md->end_offset_top = offset_top;
1540 ph10 500 md->start_match_ptr = mstart;
1541 ph10 510 MRRETURN(MATCH_MATCH);
1542 nigel 91 }
1543 nigel 77
1544 nigel 93 /* For capturing groups we have to check the group number back at the start
1545     and if necessary complete handling an extraction by setting the offsets and
1546     bumping the high water mark. Note that whole-pattern recursion is coded as
1547     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1548     when the OP_END is reached. Other recursion is handled here. */
1549 nigel 77
1550 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1551 nigel 91 {
1552 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1553 nigel 91 offset = number << 1;
1554 ph10 461
1555 ph10 475 #ifdef PCRE_DEBUG
1556 nigel 91 printf("end bracket %d", number);
1557     printf("\n");
1558 nigel 77 #endif
1559    
1560 nigel 93 md->capture_last = number;
1561     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1562 nigel 91 {
1563 nigel 93 md->offset_vector[offset] =
1564     md->offset_vector[md->offset_end - number];
1565 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1566 nigel 93 if (offset_top <= offset) offset_top = offset + 2;
1567     }
1568 nigel 77
1569 nigel 93 /* Handle a recursively called group. Restore the offsets
1570     appropriately and continue from after the call. */
1571 nigel 77
1572 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1573     {
1574     recursion_info *rec = md->recursive;
1575     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1576     md->recursive = rec->prevrec;
1577     memcpy(md->offset_vector, rec->offset_save,
1578     rec->saved_max * sizeof(int));
1579 ph10 461 offset_top = rec->save_offset_top;
1580 nigel 93 ecode = rec->after_call;
1581     ims = original_ims;
1582     break;
1583 nigel 77 }
1584 nigel 91 }
1585 nigel 77
1586 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1587     flags, in case they got changed during the group. */
1588 nigel 77
1589 nigel 91 ims = original_ims;
1590     DPRINTF(("ims reset to %02lx\n", ims));
1591 nigel 77
1592 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1593     happens for a repeating ket if no characters were matched in the group.
1594     This is the forcible breaking of infinite loops as implemented in Perl
1595     5.005. If there is an options reset, it will get obeyed in the normal
1596     course of events. */
1597 nigel 77
1598 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1599     {
1600     ecode += 1 + LINK_SIZE;
1601     break;
1602     }
1603 nigel 77
1604 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1605     preceding bracket, in the appropriate order. In the second case, we can use
1606 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1607     unlimited repeat of a group that can match an empty string. */
1608 nigel 77
1609 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1610    
1611 nigel 91 if (*ecode == OP_KETRMIN)
1612     {
1613 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1614 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1615 ph10 197 if (flags != 0) /* Could match an empty string */
1616     {
1617     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1618     RRETURN(rrc);
1619     }
1620 nigel 91 ecode = prev;
1621     goto TAIL_RECURSE;
1622 nigel 77 }
1623 nigel 91 else /* OP_KETRMAX */
1624     {
1625 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1626 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1627     ecode += 1 + LINK_SIZE;
1628 ph10 197 flags = 0;
1629 nigel 91 goto TAIL_RECURSE;
1630     }
1631     /* Control never gets here */
1632 nigel 77
1633     /* Start of subject unless notbol, or after internal newline if multiline */
1634    
1635     case OP_CIRC:
1636 ph10 510 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1637 nigel 77 if ((ims & PCRE_MULTILINE) != 0)
1638     {
1639 nigel 91 if (eptr != md->start_subject &&
1640 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1641 ph10 510 MRRETURN(MATCH_NOMATCH);
1642 nigel 77 ecode++;
1643     break;
1644     }
1645     /* ... else fall through */
1646    
1647     /* Start of subject assertion */
1648    
1649     case OP_SOD:
1650 ph10 510 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1651 nigel 77 ecode++;
1652     break;
1653    
1654     /* Start of match assertion */
1655    
1656     case OP_SOM:
1657 ph10 510 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1658 nigel 77 ecode++;
1659     break;
1660 ph10 172
1661 ph10 168 /* Reset the start of match point */
1662 ph10 172
1663 ph10 168 case OP_SET_SOM:
1664     mstart = eptr;
1665 ph10 172 ecode++;
1666     break;
1667 nigel 77
1668     /* Assert before internal newline if multiline, or before a terminating
1669     newline unless endonly is set, else end of subject unless noteol is set. */
1670    
1671     case OP_DOLL:
1672     if ((ims & PCRE_MULTILINE) != 0)
1673     {
1674     if (eptr < md->end_subject)
1675 ph10 510 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1676 nigel 77 else
1677 ph10 510 { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1678 nigel 77 ecode++;
1679     break;
1680     }
1681     else
1682     {
1683 ph10 510 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1684 nigel 77 if (!md->endonly)
1685     {
1686 nigel 91 if (eptr != md->end_subject &&
1687 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1688 ph10 510 MRRETURN(MATCH_NOMATCH);
1689 nigel 77 ecode++;
1690     break;
1691     }
1692     }
1693 nigel 91 /* ... else fall through for endonly */
1694 nigel 77
1695     /* End of subject assertion (\z) */
1696    
1697     case OP_EOD:
1698 ph10 510 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1699 nigel 77 ecode++;
1700     break;
1701    
1702     /* End of subject or ending \n assertion (\Z) */
1703    
1704     case OP_EODN:
1705 nigel 91 if (eptr != md->end_subject &&
1706 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1707 ph10 510 MRRETURN(MATCH_NOMATCH);
1708 nigel 77 ecode++;
1709     break;
1710    
1711     /* Word boundary assertions */
1712    
1713     case OP_NOT_WORD_BOUNDARY:
1714     case OP_WORD_BOUNDARY:
1715     {
1716    
1717     /* Find out if the previous and current characters are "word" characters.
1718     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1719 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1720 ph10 435 partial matching. */
1721 nigel 77
1722     #ifdef SUPPORT_UTF8
1723     if (utf8)
1724     {
1725 ph10 518 /* Get status of previous character */
1726 ph10 527
1727 nigel 77 if (eptr == md->start_subject) prev_is_word = FALSE; else
1728     {
1729 ph10 409 USPTR lastptr = eptr - 1;
1730 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1731 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1732 nigel 77 GETCHAR(c, lastptr);
1733 ph10 527 #ifdef SUPPORT_UCP
1734 ph10 518 if (md->use_ucp)
1735     {
1736     if (c == '_') prev_is_word = TRUE; else
1737 ph10 527 {
1738 ph10 518 int cat = UCD_CATEGORY(c);
1739     prev_is_word = (cat == ucp_L || cat == ucp_N);
1740 ph10 527 }
1741     }
1742     else
1743     #endif
1744 nigel 77 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1745     }
1746 ph10 527
1747 ph10 518 /* Get status of next character */
1748 ph10 527
1749 ph10 443 if (eptr >= md->end_subject)
1750 nigel 77 {
1751 ph10 443 SCHECK_PARTIAL();
1752     cur_is_word = FALSE;
1753 ph10 428 }
1754     else
1755     {
1756 nigel 77 GETCHAR(c, eptr);
1757 ph10 527 #ifdef SUPPORT_UCP
1758 ph10 518 if (md->use_ucp)
1759     {
1760     if (c == '_') cur_is_word = TRUE; else
1761 ph10 527 {
1762 ph10 518 int cat = UCD_CATEGORY(c);
1763     cur_is_word = (cat == ucp_L || cat == ucp_N);
1764 ph10 527 }
1765     }
1766     else
1767     #endif
1768 nigel 77 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1769     }
1770     }
1771     else
1772     #endif
1773    
1774 ph10 527 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1775 ph10 518 consistency with the behaviour of \w we do use it in this case. */
1776 nigel 77
1777     {
1778 ph10 518 /* Get status of previous character */
1779 ph10 527
1780 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1781     {
1782 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1783 ph10 527 #ifdef SUPPORT_UCP
1784 ph10 518 if (md->use_ucp)
1785     {
1786 ph10 527 c = eptr[-1];
1787 ph10 518 if (c == '_') prev_is_word = TRUE; else
1788 ph10 527 {
1789 ph10 518 int cat = UCD_CATEGORY(c);
1790     prev_is_word = (cat == ucp_L || cat == ucp_N);
1791 ph10 527 }
1792     }
1793     else
1794     #endif
1795 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1796     }
1797 ph10 527
1798 ph10 518 /* Get status of next character */
1799 ph10 527
1800 ph10 443 if (eptr >= md->end_subject)
1801 ph10 428 {
1802 ph10 443 SCHECK_PARTIAL();
1803     cur_is_word = FALSE;
1804 ph10 428 }
1805 ph10 527 else
1806     #ifdef SUPPORT_UCP
1807 ph10 518 if (md->use_ucp)
1808     {
1809 ph10 527 c = *eptr;
1810 ph10 518 if (c == '_') cur_is_word = TRUE; else
1811 ph10 527 {
1812 ph10 518 int cat = UCD_CATEGORY(c);
1813     cur_is_word = (cat == ucp_L || cat == ucp_N);
1814 ph10 527 }
1815     }
1816     else
1817     #endif
1818 ph10 518 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1819 nigel 77 }
1820    
1821     /* Now see if the situation is what we want */
1822    
1823     if ((*ecode++ == OP_WORD_BOUNDARY)?
1824     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1825 ph10 510 MRRETURN(MATCH_NOMATCH);
1826 nigel 77 }
1827     break;
1828    
1829     /* Match a single character type; inline for speed */
1830    
1831     case OP_ANY:
1832 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1833 ph10 345 /* Fall through */
1834    
1835 ph10 341 case OP_ALLANY:
1836 ph10 443 if (eptr++ >= md->end_subject)
1837 ph10 428 {
1838 ph10 443 SCHECK_PARTIAL();
1839 ph10 510 MRRETURN(MATCH_NOMATCH);
1840 ph10 443 }
1841 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1842 nigel 77 ecode++;
1843     break;
1844    
1845     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1846     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1847    
1848     case OP_ANYBYTE:
1849 ph10 443 if (eptr++ >= md->end_subject)
1850 ph10 428 {
1851 ph10 443 SCHECK_PARTIAL();
1852 ph10 510 MRRETURN(MATCH_NOMATCH);
1853 ph10 443 }
1854 nigel 77 ecode++;
1855     break;
1856    
1857     case OP_NOT_DIGIT:
1858 ph10 443 if (eptr >= md->end_subject)
1859 ph10 428 {
1860 ph10 443 SCHECK_PARTIAL();
1861 ph10 510 MRRETURN(MATCH_NOMATCH);
1862 ph10 443 }
1863 nigel 77 GETCHARINCTEST(c, eptr);
1864     if (
1865     #ifdef SUPPORT_UTF8
1866     c < 256 &&
1867     #endif
1868     (md->ctypes[c] & ctype_digit) != 0
1869     )
1870 ph10 510 MRRETURN(MATCH_NOMATCH);
1871 nigel 77 ecode++;
1872     break;
1873    
1874     case OP_DIGIT:
1875 ph10 443 if (eptr >= md->end_subject)
1876 ph10 428 {
1877 ph10 443 SCHECK_PARTIAL();
1878 ph10 510 MRRETURN(MATCH_NOMATCH);
1879 ph10 443 }
1880 nigel 77 GETCHARINCTEST(c, eptr);
1881     if (
1882     #ifdef SUPPORT_UTF8
1883     c >= 256 ||
1884     #endif
1885     (md->ctypes[c] & ctype_digit) == 0
1886     )
1887 ph10 510 MRRETURN(MATCH_NOMATCH);
1888 nigel 77 ecode++;
1889     break;
1890    
1891     case OP_NOT_WHITESPACE:
1892 ph10 443 if (eptr >= md->end_subject)
1893 ph10 428 {
1894 ph10 443 SCHECK_PARTIAL();
1895 ph10 510 MRRETURN(MATCH_NOMATCH);
1896 ph10 443 }
1897 nigel 77 GETCHARINCTEST(c, eptr);
1898     if (
1899     #ifdef SUPPORT_UTF8
1900     c < 256 &&
1901     #endif
1902     (md->ctypes[c] & ctype_space) != 0
1903     )
1904 ph10 510 MRRETURN(MATCH_NOMATCH);
1905 nigel 77 ecode++;
1906     break;
1907    
1908     case OP_WHITESPACE:
1909 ph10 443 if (eptr >= md->end_subject)
1910 ph10 428 {
1911 ph10 443 SCHECK_PARTIAL();
1912 ph10 510 MRRETURN(MATCH_NOMATCH);
1913 ph10 443 }
1914 nigel 77 GETCHARINCTEST(c, eptr);
1915     if (
1916     #ifdef SUPPORT_UTF8
1917     c >= 256 ||
1918     #endif
1919     (md->ctypes[c] & ctype_space) == 0
1920     )
1921 ph10 510 MRRETURN(MATCH_NOMATCH);
1922 nigel 77 ecode++;
1923     break;
1924    
1925     case OP_NOT_WORDCHAR:
1926 ph10 443 if (eptr >= md->end_subject)
1927 ph10 428 {
1928 ph10 443 SCHECK_PARTIAL();
1929 ph10 510 MRRETURN(MATCH_NOMATCH);
1930 ph10 443 }
1931 nigel 77 GETCHARINCTEST(c, eptr);
1932     if (
1933     #ifdef SUPPORT_UTF8
1934     c < 256 &&
1935     #endif
1936     (md->ctypes[c] & ctype_word) != 0
1937     )
1938 ph10 510 MRRETURN(MATCH_NOMATCH);
1939 nigel 77 ecode++;
1940     break;
1941    
1942     case OP_WORDCHAR:
1943 ph10 443 if (eptr >= md->end_subject)
1944 ph10 428 {
1945 ph10 443 SCHECK_PARTIAL();
1946 ph10 510 MRRETURN(MATCH_NOMATCH);
1947 ph10 443 }
1948 nigel 77 GETCHARINCTEST(c, eptr);
1949     if (
1950     #ifdef SUPPORT_UTF8
1951     c >= 256 ||
1952     #endif
1953     (md->ctypes[c] & ctype_word) == 0
1954     )
1955 ph10 510 MRRETURN(MATCH_NOMATCH);
1956 nigel 77 ecode++;
1957     break;
1958    
1959 nigel 93 case OP_ANYNL:
1960 ph10 443 if (eptr >= md->end_subject)
1961 ph10 428 {
1962 ph10 443 SCHECK_PARTIAL();
1963 ph10 510 MRRETURN(MATCH_NOMATCH);
1964 ph10 443 }
1965 nigel 93 GETCHARINCTEST(c, eptr);
1966     switch(c)
1967     {
1968 ph10 510 default: MRRETURN(MATCH_NOMATCH);
1969 nigel 93 case 0x000d:
1970     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1971     break;
1972 ph10 231
1973 nigel 93 case 0x000a:
1974 ph10 231 break;
1975    
1976 nigel 93 case 0x000b:
1977     case 0x000c:
1978     case 0x0085:
1979     case 0x2028:
1980     case 0x2029:
1981 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1982 nigel 93 break;
1983     }
1984     ecode++;
1985     break;
1986    
1987 ph10 178 case OP_NOT_HSPACE:
1988 ph10 443 if (eptr >= md->end_subject)
1989 ph10 428 {
1990 ph10 443 SCHECK_PARTIAL();
1991 ph10 510 MRRETURN(MATCH_NOMATCH);
1992 ph10 443 }
1993 ph10 178 GETCHARINCTEST(c, eptr);
1994     switch(c)
1995     {
1996     default: break;
1997     case 0x09: /* HT */
1998     case 0x20: /* SPACE */
1999     case 0xa0: /* NBSP */
2000     case 0x1680: /* OGHAM SPACE MARK */
2001     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2002     case 0x2000: /* EN QUAD */
2003     case 0x2001: /* EM QUAD */
2004     case 0x2002: /* EN SPACE */
2005     case 0x2003: /* EM SPACE */
2006     case 0x2004: /* THREE-PER-EM SPACE */
2007     case 0x2005: /* FOUR-PER-EM SPACE */
2008     case 0x2006: /* SIX-PER-EM SPACE */
2009     case 0x2007: /* FIGURE SPACE */
2010     case 0x2008: /* PUNCTUATION SPACE */
2011     case 0x2009: /* THIN SPACE */
2012     case 0x200A: /* HAIR SPACE */
2013     case 0x202f: /* NARROW NO-BREAK SPACE */
2014     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2015     case 0x3000: /* IDEOGRAPHIC SPACE */
2016 ph10 510 MRRETURN(MATCH_NOMATCH);
2017 ph10 178 }
2018     ecode++;
2019     break;
2020    
2021     case OP_HSPACE:
2022 ph10 443 if (eptr >= md->end_subject)
2023 ph10 428 {
2024 ph10 443 SCHECK_PARTIAL();
2025 ph10 510 MRRETURN(MATCH_NOMATCH);
2026 ph10 443 }
2027 ph10 178 GETCHARINCTEST(c, eptr);
2028     switch(c)
2029     {
2030 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2031 ph10 178 case 0x09: /* HT */
2032     case 0x20: /* SPACE */
2033     case 0xa0: /* NBSP */
2034     case 0x1680: /* OGHAM SPACE MARK */
2035     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2036     case 0x2000: /* EN QUAD */
2037     case 0x2001: /* EM QUAD */
2038     case 0x2002: /* EN SPACE */
2039     case 0x2003: /* EM SPACE */
2040     case 0x2004: /* THREE-PER-EM SPACE */
2041     case 0x2005: /* FOUR-PER-EM SPACE */
2042     case 0x2006: /* SIX-PER-EM SPACE */
2043     case 0x2007: /* FIGURE SPACE */
2044     case 0x2008: /* PUNCTUATION SPACE */
2045     case 0x2009: /* THIN SPACE */
2046     case 0x200A: /* HAIR SPACE */
2047     case 0x202f: /* NARROW NO-BREAK SPACE */
2048     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2049     case 0x3000: /* IDEOGRAPHIC SPACE */
2050     break;
2051     }
2052     ecode++;
2053     break;
2054    
2055     case OP_NOT_VSPACE:
2056 ph10 443 if (eptr >= md->end_subject)
2057 ph10 428 {
2058 ph10 443 SCHECK_PARTIAL();
2059 ph10 510 MRRETURN(MATCH_NOMATCH);
2060 ph10 443 }
2061 ph10 178 GETCHARINCTEST(c, eptr);
2062     switch(c)
2063     {
2064     default: break;
2065     case 0x0a: /* LF */
2066     case 0x0b: /* VT */
2067     case 0x0c: /* FF */
2068     case 0x0d: /* CR */
2069     case 0x85: /* NEL */
2070     case 0x2028: /* LINE SEPARATOR */
2071     case 0x2029: /* PARAGRAPH SEPARATOR */
2072 ph10 510 MRRETURN(MATCH_NOMATCH);
2073 ph10 178 }
2074     ecode++;
2075     break;
2076    
2077     case OP_VSPACE:
2078 ph10 443 if (eptr >= md->end_subject)
2079 ph10 428 {
2080 ph10 443 SCHECK_PARTIAL();
2081 ph10 510 MRRETURN(MATCH_NOMATCH);
2082 ph10 443 }
2083 ph10 178 GETCHARINCTEST(c, eptr);
2084     switch(c)
2085     {
2086 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2087 ph10 178 case 0x0a: /* LF */
2088     case 0x0b: /* VT */
2089     case 0x0c: /* FF */
2090     case 0x0d: /* CR */
2091     case 0x85: /* NEL */
2092     case 0x2028: /* LINE SEPARATOR */
2093     case 0x2029: /* PARAGRAPH SEPARATOR */
2094     break;
2095     }
2096     ecode++;
2097     break;
2098    
2099 nigel 77 #ifdef SUPPORT_UCP
2100     /* Check the next character by Unicode property. We will get here only
2101     if the support is in the binary; otherwise a compile-time error occurs. */
2102    
2103     case OP_PROP:
2104     case OP_NOTPROP:
2105 ph10 443 if (eptr >= md->end_subject)
2106 ph10 428 {
2107 ph10 443 SCHECK_PARTIAL();
2108 ph10 510 MRRETURN(MATCH_NOMATCH);
2109 ph10 443 }
2110 nigel 77 GETCHARINCTEST(c, eptr);
2111     {
2112 ph10 384 const ucd_record *prop = GET_UCD(c);
2113 nigel 77
2114 nigel 87 switch(ecode[1])
2115     {
2116     case PT_ANY:
2117 ph10 510 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2118 nigel 87 break;
2119 nigel 77
2120 nigel 87 case PT_LAMP:
2121 ph10 349 if ((prop->chartype == ucp_Lu ||
2122     prop->chartype == ucp_Ll ||
2123     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2124 ph10 510 MRRETURN(MATCH_NOMATCH);
2125 ph10 517 break;
2126 nigel 87
2127     case PT_GC:
2128 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2129 ph10 510 MRRETURN(MATCH_NOMATCH);
2130 nigel 87 break;
2131    
2132     case PT_PC:
2133 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2134 ph10 510 MRRETURN(MATCH_NOMATCH);
2135 nigel 87 break;
2136    
2137     case PT_SC:
2138 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2139 ph10 510 MRRETURN(MATCH_NOMATCH);
2140 nigel 87 break;
2141 ph10 527
2142 ph10 517 /* These are specials */
2143 ph10 527
2144 ph10 517 case PT_ALNUM:
2145     if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2146     _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2147     MRRETURN(MATCH_NOMATCH);
2148 ph10 527 break;
2149    
2150 ph10 517 case PT_SPACE: /* Perl space */
2151     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2152     c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2153     == (op == OP_NOTPROP))
2154     MRRETURN(MATCH_NOMATCH);
2155 ph10 527 break;
2156    
2157 ph10 517 case PT_PXSPACE: /* POSIX space */
2158     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2159 ph10 527 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2160 ph10 517 c == CHAR_FF || c == CHAR_CR)
2161     == (op == OP_NOTPROP))
2162     MRRETURN(MATCH_NOMATCH);
2163 ph10 527 break;
2164 nigel 87
2165 ph10 527 case PT_WORD:
2166 ph10 517 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2167 ph10 527 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2168 ph10 517 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2169     MRRETURN(MATCH_NOMATCH);
2170 ph10 527 break;
2171    
2172 ph10 517 /* This should never occur */
2173    
2174 nigel 87 default:
2175     RRETURN(PCRE_ERROR_INTERNAL);
2176 nigel 77 }
2177 nigel 87
2178     ecode += 3;
2179 nigel 77 }
2180     break;
2181    
2182     /* Match an extended Unicode sequence. We will get here only if the support
2183     is in the binary; otherwise a compile-time error occurs. */
2184    
2185     case OP_EXTUNI:
2186 ph10 443 if (eptr >= md->end_subject)
2187 ph10 428 {
2188 ph10 443 SCHECK_PARTIAL();
2189 ph10 510 MRRETURN(MATCH_NOMATCH);
2190 ph10 443 }
2191 nigel 77 GETCHARINCTEST(c, eptr);
2192     {
2193 ph10 349 int category = UCD_CATEGORY(c);
2194 ph10 510 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2195 nigel 77 while (eptr < md->end_subject)
2196     {
2197     int len = 1;
2198     if (!utf8) c = *eptr; else
2199     {
2200     GETCHARLEN(c, eptr, len);
2201     }
2202 ph10 349 category = UCD_CATEGORY(c);
2203 nigel 77 if (category != ucp_M) break;
2204     eptr += len;
2205     }
2206     }
2207     ecode++;
2208     break;
2209     #endif
2210    
2211    
2212     /* Match a back reference, possibly repeatedly. Look past the end of the
2213     item to see if there is repeat information following. The code is similar
2214     to that for character classes, but repeated for efficiency. Then obey
2215     similar code to character type repeats - written out again for speed.
2216     However, if the referenced string is the empty string, always treat
2217     it as matched, any number of times (otherwise there could be infinite
2218     loops). */
2219    
2220     case OP_REF:
2221     {
2222     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2223 ph10 345 ecode += 3;
2224    
2225 ph10 336 /* If the reference is unset, there are two possibilities:
2226 ph10 345
2227 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
2228     than the amount of subject left; this ensures that every attempt at a
2229     match fails. We can't just fail here, because of the possibility of
2230     quantifiers with zero minima.
2231 ph10 345
2232     (b) If the JavaScript compatibility flag is set, set the length to zero
2233     so that the back reference matches an empty string.
2234    
2235     Otherwise, set the length to the length of what was matched by the
2236 ph10 336 referenced subpattern. */
2237 ph10 345
2238 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
2239 ph10 530 length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2240 ph10 336 else
2241     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2242 nigel 77
2243     /* Set up for repetition, or handle the non-repeated case */
2244    
2245     switch (*ecode)
2246     {
2247     case OP_CRSTAR:
2248     case OP_CRMINSTAR:
2249     case OP_CRPLUS:
2250     case OP_CRMINPLUS:
2251     case OP_CRQUERY:
2252     case OP_CRMINQUERY:
2253     c = *ecode++ - OP_CRSTAR;
2254     minimize = (c & 1) != 0;
2255     min = rep_min[c]; /* Pick up values from tables; */
2256     max = rep_max[c]; /* zero for max => infinity */
2257     if (max == 0) max = INT_MAX;
2258     break;
2259    
2260     case OP_CRRANGE:
2261     case OP_CRMINRANGE:
2262     minimize = (*ecode == OP_CRMINRANGE);
2263     min = GET2(ecode, 1);
2264     max = GET2(ecode, 3);
2265     if (max == 0) max = INT_MAX;
2266     ecode += 5;
2267     break;
2268    
2269     default: /* No repeat follows */
2270 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
2271 ph10 428 {
2272 ph10 443 CHECK_PARTIAL();
2273 ph10 510 MRRETURN(MATCH_NOMATCH);
2274 ph10 443 }
2275 nigel 77 eptr += length;
2276     continue; /* With the main loop */
2277     }
2278    
2279     /* If the length of the reference is zero, just continue with the
2280     main loop. */
2281 ph10 443
2282 nigel 77 if (length == 0) continue;
2283    
2284     /* First, ensure the minimum number of matches are present. We get back
2285     the length of the reference string explicitly rather than passing the
2286     address of eptr, so that eptr can be a register variable. */
2287    
2288     for (i = 1; i <= min; i++)
2289     {
2290 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
2291 ph10 426 {
2292 ph10 427 CHECK_PARTIAL();
2293 ph10 510 MRRETURN(MATCH_NOMATCH);
2294 ph10 427 }
2295 nigel 77 eptr += length;
2296     }
2297    
2298     /* If min = max, continue at the same level without recursion.
2299     They are not both allowed to be zero. */
2300    
2301     if (min == max) continue;
2302    
2303     /* If minimizing, keep trying and advancing the pointer */
2304    
2305     if (minimize)
2306     {
2307     for (fi = min;; fi++)
2308     {
2309 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2310 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2311 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2312 ph10 428 if (!match_ref(offset, eptr, length, md, ims))
2313 ph10 426 {
2314 ph10 427 CHECK_PARTIAL();
2315 ph10 510 MRRETURN(MATCH_NOMATCH);
2316 ph10 427 }
2317 nigel 77 eptr += length;
2318     }
2319     /* Control never gets here */
2320     }
2321    
2322     /* If maximizing, find the longest string and work backwards */
2323    
2324     else
2325     {
2326     pp = eptr;
2327     for (i = min; i < max; i++)
2328     {
2329 ph10 463 if (!match_ref(offset, eptr, length, md, ims))
2330 ph10 462 {
2331 ph10 463 CHECK_PARTIAL();
2332 ph10 462 break;
2333 ph10 463 }
2334 nigel 77 eptr += length;
2335     }
2336     while (eptr >= pp)
2337     {
2338 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2339 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2340     eptr -= length;
2341     }
2342 ph10 510 MRRETURN(MATCH_NOMATCH);
2343 nigel 77 }
2344     }
2345     /* Control never gets here */
2346    
2347     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2348     used when all the characters in the class have values in the range 0-255,
2349     and either the matching is caseful, or the characters are in the range
2350     0-127 when UTF-8 processing is enabled. The only difference between
2351     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2352     encountered.
2353    
2354     First, look past the end of the item to see if there is repeat information
2355     following. Then obey similar code to character type repeats - written out
2356     again for speed. */
2357    
2358     case OP_NCLASS:
2359     case OP_CLASS:
2360     {
2361     data = ecode + 1; /* Save for matching */
2362     ecode += 33; /* Advance past the item */
2363    
2364     switch (*ecode)
2365     {
2366     case OP_CRSTAR:
2367     case OP_CRMINSTAR:
2368     case OP_CRPLUS:
2369     case OP_CRMINPLUS:
2370     case OP_CRQUERY:
2371     case OP_CRMINQUERY:
2372     c = *ecode++ - OP_CRSTAR;
2373     minimize = (c & 1) != 0;
2374     min = rep_min[c]; /* Pick up values from tables; */
2375     max = rep_max[c]; /* zero for max => infinity */
2376     if (max == 0) max = INT_MAX;
2377     break;
2378    
2379     case OP_CRRANGE:
2380     case OP_CRMINRANGE:
2381     minimize = (*ecode == OP_CRMINRANGE);
2382     min = GET2(ecode, 1);
2383     max = GET2(ecode, 3);
2384     if (max == 0) max = INT_MAX;
2385     ecode += 5;
2386     break;
2387    
2388     default: /* No repeat follows */
2389     min = max = 1;
2390     break;
2391     }
2392    
2393     /* First, ensure the minimum number of matches are present. */
2394    
2395     #ifdef SUPPORT_UTF8
2396     /* UTF-8 mode */
2397     if (utf8)
2398     {
2399     for (i = 1; i <= min; i++)
2400     {
2401 ph10 427 if (eptr >= md->end_subject)
2402 ph10 426 {
2403 ph10 428 SCHECK_PARTIAL();
2404 ph10 510 MRRETURN(MATCH_NOMATCH);
2405 ph10 427 }
2406 nigel 77 GETCHARINC(c, eptr);
2407     if (c > 255)
2408     {
2409 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2410 nigel 77 }
2411     else
2412     {
2413 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2414 nigel 77 }
2415     }
2416     }
2417     else
2418     #endif
2419     /* Not UTF-8 mode */
2420     {
2421     for (i = 1; i <= min; i++)
2422     {
2423 ph10 427 if (eptr >= md->end_subject)
2424 ph10 426 {
2425 ph10 428 SCHECK_PARTIAL();
2426 ph10 510 MRRETURN(MATCH_NOMATCH);
2427 ph10 427 }
2428 nigel 77 c = *eptr++;
2429 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2430 nigel 77 }
2431     }
2432    
2433     /* If max == min we can continue with the main loop without the
2434     need to recurse. */
2435    
2436     if (min == max) continue;
2437    
2438     /* If minimizing, keep testing the rest of the expression and advancing
2439     the pointer while it matches the class. */
2440    
2441     if (minimize)
2442     {
2443     #ifdef SUPPORT_UTF8
2444     /* UTF-8 mode */
2445     if (utf8)
2446     {
2447     for (fi = min;; fi++)
2448     {
2449 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2450 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2451 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2452 ph10 427 if (eptr >= md->end_subject)
2453 ph10 426 {
2454 ph10 427 SCHECK_PARTIAL();
2455 ph10 510 MRRETURN(MATCH_NOMATCH);
2456 ph10 427 }
2457 nigel 77 GETCHARINC(c, eptr);
2458     if (c > 255)
2459     {
2460 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2461 nigel 77 }
2462     else
2463     {
2464 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2465 nigel 77 }
2466     }
2467     }
2468     else
2469     #endif
2470     /* Not UTF-8 mode */
2471     {
2472     for (fi = min;; fi++)
2473     {
2474 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2475 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2476 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2477 ph10 427 if (eptr >= md->end_subject)
2478 ph10 426 {
2479 ph10 427 SCHECK_PARTIAL();
2480 ph10 510 MRRETURN(MATCH_NOMATCH);
2481 ph10 427 }
2482 nigel 77 c = *eptr++;
2483 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2484 nigel 77 }
2485     }
2486     /* Control never gets here */
2487     }
2488    
2489     /* If maximizing, find the longest possible run, then work backwards. */
2490    
2491     else
2492     {
2493     pp = eptr;
2494    
2495     #ifdef SUPPORT_UTF8
2496     /* UTF-8 mode */
2497     if (utf8)
2498     {
2499     for (i = min; i < max; i++)
2500     {
2501     int len = 1;
2502 ph10 463 if (eptr >= md->end_subject)
2503 ph10 462 {
2504 ph10 463 SCHECK_PARTIAL();
2505 ph10 462 break;
2506 ph10 463 }
2507 nigel 77 GETCHARLEN(c, eptr, len);
2508     if (c > 255)
2509     {
2510     if (op == OP_CLASS) break;
2511     }
2512     else
2513     {
2514     if ((data[c/8] & (1 << (c&7))) == 0) break;
2515     }
2516     eptr += len;
2517     }
2518     for (;;)
2519     {
2520 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2521 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2522     if (eptr-- == pp) break; /* Stop if tried at original pos */
2523     BACKCHAR(eptr);
2524     }
2525     }
2526     else
2527     #endif
2528     /* Not UTF-8 mode */
2529     {
2530     for (i = min; i < max; i++)
2531     {
2532 ph10 463 if (eptr >= md->end_subject)
2533 ph10 462 {
2534 ph10 463 SCHECK_PARTIAL();
2535 ph10 462 break;
2536 ph10 463 }
2537 nigel 77 c = *eptr;
2538     if ((data[c/8] & (1 << (c&7))) == 0) break;
2539     eptr++;
2540     }
2541     while (eptr >= pp)
2542     {
2543 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2544 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2545 nigel 77 eptr--;
2546     }
2547     }
2548    
2549 ph10 510 MRRETURN(MATCH_NOMATCH);
2550 nigel 77 }
2551     }
2552     /* Control never gets here */
2553    
2554    
2555     /* Match an extended character class. This opcode is encountered only
2556 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2557     mode, because Unicode properties are supported in non-UTF-8 mode. */
2558 nigel 77
2559     #ifdef SUPPORT_UTF8
2560     case OP_XCLASS:
2561     {
2562     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2563     ecode += GET(ecode, 1); /* Advance past the item */
2564    
2565     switch (*ecode)
2566     {
2567     case OP_CRSTAR:
2568     case OP_CRMINSTAR:
2569     case OP_CRPLUS:
2570     case OP_CRMINPLUS:
2571     case OP_CRQUERY:
2572     case OP_CRMINQUERY:
2573     c = *ecode++ - OP_CRSTAR;
2574     minimize = (c & 1) != 0;
2575     min = rep_min[c]; /* Pick up values from tables; */
2576     max = rep_max[c]; /* zero for max => infinity */
2577     if (max == 0) max = INT_MAX;
2578     break;
2579    
2580     case OP_CRRANGE:
2581     case OP_CRMINRANGE:
2582     minimize = (*ecode == OP_CRMINRANGE);
2583     min = GET2(ecode, 1);
2584     max = GET2(ecode, 3);
2585     if (max == 0) max = INT_MAX;
2586     ecode += 5;
2587     break;
2588    
2589     default: /* No repeat follows */
2590     min = max = 1;
2591     break;
2592     }
2593    
2594     /* First, ensure the minimum number of matches are present. */
2595    
2596     for (i = 1; i <= min; i++)
2597     {
2598 ph10 427 if (eptr >= md->end_subject)
2599 ph10 426 {
2600     SCHECK_PARTIAL();
2601 ph10 510 MRRETURN(MATCH_NOMATCH);
2602 ph10 427 }
2603 ph10 384 GETCHARINCTEST(c, eptr);
2604 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2605 nigel 77 }
2606    
2607     /* If max == min we can continue with the main loop without the
2608     need to recurse. */
2609    
2610     if (min == max) continue;
2611    
2612     /* If minimizing, keep testing the rest of the expression and advancing
2613     the pointer while it matches the class. */
2614    
2615     if (minimize)
2616     {
2617     for (fi = min;; fi++)
2618     {
2619 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2620 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2621 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2622 ph10 427 if (eptr >= md->end_subject)
2623 ph10 426 {
2624 ph10 427 SCHECK_PARTIAL();
2625 ph10 510 MRRETURN(MATCH_NOMATCH);
2626 ph10 427 }
2627 ph10 384 GETCHARINCTEST(c, eptr);
2628 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2629 nigel 77 }
2630     /* Control never gets here */
2631     }
2632    
2633     /* If maximizing, find the longest possible run, then work backwards. */
2634    
2635     else
2636     {
2637     pp = eptr;
2638     for (i = min; i < max; i++)
2639     {
2640     int len = 1;
2641 ph10 463 if (eptr >= md->end_subject)
2642 ph10 462 {
2643 ph10 463 SCHECK_PARTIAL();
2644 ph10 462 break;
2645 ph10 463 }
2646 ph10 384 GETCHARLENTEST(c, eptr, len);
2647 nigel 77 if (!_pcre_xclass(c, data)) break;
2648     eptr += len;
2649     }
2650     for(;;)
2651     {
2652 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2653 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2654     if (eptr-- == pp) break; /* Stop if tried at original pos */
2655 ph10 214 if (utf8) BACKCHAR(eptr);
2656 nigel 77 }
2657 ph10 510 MRRETURN(MATCH_NOMATCH);
2658 nigel 77 }
2659    
2660     /* Control never gets here */
2661     }
2662     #endif /* End of XCLASS */
2663    
2664     /* Match a single character, casefully */
2665    
2666     case OP_CHAR:
2667     #ifdef SUPPORT_UTF8
2668     if (utf8)
2669     {
2670     length = 1;
2671     ecode++;
2672     GETCHARLEN(fc, ecode, length);
2673 ph10 443 if (length > md->end_subject - eptr)
2674 ph10 428 {
2675     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2676 ph10 510 MRRETURN(MATCH_NOMATCH);
2677 ph10 443 }
2678 ph10 510 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2679 nigel 77 }
2680     else
2681     #endif
2682    
2683     /* Non-UTF-8 mode */
2684     {
2685 ph10 443 if (md->end_subject - eptr < 1)
2686 ph10 428 {
2687     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2688 ph10 510 MRRETURN(MATCH_NOMATCH);
2689 ph10 443 }
2690 ph10 510 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2691 nigel 77 ecode += 2;
2692     }
2693     break;
2694    
2695     /* Match a single character, caselessly */
2696    
2697     case OP_CHARNC:
2698     #ifdef SUPPORT_UTF8
2699     if (utf8)
2700     {
2701     length = 1;
2702     ecode++;
2703     GETCHARLEN(fc, ecode, length);
2704    
2705 ph10 443 if (length > md->end_subject - eptr)
2706 ph10 428 {
2707     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2708 ph10 510 MRRETURN(MATCH_NOMATCH);
2709 ph10 443 }
2710 nigel 77
2711     /* If the pattern character's value is < 128, we have only one byte, and
2712     can use the fast lookup table. */
2713    
2714     if (fc < 128)
2715     {
2716 ph10 510 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2717 nigel 77 }
2718    
2719     /* Otherwise we must pick up the subject character */
2720    
2721     else
2722     {
2723 nigel 93 unsigned int dc;
2724 nigel 77 GETCHARINC(dc, eptr);
2725     ecode += length;
2726    
2727     /* If we have Unicode property support, we can use it to test the other
2728 nigel 87 case of the character, if there is one. */
2729 nigel 77
2730     if (fc != dc)
2731     {
2732     #ifdef SUPPORT_UCP
2733 ph10 349 if (dc != UCD_OTHERCASE(fc))
2734 nigel 77 #endif
2735 ph10 510 MRRETURN(MATCH_NOMATCH);
2736 nigel 77 }
2737     }
2738     }
2739     else
2740     #endif /* SUPPORT_UTF8 */
2741    
2742     /* Non-UTF-8 mode */
2743     {
2744 ph10 443 if (md->end_subject - eptr < 1)
2745 ph10 428 {
2746 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2747 ph10 510 MRRETURN(MATCH_NOMATCH);
2748 ph10 443 }
2749 ph10 510 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2750 nigel 77 ecode += 2;
2751     }
2752     break;
2753    
2754 nigel 93 /* Match a single character repeatedly. */
2755 nigel 77
2756     case OP_EXACT:
2757     min = max = GET2(ecode, 1);
2758     ecode += 3;
2759     goto REPEATCHAR;
2760    
2761 nigel 93 case OP_POSUPTO:
2762     possessive = TRUE;
2763     /* Fall through */
2764    
2765 nigel 77 case OP_UPTO:
2766     case OP_MINUPTO:
2767     min = 0;
2768     max = GET2(ecode, 1);
2769     minimize = *ecode == OP_MINUPTO;
2770     ecode += 3;
2771     goto REPEATCHAR;
2772    
2773 nigel 93 case OP_POSSTAR:
2774     possessive = TRUE;
2775     min = 0;
2776     max = INT_MAX;
2777     ecode++;
2778     goto REPEATCHAR;
2779    
2780     case OP_POSPLUS:
2781     possessive = TRUE;
2782     min = 1;
2783     max = INT_MAX;
2784     ecode++;
2785     goto REPEATCHAR;
2786    
2787     case OP_POSQUERY:
2788     possessive = TRUE;
2789     min = 0;
2790     max = 1;
2791     ecode++;
2792     goto REPEATCHAR;
2793    
2794 nigel 77 case OP_STAR:
2795     case OP_MINSTAR:
2796     case OP_PLUS:
2797     case OP_MINPLUS:
2798     case OP_QUERY:
2799     case OP_MINQUERY:
2800     c = *ecode++ - OP_STAR;
2801     minimize = (c & 1) != 0;
2802 ph10 443
2803 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2804     max = rep_max[c]; /* zero for max => infinity */
2805     if (max == 0) max = INT_MAX;
2806    
2807 ph10 426 /* Common code for all repeated single-character matches. */
2808 nigel 77
2809     REPEATCHAR:
2810     #ifdef SUPPORT_UTF8
2811     if (utf8)
2812     {
2813     length = 1;
2814     charptr = ecode;
2815     GETCHARLEN(fc, ecode, length);
2816     ecode += length;
2817    
2818     /* Handle multibyte character matching specially here. There is
2819     support for caseless matching if UCP support is present. */
2820    
2821     if (length > 1)
2822     {
2823     #ifdef SUPPORT_UCP
2824 nigel 93 unsigned int othercase;
2825 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2826 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2827 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2828 ph10 115 else oclength = 0;
2829 nigel 77 #endif /* SUPPORT_UCP */
2830    
2831     for (i = 1; i <= min; i++)
2832     {
2833 ph10 426 if (eptr <= md->end_subject - length &&
2834     memcmp(eptr, charptr, length) == 0) eptr += length;
2835 ph10 123 #ifdef SUPPORT_UCP
2836 ph10 426 else if (oclength > 0 &&
2837     eptr <= md->end_subject - oclength &&
2838     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2839     #endif /* SUPPORT_UCP */
2840 nigel 77 else
2841     {
2842 ph10 426 CHECK_PARTIAL();
2843 ph10 510 MRRETURN(MATCH_NOMATCH);
2844 nigel 77 }
2845     }
2846    
2847     if (min == max) continue;
2848    
2849     if (minimize)
2850     {
2851     for (fi = min;; fi++)
2852     {
2853 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2854 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2855 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2856 ph10 426 if (eptr <= md->end_subject - length &&
2857     memcmp(eptr, charptr, length) == 0) eptr += length;
2858 ph10 123 #ifdef SUPPORT_UCP
2859 ph10 426 else if (oclength > 0 &&
2860     eptr <= md->end_subject - oclength &&
2861     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2862     #endif /* SUPPORT_UCP */
2863 nigel 77 else
2864     {
2865 ph10 426 CHECK_PARTIAL();
2866 ph10 510 MRRETURN(MATCH_NOMATCH);
2867 nigel 77 }
2868     }
2869     /* Control never gets here */
2870     }
2871 nigel 93
2872     else /* Maximize */
2873 nigel 77 {
2874     pp = eptr;
2875     for (i = min; i < max; i++)
2876     {
2877 ph10 426 if (eptr <= md->end_subject - length &&
2878     memcmp(eptr, charptr, length) == 0) eptr += length;
2879 ph10 123 #ifdef SUPPORT_UCP
2880 ph10 426 else if (oclength > 0 &&
2881     eptr <= md->end_subject - oclength &&
2882     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2883     #endif /* SUPPORT_UCP */
2884 ph10 463 else
2885 ph10 462 {
2886 ph10 463 CHECK_PARTIAL();
2887 ph10 462 break;
2888 ph10 463 }
2889 nigel 77 }
2890 nigel 93
2891     if (possessive) continue;
2892 ph10 427
2893 ph10 120 for(;;)
2894 ph10 426 {
2895     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2896     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2897 ph10 510 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2898 ph10 115 #ifdef SUPPORT_UCP
2899 ph10 426 eptr--;
2900     BACKCHAR(eptr);
2901 ph10 123 #else /* without SUPPORT_UCP */
2902 ph10 426 eptr -= length;
2903 ph10 123 #endif /* SUPPORT_UCP */
2904 ph10 426 }
2905 nigel 77 }
2906     /* Control never gets here */
2907     }
2908    
2909     /* If the length of a UTF-8 character is 1, we fall through here, and
2910     obey the code as for non-UTF-8 characters below, though in this case the
2911     value of fc will always be < 128. */
2912     }
2913     else
2914     #endif /* SUPPORT_UTF8 */
2915    
2916     /* When not in UTF-8 mode, load a single-byte character. */
2917    
2918 ph10 426 fc = *ecode++;
2919 ph10 443
2920 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2921     may not be in UTF-8 mode. The code is duplicated for the caseless and
2922     caseful cases, for speed, since matching characters is likely to be quite
2923     common. First, ensure the minimum number of matches are present. If min =
2924     max, continue at the same level without recursing. Otherwise, if
2925     minimizing, keep trying the rest of the expression and advancing one
2926     matching character if failing, up to the maximum. Alternatively, if
2927     maximizing, find the maximum number of characters and work backwards. */
2928    
2929     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2930     max, eptr));
2931    
2932     if ((ims & PCRE_CASELESS) != 0)
2933     {
2934     fc = md->lcc[fc];
2935     for (i = 1; i <= min; i++)
2936 ph10 426 {
2937     if (eptr >= md->end_subject)
2938     {
2939     SCHECK_PARTIAL();
2940 ph10 510 MRRETURN(MATCH_NOMATCH);
2941 ph10 426 }
2942 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2943 ph10 426 }
2944 nigel 77 if (min == max) continue;
2945     if (minimize)
2946     {
2947     for (fi = min;; fi++)
2948     {
2949 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2950 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2951 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2952 ph10 426 if (eptr >= md->end_subject)
2953     {
2954 ph10 427 SCHECK_PARTIAL();
2955 ph10 510 MRRETURN(MATCH_NOMATCH);
2956 ph10 426 }
2957 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2958 nigel 77 }
2959     /* Control never gets here */
2960     }
2961 nigel 93 else /* Maximize */
2962 nigel 77 {
2963     pp = eptr;
2964     for (i = min; i < max; i++)
2965     {
2966 ph10 463 if (eptr >= md->end_subject)
2967 ph10 462 {
2968     SCHECK_PARTIAL();
2969     break;
2970 ph10 463 }
2971 ph10 462 if (fc != md->lcc[*eptr]) break;
2972 nigel 77 eptr++;
2973     }
2974 ph10 427
2975 nigel 93 if (possessive) continue;
2976 ph10 427
2977 nigel 77 while (eptr >= pp)
2978     {
2979 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2980 nigel 77 eptr--;
2981     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2982     }
2983 ph10 510 MRRETURN(MATCH_NOMATCH);
2984 nigel 77 }
2985     /* Control never gets here */
2986     }
2987    
2988     /* Caseful comparisons (multibyte UTF-8 characters were dealt with above) */
2989    
2990     else
2991     {
2992 ph10 427 for (i = 1; i <= min; i++)
2993 ph10 426 {
2994     if (eptr >= md->end_subject)
2995     {
2996     SCHECK_PARTIAL();
2997 ph10 510 MRRETURN(MATCH_NOMATCH);
2998 ph10 426 }
2999 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3000 ph10 427 }
3001 ph10 443
3002 nigel 77 if (min == max) continue;
3003 ph10 443
3004 nigel 77 if (minimize)
3005     {
3006     for (fi = min;; fi++)
3007     {
3008 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3009 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3010 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3011 ph10 426 if (eptr >= md->end_subject)
3012 ph10 427 {
3013 ph10 426 SCHECK_PARTIAL();
3014 ph10 510 MRRETURN(MATCH_NOMATCH);
3015 ph10 427 }
3016 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3017 nigel 77 }
3018     /* Control never gets here */
3019     }
3020 nigel 93 else /* Maximize */
3021 nigel 77 {
3022     pp = eptr;
3023     for (i = min; i < max; i++)
3024     {
3025 ph10 463 if (eptr >= md->end_subject)
3026 ph10 462 {
3027 ph10 463 SCHECK_PARTIAL();
3028 ph10 462 break;
3029 ph10 463 }
3030 ph10 462 if (fc != *eptr) break;
3031 nigel 77 eptr++;
3032     }
3033 nigel 93 if (possessive) continue;
3034 ph10 443
3035 nigel 77 while (eptr >= pp)
3036     {
3037 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3038 nigel 77 eptr--;
3039     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3040     }
3041 ph10 510 MRRETURN(MATCH_NOMATCH);
3042 nigel 77 }
3043     }
3044     /* Control never gets here */
3045    
3046     /* Match a negated single one-byte pattern character. The subject
3047     character that we check it against can be multibyte. */
3048    
3049     case OP_NOT:
3050 ph10 443 if (eptr >= md->end_subject)
3051 ph10 428 {
3052 ph10 443 SCHECK_PARTIAL();
3053 ph10 510 MRRETURN(MATCH_NOMATCH);
3054 ph10 443 }
3055 nigel 77 ecode++;
3056     GETCHARINCTEST(c, eptr);
3057     if ((ims & PCRE_CASELESS) != 0)
3058     {
3059     #ifdef SUPPORT_UTF8
3060     if (c < 256)
3061     #endif
3062     c = md->lcc[c];
3063 ph10 510 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3064 nigel 77 }
3065     else
3066     {
3067 ph10 510 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3068 nigel 77 }
3069     break;
3070    
3071     /* Match a negated single one-byte character repeatedly. This is almost a
3072     repeat of the code for a repeated single character, but I haven't found a
3073     nice way of commoning these up that doesn't require a test of the
3074     positive/negative option for each character match. Maybe that wouldn't add
3075     very much to the time taken, but character matching *is* what this is all
3076     about... */
3077    
3078     case OP_NOTEXACT:
3079     min = max = GET2(ecode, 1);
3080     ecode += 3;
3081     goto REPEATNOTCHAR;
3082    
3083     case OP_NOTUPTO:
3084     case OP_NOTMINUPTO:
3085     min = 0;
3086     max = GET2(ecode, 1);
3087     minimize = *ecode == OP_NOTMINUPTO;
3088     ecode += 3;
3089     goto REPEATNOTCHAR;
3090    
3091 nigel 93 case OP_NOTPOSSTAR:
3092     possessive = TRUE;
3093     min = 0;
3094     max = INT_MAX;
3095     ecode++;
3096     goto REPEATNOTCHAR;
3097    
3098     case OP_NOTPOSPLUS:
3099     possessive = TRUE;
3100     min = 1;
3101     max = INT_MAX;
3102     ecode++;
3103     goto REPEATNOTCHAR;
3104    
3105     case OP_NOTPOSQUERY:
3106     possessive = TRUE;
3107     min = 0;
3108     max = 1;
3109     ecode++;
3110     goto REPEATNOTCHAR;
3111    
3112     case OP_NOTPOSUPTO:
3113     possessive = TRUE;
3114     min = 0;
3115     max = GET2(ecode, 1);
3116     ecode += 3;
3117     goto REPEATNOTCHAR;
3118    
3119 nigel 77 case OP_NOTSTAR:
3120     case OP_NOTMINSTAR:
3121     case OP_NOTPLUS:
3122     case OP_NOTMINPLUS:
3123     case OP_NOTQUERY:
3124     case OP_NOTMINQUERY:
3125     c = *ecode++ - OP_NOTSTAR;
3126     minimize = (c & 1) != 0;
3127     min = rep_min[c]; /* Pick up values from tables; */
3128     max = rep_max[c]; /* zero for max => infinity */
3129     if (max == 0) max = INT_MAX;
3130    
3131 ph10 426 /* Common code for all repeated single-byte matches. */
3132 nigel 77
3133     REPEATNOTCHAR:
3134     fc = *ecode++;
3135    
3136     /* The code is duplicated for the caseless and caseful cases, for speed,
3137     since matching characters is likely to be quite common. First, ensure the
3138     minimum number of matches are present. If min = max, continue at the same
3139     level without recursing. Otherwise, if minimizing, keep trying the rest of
3140     the expression and advancing one matching character if failing, up to the
3141     maximum. Alternatively, if maximizing, find the maximum number of
3142     characters and work backwards. */
3143    
3144     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3145     max, eptr));
3146    
3147     if ((ims & PCRE_CASELESS) != 0)
3148     {
3149     fc = md->lcc[fc];
3150    
3151     #ifdef SUPPORT_UTF8
3152     /* UTF-8 mode */
3153     if (utf8)
3154     {
3155 nigel 93 register unsigned int d;
3156 nigel 77 for (i = 1; i <= min; i++)
3157     {
3158 ph10 426 if (eptr >= md->end_subject)
3159     {
3160     SCHECK_PARTIAL();
3161 ph10 510 MRRETURN(MATCH_NOMATCH);
3162 ph10 427 }
3163 nigel 77 GETCHARINC(d, eptr);
3164     if (d < 256) d = md->lcc[d];
3165 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3166 nigel 77 }
3167     }
3168     else
3169     #endif
3170    
3171     /* Not UTF-8 mode */
3172     {
3173     for (i = 1; i <= min; i++)
3174 ph10 426 {
3175     if (eptr >= md->end_subject)
3176     {
3177     SCHECK_PARTIAL();
3178 ph10 510 MRRETURN(MATCH_NOMATCH);
3179 ph10 427 }
3180 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3181 ph10 427 }
3182 nigel 77 }
3183    
3184     if (min == max) continue;
3185    
3186     if (minimize)
3187     {
3188     #ifdef SUPPORT_UTF8
3189     /* UTF-8 mode */
3190     if (utf8)
3191     {
3192 nigel 93 register unsigned int d;
3193 nigel 77 for (fi = min;; fi++)
3194     {
3195 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3196 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3197 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3198 ph10 427 if (eptr >= md->end_subject)
3199 ph10 426 {
3200 ph10 427 SCHECK_PARTIAL();
3201 ph10 510 MRRETURN(MATCH_NOMATCH);
3202 ph10 427 }
3203 nigel 77 GETCHARINC(d, eptr);
3204     if (d < 256) d = md->lcc[d];
3205 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3206 nigel 77 }
3207     }
3208     else
3209     #endif
3210     /* Not UTF-8 mode */
3211     {
3212     for (fi = min;; fi++)
3213     {
3214 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3215 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3216 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3217 ph10 426 if (eptr >= md->end_subject)
3218     {
3219     SCHECK_PARTIAL();
3220 ph10 510 MRRETURN(MATCH_NOMATCH);
3221 ph10 426 }
3222 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3223 nigel 77 }
3224     }
3225     /* Control never gets here */
3226     }
3227    
3228     /* Maximize case */
3229    
3230     else
3231     {
3232     pp = eptr;
3233    
3234     #ifdef SUPPORT_UTF8
3235     /* UTF-8 mode */
3236     if (utf8)
3237     {
3238 nigel 93 register unsigned int d;
3239 nigel 77 for (i = min; i < max; i++)
3240     {
3241     int len = 1;
3242 ph10 463 if (eptr >= md->end_subject)
3243 ph10 462 {
3244 ph10 463 SCHECK_PARTIAL();
3245 ph10 462 break;
3246 ph10 463 }
3247 nigel 77 GETCHARLEN(d, eptr, len);
3248     if (d < 256) d = md->lcc[d];
3249     if (fc == d) break;
3250     eptr += len;
3251     }
3252 nigel 93 if (possessive) continue;
3253     for(;;)
3254 nigel 77 {
3255 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3256 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3257     if (eptr-- == pp) break; /* Stop if tried at original pos */
3258     BACKCHAR(eptr);
3259     }
3260     }
3261     else
3262     #endif
3263     /* Not UTF-8 mode */
3264     {
3265     for (i = min; i < max; i++)
3266     {
3267 ph10 463 if (eptr >= md->end_subject)
3268 ph10 462 {
3269     SCHECK_PARTIAL();
3270     break;
3271 ph10 463 }
3272 ph10 462 if (fc == md->lcc[*eptr]) break;
3273 nigel 77 eptr++;
3274     }
3275 nigel 93 if (possessive) continue;
3276 nigel 77 while (eptr >= pp)
3277     {
3278 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3279 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3280     eptr--;
3281     }
3282     }
3283    
3284 ph10 510 MRRETURN(MATCH_NOMATCH);
3285 nigel 77 }
3286     /* Control never gets here */
3287     }
3288    
3289     /* Caseful comparisons */
3290    
3291     else
3292     {
3293     #ifdef SUPPORT_UTF8
3294     /* UTF-8 mode */
3295     if (utf8)
3296     {
3297 nigel 93 register unsigned int d;
3298 nigel 77 for (i = 1; i <= min; i++)
3299     {
3300 ph10 426 if (eptr >= md->end_subject)
3301     {
3302     SCHECK_PARTIAL();
3303 ph10 510 MRRETURN(MATCH_NOMATCH);
3304 ph10 427 }
3305 nigel 77 GETCHARINC(d, eptr);
3306 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3307 nigel 77 }
3308     }
3309     else
3310     #endif
3311     /* Not UTF-8 mode */
3312     {
3313     for (i = 1; i <= min; i++)
3314 ph10 426 {
3315     if (eptr >= md->end_subject)
3316     {
3317     SCHECK_PARTIAL();
3318 ph10 510 MRRETURN(MATCH_NOMATCH);
3319 ph10 427 }
3320 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3321 ph10 427 }
3322 nigel 77 }
3323    
3324     if (min == max) continue;
3325    
3326     if (minimize)
3327     {
3328     #ifdef SUPPORT_UTF8
3329     /* UTF-8 mode */
3330     if (utf8)
3331     {
3332 nigel 93 register unsigned int d;
3333 nigel 77 for (fi = min;; fi++)
3334     {
3335 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3336 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3337 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3338 ph10 427 if (eptr >= md->end_subject)
3339 ph10 426 {
3340 ph10 427 SCHECK_PARTIAL();
3341 ph10 510 MRRETURN(MATCH_NOMATCH);
3342 ph10 427 }
3343 nigel 77 GETCHARINC(d, eptr);
3344 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3345 nigel 77 }
3346     }
3347     else
3348     #endif
3349     /* Not UTF-8 mode */
3350     {
3351     for (fi = min;; fi++)
3352     {
3353 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3354 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3355 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3356 ph10 426 if (eptr >= md->end_subject)
3357     {
3358     SCHECK_PARTIAL();
3359 ph10 510 MRRETURN(MATCH_NOMATCH);
3360 ph10 427 }
3361 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3362 nigel 77 }
3363     }
3364     /* Control never gets here */
3365     }
3366    
3367     /* Maximize case */
3368    
3369     else
3370     {
3371     pp = eptr;
3372    
3373     #ifdef SUPPORT_UTF8
3374     /* UTF-8 mode */
3375     if (utf8)
3376     {
3377 nigel 93 register unsigned int d;
3378 nigel 77 for (i = min; i < max; i++)
3379     {
3380     int len = 1;
3381 ph10 463 if (eptr >= md->end_subject)
3382 ph10 462 {
3383 ph10 463 SCHECK_PARTIAL();
3384 ph10 462 break;
3385 ph10 463 }
3386 nigel 77 GETCHARLEN(d, eptr, len);
3387     if (fc == d) break;
3388     eptr += len;
3389     }
3390 nigel 93 if (possessive) continue;
3391 nigel 77 for(;;)
3392     {
3393 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3394 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3395     if (eptr-- == pp) break; /* Stop if tried at original pos */
3396     BACKCHAR(eptr);
3397     }
3398     }
3399     else
3400     #endif
3401     /* Not UTF-8 mode */
3402     {
3403     for (i = min; i < max; i++)
3404     {
3405 ph10 463 if (eptr >= md->end_subject)
3406 ph10 462 {
3407 ph10 463 SCHECK_PARTIAL();
3408 ph10 462 break;
3409 ph10 463 }
3410 ph10 462 if (fc == *eptr) break;
3411 nigel 77 eptr++;
3412     }
3413 nigel 93 if (possessive) continue;
3414 nigel 77 while (eptr >= pp)
3415     {
3416 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3417 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3418     eptr--;
3419     }
3420     }
3421    
3422 ph10 510 MRRETURN(MATCH_NOMATCH);
3423 nigel 77 }
3424     }
3425     /* Control never gets here */
3426    
3427     /* Match a single character type repeatedly; several different opcodes
3428     share code. This is very similar to the code for single characters, but we
3429     repeat it in the interests of efficiency. */
3430    
3431     case OP_TYPEEXACT:
3432     min = max = GET2(ecode, 1);
3433     minimize = TRUE;
3434     ecode += 3;
3435     goto REPEATTYPE;
3436    
3437     case OP_TYPEUPTO:
3438     case OP_TYPEMINUPTO:
3439     min = 0;
3440     max = GET2(ecode, 1);
3441     minimize = *ecode == OP_TYPEMINUPTO;
3442     ecode += 3;
3443     goto REPEATTYPE;
3444    
3445 nigel 93 case OP_TYPEPOSSTAR:
3446     possessive = TRUE;
3447     min = 0;
3448     max = INT_MAX;
3449     ecode++;
3450     goto REPEATTYPE;
3451    
3452     case OP_TYPEPOSPLUS:
3453     possessive = TRUE;
3454     min = 1;
3455     max = INT_MAX;
3456     ecode++;
3457     goto REPEATTYPE;
3458    
3459     case OP_TYPEPOSQUERY:
3460     possessive = TRUE;
3461     min = 0;
3462     max = 1;
3463     ecode++;
3464     goto REPEATTYPE;
3465    
3466     case OP_TYPEPOSUPTO:
3467     possessive = TRUE;
3468     min = 0;
3469     max = GET2(ecode, 1);
3470     ecode += 3;
3471     goto REPEATTYPE;
3472    
3473 nigel 77 case OP_TYPESTAR:
3474     case OP_TYPEMINSTAR:
3475     case OP_TYPEPLUS:
3476     case OP_TYPEMINPLUS:
3477     case OP_TYPEQUERY:
3478     case OP_TYPEMINQUERY:
3479     c = *ecode++ - OP_TYPESTAR;
3480     minimize = (c & 1) != 0;
3481     min = rep_min[c]; /* Pick up values from tables; */
3482     max = rep_max[c]; /* zero for max => infinity */
3483     if (max == 0) max = INT_MAX;
3484    
3485     /* Common code for all repeated single character type matches. Note that
3486     in UTF-8 mode, '.' matches a character of any length; the property, OP_EXTUNI,
3487     OP_ANYNL and OP_NOT_HSPACE cases below also read multibyte characters. */
3488    
3489     REPEATTYPE:
3490     ctype = *ecode++; /* Code for the character type */
3491    
3492     #ifdef SUPPORT_UCP
3493     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3494     {
3495     prop_fail_result = ctype == OP_NOTPROP;
3496     prop_type = *ecode++;
3497 nigel 87 prop_value = *ecode++;
3498 nigel 77 }
3499     else prop_type = -1;
3500     #endif
3501    
3502     /* First, ensure the minimum number of matches are present. Use inline
3503     code for maximizing the speed, and do the type test once at the start
3504 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3505 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3506     and single-bytes. */
3507    
3508     if (min > 0)
3509     {
3510     #ifdef SUPPORT_UCP
3511 nigel 87 if (prop_type >= 0)
3512 nigel 77 {
3513 nigel 87 switch(prop_type)
3514 nigel 77 {
3515 nigel 87 case PT_ANY:
3516 ph10 510 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3517 nigel 87 for (i = 1; i <= min; i++)
3518     {
3519 ph10 427 if (eptr >= md->end_subject)
3520 ph10 426 {
3521 ph10 427 SCHECK_PARTIAL();
3522 ph10 510 MRRETURN(MATCH_NOMATCH);
3523 ph10 427 }
3524 ph10 184 GETCHARINCTEST(c, eptr);
3525 nigel 87 }
3526     break;
3527    
3528     case PT_LAMP:
3529     for (i = 1; i <= min; i++)
3530     {
3531 ph10 427 if (eptr >= md->end_subject)
3532 ph10 426 {
3533 ph10 427 SCHECK_PARTIAL();
3534 ph10 510 MRRETURN(MATCH_NOMATCH);
3535 ph10 427 }
3536 ph10 184 GETCHARINCTEST(c, eptr);
3537 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3538 nigel 87 if ((prop_chartype == ucp_Lu ||
3539     prop_chartype == ucp_Ll ||
3540     prop_chartype == ucp_Lt) == prop_fail_result)
3541 ph10 510 MRRETURN(MATCH_NOMATCH);
3542 nigel 87 }
3543     break;
3544    
3545     case PT_GC:
3546     for (i = 1; i <= min; i++)
3547     {
3548 ph10 427 if (eptr >= md->end_subject)
3549 ph10 426 {
3550 ph10 427 SCHECK_PARTIAL();
3551 ph10 510 MRRETURN(MATCH_NOMATCH);
3552 ph10 427 }
3553 ph10 184 GETCHARINCTEST(c, eptr);
3554 ph10 349 prop_category = UCD_CATEGORY(c);
3555 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3556 ph10 510 MRRETURN(MATCH_NOMATCH);
3557 nigel 87 }
3558     break;
3559    
3560     case PT_PC:
3561     for (i = 1; i <= min; i++)
3562     {
3563 ph10 427 if (eptr >= md->end_subject)
3564 ph10 426 {
3565 ph10 427 SCHECK_PARTIAL();
3566 ph10 510 MRRETURN(MATCH_NOMATCH);
3567 ph10 427 }
3568 ph10 184 GETCHARINCTEST(c, eptr);
3569 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3570 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3571 ph10 510 MRRETURN(MATCH_NOMATCH);
3572 nigel 87 }
3573     break;
3574    
3575     case PT_SC:
3576     for (i = 1; i <= min; i++)
3577     {
3578 ph10 427 if (eptr >= md->end_subject)
3579 ph10 426 {
3580 ph10 427 SCHECK_PARTIAL();
3581 ph10 510 MRRETURN(MATCH_NOMATCH);
3582 ph10 427 }
3583 ph10 184 GETCHARINCTEST(c, eptr);
3584 ph10 349 prop_script = UCD_SCRIPT(c);
3585 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3586 ph10 510 MRRETURN(MATCH_NOMATCH);
3587 nigel 87 }
3588     break;
3589 ph10 527
3590 ph10 517 case PT_ALNUM:
3591     for (i = 1; i <= min; i++)
3592     {
3593     if (eptr >= md->end_subject)
3594     {
3595     SCHECK_PARTIAL();
3596     MRRETURN(MATCH_NOMATCH);
3597     }
3598     GETCHARINCTEST(c, eptr);
3599 ph10 527 prop_category = UCD_CATEGORY(c);
3600     if ((prop_category == ucp_L || prop_category == ucp_N)
3601 ph10 517 == prop_fail_result)
3602     MRRETURN(MATCH_NOMATCH);
3603     }
3604     break;
3605 ph10 527
3606 ph10 517 case PT_SPACE: /* Perl space */
3607     for (i = 1; i <= min; i++)
3608     {
3609     if (eptr >= md->end_subject)
3610     {
3611     SCHECK_PARTIAL();
3612     MRRETURN(MATCH_NOMATCH);
3613     }
3614     GETCHARINCTEST(c, eptr);
3615 ph10 527 prop_category = UCD_CATEGORY(c);
3616     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3617     c == CHAR_FF || c == CHAR_CR)
3618 ph10 517 == prop_fail_result)
3619     MRRETURN(MATCH_NOMATCH);
3620     }
3621     break;
3622 ph10 527
3623 ph10 517 case PT_PXSPACE: /* POSIX space */
3624     for (i = 1; i <= min; i++)
3625     {
3626     if (eptr >= md->end_subject)
3627     {
3628     SCHECK_PARTIAL();
3629     MRRETURN(MATCH_NOMATCH);
3630     }
3631     GETCHARINCTEST(c, eptr);
3632 ph10 527 prop_category = UCD_CATEGORY(c);
3633     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3634     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3635 ph10 517 == prop_fail_result)
3636     MRRETURN(MATCH_NOMATCH);
3637     }
3638     break;
3639 ph10 527
3640     case PT_WORD:
3641 ph10 517 for (i = 1; i <= min; i++)
3642     {
3643     if (eptr >= md->end_subject)
3644     {
3645     SCHECK_PARTIAL();
3646     MRRETURN(MATCH_NOMATCH);
3647     }
3648     GETCHARINCTEST(c, eptr);
3649 ph10 527 prop_category = UCD_CATEGORY(c);
3650 ph10 517 if ((prop_category == ucp_L || prop_category == ucp_N ||
3651 ph10 527 c == CHAR_UNDERSCORE)
3652 ph10 517 == prop_fail_result)
3653     MRRETURN(MATCH_NOMATCH);
3654     }
3655     break;
3656 ph10 527
3657 ph10 517 /* This should not occur */
3658 nigel 87
3659     default:
3660     RRETURN(PCRE_ERROR_INTERNAL);
3661 nigel 77 }
3662     }
3663    
3664     /* Match extended Unicode sequences. We will get here only if the
3665     support is in the binary; otherwise a compile-time error occurs. */
3666    
3667     else if (ctype == OP_EXTUNI)
3668     {
3669     for (i = 1; i <= min; i++)
3670     {
3671 ph10 427 if (eptr >= md->end_subject)
3672 ph10 426 {
3673 ph10 427 SCHECK_PARTIAL();
3674 ph10 510 MRRETURN(MATCH_NOMATCH);
3675 ph10 427 }
3676 nigel 77 GETCHARINCTEST(c, eptr);
3677 ph10 349 prop_category = UCD_CATEGORY(c);
3678 ph10 510 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3679 nigel 77 while (eptr < md->end_subject)
3680     {
3681     int len = 1;
3682 ph10 426 if (!utf8) c = *eptr;
3683     else { GETCHARLEN(c, eptr, len); }
3684 ph10 349 prop_category = UCD_CATEGORY(c);
3685 nigel 77 if (prop_category != ucp_M) break;
3686     eptr += len;
3687     }
3688     }
3689     }
3690    
3691     else
3692     #endif /* SUPPORT_UCP */
3693    
3694     /* Handle all other cases when the coding is UTF-8 */
3695    
3696     #ifdef SUPPORT_UTF8
3697     if (utf8) switch(ctype)
3698     {
3699     case OP_ANY:
3700     for (i = 1; i <= min; i++)
3701     {
3702 ph10 426 if (eptr >= md->end_subject)
3703     {
3704 ph10 427 SCHECK_PARTIAL();
3705 ph10 510 MRRETURN(MATCH_NOMATCH);
3706 ph10 427 }
3707 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3708 nigel 91 eptr++;
3709 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3710     }
3711     break;
3712    
3713 ph10 341 case OP_ALLANY:
3714     for (i = 1; i <= min; i++)
3715     {
3716 ph10 427 if (eptr >= md->end_subject)
3717 ph10 426 {
3718     SCHECK_PARTIAL();
3719 ph10 510 MRRETURN(MATCH_NOMATCH);
3720 ph10 427 }
3721 ph10 341 eptr++;
3722     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3723     }
3724     break;
3725    
3726 nigel 77 case OP_ANYBYTE:
3727 ph10 510 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3728 nigel 77 eptr += min;
3729     break;
3730    
3731 nigel 93 case OP_ANYNL:
3732     for (i = 1; i <= min; i++)
3733     {
3734 ph10 427 if (eptr >= md->end_subject)
3735 ph10 426 {
3736     SCHECK_PARTIAL();
3737 ph10 510 MRRETURN(MATCH_NOMATCH);
3738 ph10 427 }
3739 nigel 93 GETCHARINC(c, eptr);
3740     switch(c)
3741     {
3742 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3743 nigel 93 case 0x000d:
3744     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3745     break;
3746 ph10 231
3747 nigel 93 case 0x000a:
3748 ph10 231 break;
3749    
3750 nigel 93 case 0x000b:
3751     case 0x000c:
3752     case 0x0085:
3753     case 0x2028:
3754     case 0x2029:
3755 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3756 nigel 93 break;
3757     }
3758     }
3759     break;
3760    
3761 ph10 178 case OP_NOT_HSPACE:
3762     for (i = 1; i <= min; i++)
3763     {
3764 ph10 427 if (eptr >= md->end_subject)
3765 ph10 426 {
3766     SCHECK_PARTIAL();
3767 ph10 510 MRRETURN(MATCH_NOMATCH);
3768 ph10 427 }
3769 ph10 178 GETCHARINC(c, eptr);
3770     switch(c)
3771     {
3772     default: break;
3773     case 0x09: /* HT */
3774     case 0x20: /* SPACE */
3775     case 0xa0: /* NBSP */
3776     case 0x1680: /* OGHAM SPACE MARK */
3777     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3778     case 0x2000: /* EN QUAD */
3779     case 0x2001: /* EM QUAD */
3780     case 0x2002: /* EN SPACE */
3781     case 0x2003: /* EM SPACE */
3782     case 0x2004: /* THREE-PER-EM SPACE */
3783     case 0x2005: /* FOUR-PER-EM SPACE */
3784     case 0x2006: /* SIX-PER-EM SPACE */
3785     case 0x2007: /* FIGURE SPACE */
3786     case 0x2008: /* PUNCTUATION SPACE */
3787     case 0x2009: /* THIN SPACE */
3788     case 0x200A: /* HAIR SPACE */
3789     case 0x202f: /* NARROW NO-BREAK SPACE */
3790     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3791     case 0x3000: /* IDEOGRAPHIC SPACE */
3792 ph10 510 MRRETURN(MATCH_NOMATCH);
3793 ph10 178 }
3794     }
3795     break;
3796 ph10 182
3797 ph10 178 case OP_HSPACE:
3798     for (i = 1; i <= min; i++)
3799     {
3800 ph10 427 if (eptr >= md->end_subject)
3801 ph10 426 {
3802 ph10 427 SCHECK_PARTIAL();
3803 ph10 510 MRRETURN(MATCH_NOMATCH);
3804 ph10 427 }
3805 ph10 178 GETCHARINC(c, eptr);
3806     switch(c)
3807     {
3808 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3809 ph10 178 case 0x09: /* HT */
3810     case 0x20: /* SPACE */
3811     case 0xa0: /* NBSP */
3812     case 0x1680: /* OGHAM SPACE MARK */
3813     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3814     case 0x2000: /* EN QUAD */
3815     case 0x2001: /* EM QUAD */
3816     case 0x2002: /* EN SPACE */
3817     case 0x2003: /* EM SPACE */
3818     case 0x2004: /* THREE-PER-EM SPACE */
3819     case 0x2005: /* FOUR-PER-EM SPACE */
3820     case 0x2006: /* SIX-PER-EM SPACE */
3821     case 0x2007: /* FIGURE SPACE */
3822     case 0x2008: /* PUNCTUATION SPACE */
3823     case 0x2009: /* THIN SPACE */
3824     case 0x200A: /* HAIR SPACE */
3825     case 0x202f: /* NARROW NO-BREAK SPACE */
3826     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3827     case 0x3000: /* IDEOGRAPHIC SPACE */
3828     break;
3829     }
3830     }
3831     break;
3832 ph10 182
3833 ph10 178 case OP_NOT_VSPACE:
3834     for (i = 1; i <= min; i++)
3835     {
3836 ph10 427 if (eptr >= md->end_subject)
3837 ph10 426 {
3838 ph10 427 SCHECK_PARTIAL();
3839 ph10 510 MRRETURN(MATCH_NOMATCH);
3840 ph10 427 }
3841 ph10 178 GETCHARINC(c, eptr);
3842     switch(c)
3843     {
3844     default: break;
3845     case 0x0a: /* LF */
3846     case 0x0b: /* VT */
3847     case 0x0c: /* FF */
3848     case 0x0d: /* CR */
3849     case 0x85: /* NEL */
3850     case 0x2028: /* LINE SEPARATOR */
3851     case 0x2029: /* PARAGRAPH SEPARATOR */
3852 ph10 510 MRRETURN(MATCH_NOMATCH);
3853 ph10 178 }
3854     }
3855     break;
3856 ph10 182
3857 ph10 178 case OP_VSPACE:
3858     for (i = 1; i <= min; i++)
3859     {
3860 ph10 427 if (eptr >= md->end_subject)
3861 ph10 426 {
3862 ph10 427 SCHECK_PARTIAL();
3863 ph10 510 MRRETURN(MATCH_NOMATCH);
3864 ph10 427 }
3865 ph10 178 GETCHARINC(c, eptr);
3866     switch(c)
3867     {