/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 517 - (hide annotations) (download)
Wed May 5 10:44:20 2010 UTC (3 years ago) by ph10
File MIME type: text/plain
File size: 183493 byte(s)
Add new special properties Xan, Xps, Xsp, Xwd to help with \w etc.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74 ph10 511 #define MATCH_ACCEPT (-999)
75     #define MATCH_COMMIT (-998)
76     #define MATCH_PRUNE (-997)
77     #define MATCH_SKIP (-996)
78     #define MATCH_SKIP_ARG (-995)
79     #define MATCH_THEN (-994)
80 ph10 210
81 ph10 510 /* This is a convenience macro for code that occurs many times. */
82    
83     #define MRRETURN(ra) \
84     { \
85     md->mark = markptr; \
86     RRETURN(ra); \
87     }
88    
89 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
90     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91     because the offset vector is always a multiple of 3 long. */
92    
93     #define REC_STACK_SAVE_MAX 30
94    
95     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96    
/* Lookup tables giving the minimum and maximum repeat count for each of six
"common" repeat kinds; a maximum of 0 stands for "no upper limit".
NOTE(review): from the values, the six slots look like they correspond to
*, *?, +, +?, ?, ?? in that order - confirm against the code that indexes
these arrays. */
97     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
98     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
99    
100    
101    
102 ph10 475 #ifdef PCRE_DEBUG
103 nigel 77 /*************************************************
104     * Debugging function to print chars *
105     *************************************************/
106    
107     /* Print a sequence of chars in printable format, stopping at the end of the
108     subject if requested.
109    
110     Arguments:
111     p points to characters
112     length number to print
113     is_subject TRUE if printing from within md->start_subject
114     md pointer to matching data block, if is_subject is TRUE
115    
116     Returns: nothing
117     */
118    
119     static void
120     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121     {
122 nigel 93 unsigned int c;
123 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124     while (length-- > 0)
125     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126     }
127     #endif
128    
129    
130    
131     /*************************************************
132     * Match a back-reference *
133     *************************************************/
134    
135     /* If a back reference hasn't been set, the length that is passed is greater
136     than the number of characters left in the string, so the match fails.
137    
138     Arguments:
139     offset index into the offset vector
140     eptr points into the subject
141     length length to be matched
142     md points to match data block
143     ims the ims flags
144    
145     Returns: TRUE if matched
146     */
147    
148     static BOOL
149 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
150 nigel 77 unsigned long int ims)
151     {
152 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
153 nigel 77
154 ph10 475 #ifdef PCRE_DEBUG
155 nigel 77 if (eptr >= md->end_subject)
156     printf("matching subject <null>");
157     else
158     {
159     printf("matching subject ");
160     pchars(eptr, length, TRUE, md);
161     }
162     printf(" against backref ");
163     pchars(p, length, FALSE, md);
164     printf("\n");
165     #endif
166    
167     /* Always fail if not enough characters left */
168    
169     if (length > md->end_subject - eptr) return FALSE;
170    
171 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172     properly if Unicode properties are supported. Otherwise, we can check only
173     ASCII characters. */
174 nigel 77
175     if ((ims & PCRE_CASELESS) != 0)
176     {
177 ph10 354 #ifdef SUPPORT_UTF8
178     #ifdef SUPPORT_UCP
179     if (md->utf8)
180     {
181 ph10 358 USPTR endptr = eptr + length;
182 ph10 354 while (eptr < endptr)
183     {
184 ph10 358 int c, d;
185 ph10 354 GETCHARINC(c, eptr);
186     GETCHARINC(d, p);
187     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188 ph10 358 }
189     }
190 ph10 354 else
191     #endif
192     #endif
193    
194     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195     is no UCP support. */
196 ph10 358
197 nigel 77 while (length-- > 0)
198 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199 nigel 77 }
200 ph10 358
201 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
202     are in UTF-8 mode. */
203 ph10 358
204 nigel 77 else
205     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
207     return TRUE;
208     }
209    
210    
211    
212     /***************************************************************************
213     ****************************************************************************
214     RECURSION IN THE match() FUNCTION
215    
216 nigel 87 The match() function is highly recursive, though not every recursive call
217     increases the recursive depth. Nevertheless, some regular expressions can cause
218     it to recurse to a great depth. I was writing for Unix, so I just let it call
219     itself recursively. This uses the stack for saving everything that has to be
220     saved for a recursive call. On Unix, the stack can be large, and this works
221     fine.
222 nigel 77
223 nigel 87 It turns out that on some non-Unix-like systems there are problems with
224     programs that use a lot of stack. (This despite the fact that every last chip
225     has oodles of memory these days, and techniques for extending the stack have
226     been known for decades.) So....
227 nigel 77
228     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229     calls by keeping local variables that need to be preserved in blocks of memory
230 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
231 nigel 77 achieve this so that the actual code doesn't look very different to what it
232     always used to.
233 ph10 164
234 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
235 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
236     Switzer, the use of longjmp() has been abolished, at the cost of having to
237     provide a unique number for each call to RMATCH. There is no way of generating
238     a sequence of numbers at compile time in C. I have given them names, to make
239     them stand out more clearly.
240    
241     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
244     don't have indeterminate values; this has meant that the frame size can be
245 ph10 164 reduced because the result can be "passed back" by straight setting of the
246     variable instead of being passed in the frame.
247 nigel 77 ****************************************************************************
248     ***************************************************************************/
249    
250 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251     below must be updated in sync. */
252 nigel 77
253 ph10 164 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
254     RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255     RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256     RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257 ph10 210 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258 ph10 512 RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58 };
259 ph10 164
260 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
261 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
262 ph10 501 actually used in this definition. */
263 nigel 77
264     #ifndef NO_RECURSE
265     #define REGISTER register
266 ph10 164
267 ph10 475 #ifdef PCRE_DEBUG
268 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
269 nigel 87 { \
270     printf("match() called in line %d\n", __LINE__); \
271 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
272 nigel 87 printf("to line %d\n", __LINE__); \
273     }
274     #define RRETURN(ra) \
275     { \
276     printf("match() returned %d from line %d ", ra, __LINE__); \
277     return ra; \
278     }
279     #else
280 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
281 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
282 nigel 77 #define RRETURN(ra) return ra
283 nigel 87 #endif
284    
285 nigel 77 #else
286    
287    
288 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
289     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
290     argument of match(), which never changes. */
291 nigel 77
292     #define REGISTER
293    
/* RMATCH, heap version: simulate a recursive call to match(). A new heapframe
is obtained from pcre_stack_malloc(), the number of this call site (rw) is
recorded in the *current* frame so that it can be resumed later, the "argument"
values are copied into the new frame, and control jumps to HEAP_RECURSE. When
the simulated call returns, execution continues at the label L_<rw>, with the
result in rrc.

If no memory can be obtained for the new frame, the current incarnation gives
up with PCRE_ERROR_NOMEMORY, using RRETURN so that the frames already on the
chain are released in the normal way as the error propagates outwards. (RRETURN
is defined below; that is fine, because macros are expanded only where RMATCH
is used.) */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xmarkptr = markptr;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* RRETURN, heap version: simulate a return from match(). The current frame is
unlinked and freed. If there is a previous frame, the result is left in rrc and
control goes to HEAP_RETURN to resume that frame; otherwise this was the top
level frame and the value is really returned to the caller of match(). The
local is called oldframe so that it does not hide RMATCH's newframe when
RRETURN is expanded inside RMATCH. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  (pcre_stack_free)(oldframe);\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
327    
328    
329     /* Structure for remembering the local variables in a private frame */
330    
/* One heapframe holds everything that a single incarnation of match() has to
preserve across an RMATCH() "call" when NO_RECURSE is defined. Frames are
obtained with pcre_stack_malloc() and released with pcre_stack_free() by the
RMATCH and RRETURN macros. Inside match(), each field is reached through a
#define of the same name without the X prefix (for example, eptr is
frame->Xeptr), so the body of the function reads the same in both builds. */

331     typedef struct heapframe {
/* Frame to resume when this one returns; NULL marks the top-level frame */
332     struct heapframe *Xprevframe;
333    
334     /* Function arguments that may change */
335    
336 ph10 409 USPTR Xeptr;
337 nigel 77 const uschar *Xecode;
338 ph10 409 USPTR Xmstart;
339 ph10 501 USPTR Xmarkptr;
340 nigel 77 int Xoffset_top;
/* NOTE(review): the ims argument of match() is "unsigned long int", but this
field is plain "long int" - confirm whether the signedness mismatch matters. */
341     long int Xims;
342     eptrblock *Xeptrb;
343     int Xflags;
344 nigel 91 unsigned int Xrdepth;
345 nigel 77
346     /* Function local variables */
347    
348 ph10 409 USPTR Xcallpat;
349 ph10 406 #ifdef SUPPORT_UTF8
350 ph10 409 USPTR Xcharptr;
351 ph10 406 #endif
352 ph10 409 USPTR Xdata;
353     USPTR Xnext;
354     USPTR Xpp;
355     USPTR Xprev;
356     USPTR Xsaved_eptr;
357 nigel 77
358     recursion_info Xnew_recursive;
359    
360     BOOL Xcur_is_word;
361     BOOL Xcondition;
362     BOOL Xprev_is_word;
363    
364     unsigned long int Xoriginal_ims;
365    
/* Fields that exist only when Unicode property support is compiled in */
366     #ifdef SUPPORT_UCP
367     int Xprop_type;
368 nigel 87 int Xprop_value;
369 nigel 77 int Xprop_fail_result;
370     int Xprop_category;
371     int Xprop_chartype;
372 nigel 87 int Xprop_script;
373 ph10 123 int Xoclength;
374     uschar Xocchars[8];
375 nigel 77 #endif
376    
377 ph10 403 int Xcodelink;
378 nigel 77 int Xctype;
379 nigel 93 unsigned int Xfc;
380 nigel 77 int Xfi;
381     int Xlength;
382     int Xmax;
383     int Xmin;
384     int Xnumber;
385     int Xoffset;
386     int Xop;
387     int Xsave_capture_last;
388     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
/* Sized by REC_STACK_SAVE_MAX, the most offsets that are saved without malloc */
389     int Xstacksave[REC_STACK_SAVE_MAX];
390    
391     eptrblock Xnewptrb;
392    
393 ph10 164 /* Where to jump back to */
394 nigel 77
/* Set by RMATCH to the number of the call site (its rw argument) just before
switching to a new frame, so that this frame can be resumed at the right place */
395 ph10 164 int Xwhere;
396 ph10 165
397 nigel 77 } heapframe;
398    
399     #endif
400    
401    
402     /***************************************************************************
403     ***************************************************************************/
404    
405    
406    
407     /*************************************************
408     * Match from current position *
409     *************************************************/
410    
411 nigel 93 /* This function is called recursively in many circumstances. Whenever it
412 nigel 77 returns a negative (error) response, the outer incarnation must also return the
413 ph10 426 same response. */
414 nigel 77
415 ph10 426 /* These macros pack up tests that are used for partial matching, and which
416     appear several times in the code. We set the "hit end" flag if the pointer is
417     at the end of the subject and also past the start of the subject (i.e.
418 ph10 427 something has been matched). For hard partial matching, we then return
419     immediately. The second one is used when we already know we are past the end of
420     the subject. */
421 ph10 426
422     #define CHECK_PARTIAL()\
423 ph10 435 if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
424 ph10 427 {\
425     md->hitend = TRUE;\
426 ph10 510 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
427 ph10 427 }
428 ph10 426
429     #define SCHECK_PARTIAL()\
430 ph10 462 if (md->partial != 0 && eptr > mstart)\
431 ph10 427 {\
432     md->hitend = TRUE;\
433 ph10 510 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
434 ph10 427 }
435 ph10 426
436 ph10 427
437 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
438     the md structure (e.g. utf8, end_subject) into individual variables to improve
439 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
440     made performance worse.
441    
442     Arguments:
443 nigel 93 eptr pointer to current character in subject
444     ecode pointer to current position in compiled code
445 ph10 168 mstart pointer to the current match start position (can be modified
446 ph10 172 by encountering \K)
447 ph10 501 markptr pointer to the most recent MARK name, or NULL
448 nigel 77 offset_top current top pointer
449     md pointer to "static" info for the match
450     ims current /i, /m, and /s options
451     eptrb pointer to chain of blocks containing eptr at start of
452     brackets - for testing for empty matches
453     flags can contain
454     match_condassert - this is an assertion condition
455 nigel 93 match_cbegroup - this is the start of an unlimited repeat
456     group that can match an empty string
457 nigel 87 rdepth the recursion depth
458 nigel 77
459     Returns: MATCH_MATCH if matched ) these values are >= 0
460     MATCH_NOMATCH if failed to match )
461 ph10 510 a negative MATCH_xxx value for PRUNE, SKIP, etc
462 nigel 77 a negative PCRE_ERROR_xxx value if aborted by an error condition
463 nigel 87 (e.g. stopped by repeated call or recursion limit)
464 nigel 77 */
465    
466     static int
467 ph10 510 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
468     const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
469 ph10 501 eptrblock *eptrb, int flags, unsigned int rdepth)
470 nigel 77 {
471     /* These variables do not need to be preserved over recursion in this function,
472 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
473     "register" because they are used a lot in loops. */
474 nigel 77
475 nigel 91 register int rrc; /* Returns from recursive calls */
476     register int i; /* Used for loops not involving calls to RMATCH() */
477 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
478 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
479 nigel 77
480 nigel 93 BOOL minimize, possessive; /* Quantifier options */
481 ph10 403 int condcode;
482 nigel 93
483 nigel 77 /* When recursion is not being used, all "local" variables that have to be
484     preserved over calls to RMATCH() are part of a "frame" which is obtained from
485     heap storage. Set up the top-level frame here; others are obtained from the
486     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
487    
488     #ifdef NO_RECURSE
489     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
490     frame->Xprevframe = NULL; /* Marks the top level */
491    
492     /* Copy in the original argument variables */
493    
494     frame->Xeptr = eptr;
495     frame->Xecode = ecode;
496 ph10 168 frame->Xmstart = mstart;
497 ph10 501 frame->Xmarkptr = markptr;
498 nigel 77 frame->Xoffset_top = offset_top;
499     frame->Xims = ims;
500     frame->Xeptrb = eptrb;
501     frame->Xflags = flags;
502 nigel 87 frame->Xrdepth = rdepth;
503 nigel 77
504     /* This is where control jumps back to to effect "recursion" */
505    
506     HEAP_RECURSE:
507    
508     /* Macros make the argument variables come from the current frame */
509    
510     #define eptr frame->Xeptr
511     #define ecode frame->Xecode
512 ph10 168 #define mstart frame->Xmstart
513 ph10 501 #define markptr frame->Xmarkptr
514 nigel 77 #define offset_top frame->Xoffset_top
515     #define ims frame->Xims
516     #define eptrb frame->Xeptrb
517     #define flags frame->Xflags
518 nigel 87 #define rdepth frame->Xrdepth
519 nigel 77
520     /* Ditto for the local variables */
521    
522     #ifdef SUPPORT_UTF8
523     #define charptr frame->Xcharptr
524     #endif
525     #define callpat frame->Xcallpat
526 ph10 403 #define codelink frame->Xcodelink
527 nigel 77 #define data frame->Xdata
528     #define next frame->Xnext
529     #define pp frame->Xpp
530     #define prev frame->Xprev
531     #define saved_eptr frame->Xsaved_eptr
532    
533     #define new_recursive frame->Xnew_recursive
534    
535     #define cur_is_word frame->Xcur_is_word
536     #define condition frame->Xcondition
537     #define prev_is_word frame->Xprev_is_word
538    
539     #define original_ims frame->Xoriginal_ims
540    
541     #ifdef SUPPORT_UCP
542     #define prop_type frame->Xprop_type
543 nigel 87 #define prop_value frame->Xprop_value
544 nigel 77 #define prop_fail_result frame->Xprop_fail_result
545     #define prop_category frame->Xprop_category
546     #define prop_chartype frame->Xprop_chartype
547 nigel 87 #define prop_script frame->Xprop_script
548 ph10 115 #define oclength frame->Xoclength
549     #define occhars frame->Xocchars
550 nigel 77 #endif
551    
552     #define ctype frame->Xctype
553     #define fc frame->Xfc
554     #define fi frame->Xfi
555     #define length frame->Xlength
556     #define max frame->Xmax
557     #define min frame->Xmin
558     #define number frame->Xnumber
559     #define offset frame->Xoffset
560     #define op frame->Xop
561     #define save_capture_last frame->Xsave_capture_last
562     #define save_offset1 frame->Xsave_offset1
563     #define save_offset2 frame->Xsave_offset2
564     #define save_offset3 frame->Xsave_offset3
565     #define stacksave frame->Xstacksave
566    
567     #define newptrb frame->Xnewptrb
568    
569     /* When recursion is being used, local variables are allocated on the stack and
570     get preserved during recursion in the normal way. In this environment, fi and
571     i, and fc and c, can be the same variables. */
572    
573 nigel 93 #else /* NO_RECURSE not defined */
574 nigel 77 #define fi i
575     #define fc c
576    
577    
578 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
579     const uschar *charptr; /* in small blocks of the code. My normal */
580     #endif /* style of coding would have declared */
581     const uschar *callpat; /* them within each of those blocks. */
582     const uschar *data; /* However, in order to accommodate the */
583     const uschar *next; /* version of this code that uses an */
584     USPTR pp; /* external "stack" implemented on the */
585     const uschar *prev; /* heap, it is easier to declare them all */
586     USPTR saved_eptr; /* here, so the declarations can be cut */
587     /* out in a block. The only declarations */
588     recursion_info new_recursive; /* within blocks below are for variables */
589     /* that do not have to be preserved over */
590     BOOL cur_is_word; /* a recursive call to RMATCH(). */
591     BOOL condition;
592 nigel 77 BOOL prev_is_word;
593    
594     unsigned long int original_ims;
595    
596     #ifdef SUPPORT_UCP
597     int prop_type;
598 nigel 87 int prop_value;
599 nigel 77 int prop_fail_result;
600     int prop_category;
601     int prop_chartype;
602 nigel 87 int prop_script;
603 ph10 115 int oclength;
604     uschar occhars[8];
605 nigel 77 #endif
606    
607 ph10 399 int codelink;
608 nigel 77 int ctype;
609     int length;
610     int max;
611     int min;
612     int number;
613     int offset;
614     int op;
615     int save_capture_last;
616     int save_offset1, save_offset2, save_offset3;
617     int stacksave[REC_STACK_SAVE_MAX];
618    
619     eptrblock newptrb;
620 nigel 93 #endif /* NO_RECURSE */
621 nigel 77
622     /* These statements are here to stop the compiler complaining about uninitialized
623     variables. */
624    
625     #ifdef SUPPORT_UCP
626 nigel 87 prop_value = 0;
627 nigel 77 prop_fail_result = 0;
628     #endif
629    
630 nigel 93
631 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
632     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
633     used. Thanks to Ian Taylor for noticing this possibility and sending the
634     original patch. */
635    
636     TAIL_RECURSE:
637    
638 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
639     are specified by the macro RMATCH and RRETURN is used to return. When
640     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
641 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
642 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
643     complicated macro. It has to be used in one particular way. This shouldn't,
644     however, impact performance when true recursion is being used. */
645 nigel 77
646 ph10 164 #ifdef SUPPORT_UTF8
647     utf8 = md->utf8; /* Local copy of the flag */
648     #else
649     utf8 = FALSE;
650     #endif
651    
652 nigel 87 /* First check that we haven't called match() too many times, or that we
653     haven't exceeded the recursive call limit. */
654    
655 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
656 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
657 nigel 77
658     original_ims = ims; /* Save for resetting on ')' */
659 nigel 91
660 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
661     string, the match_cbegroup flag is set. When this is the case, add the current
662     subject pointer to the chain of such remembered pointers, to be checked when we
663     hit the closing ket, in order to break infinite loops that match no characters.
664 ph10 197 When match() is called in other circumstances, don't add to the chain. The
665     match_cbegroup flag must NOT be used with tail recursion, because the memory
666     block that is used is on the stack, so a new one may be required for each
667     match(). */
668 nigel 77
669 nigel 93 if ((flags & match_cbegroup) != 0)
670 nigel 77 {
671 ph10 197 newptrb.epb_saved_eptr = eptr;
672     newptrb.epb_prev = eptrb;
673     eptrb = &newptrb;
674 nigel 77 }
675    
676 nigel 93 /* Now start processing the opcodes. */
677 nigel 77
678     for (;;)
679     {
680 nigel 93 minimize = possessive = FALSE;
681 nigel 77 op = *ecode;
682 ph10 443
683 nigel 93 switch(op)
684     {
685 ph10 510 case OP_MARK:
686     markptr = ecode + 2;
687     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
688 ph10 512 ims, eptrb, flags, RM55);
689    
690     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
691     argument, and we must check whether that argument matches this MARK's
692     argument. It is passed back in md->start_match_ptr (an overloading of that
693     variable). If it does match, we reset that variable to the current subject
694     position and return MATCH_SKIP. Otherwise, pass back the return code
695 ph10 510 unaltered. */
696 ph10 512
697     if (rrc == MATCH_SKIP_ARG &&
698 ph10 510 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
699     {
700     md->start_match_ptr = eptr;
701     RRETURN(MATCH_SKIP);
702     }
703    
704 ph10 512 if (md->mark == NULL) md->mark = markptr;
705 ph10 510 RRETURN(rrc);
706    
707 ph10 210 case OP_FAIL:
708 ph10 510 MRRETURN(MATCH_NOMATCH);
709 ph10 211
710 ph10 510 case OP_COMMIT:
711     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
712     ims, eptrb, flags, RM52);
713     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
714     MRRETURN(MATCH_COMMIT);
715    
716 ph10 210 case OP_PRUNE:
717     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718     ims, eptrb, flags, RM51);
719     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
720 ph10 510 MRRETURN(MATCH_PRUNE);
721 ph10 211
722 ph10 510 case OP_PRUNE_ARG:
723     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
724 ph10 512 ims, eptrb, flags, RM56);
725 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
726 ph10 510 md->mark = ecode + 2;
727     RRETURN(MATCH_PRUNE);
728 ph10 211
729 ph10 210 case OP_SKIP:
730     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
731     ims, eptrb, flags, RM53);
732     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
733 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
734 ph10 510 MRRETURN(MATCH_SKIP);
735 ph10 211
736 ph10 510 case OP_SKIP_ARG:
737     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
738 ph10 512 ims, eptrb, flags, RM57);
739 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
740 ph10 512
741     /* Pass back the current skip name by overloading md->start_match_ptr and
742     returning the special MATCH_SKIP_ARG return code. This will either be
743     caught by a matching MARK, or get to the top, where it is treated the same
744 ph10 510 as PRUNE. */
745 ph10 512
746 ph10 510 md->start_match_ptr = ecode + 2;
747 ph10 512 RRETURN(MATCH_SKIP_ARG);
748    
749 ph10 210 case OP_THEN:
750     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
751 ph10 212 ims, eptrb, flags, RM54);
752 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
753 ph10 510 MRRETURN(MATCH_THEN);
754    
755     case OP_THEN_ARG:
756     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
757 ph10 512 ims, eptrb, flags, RM58);
758 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
759     md->mark = ecode + 2;
760 ph10 212 RRETURN(MATCH_THEN);
761 ph10 211
762 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
763     the current subject position in the working slot at the top of the vector.
764     We mustn't change the current values of the data slot, because they may be
765     set from a previous iteration of this group, and be referred to by a
766     reference inside the group.
767 nigel 77
768 nigel 93 If the bracket fails to match, we need to restore this value and also the
769     values of the final offsets, in case they were set by a previous iteration
770     of the same bracket.
771 nigel 77
772 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
773     a non-capturing bracket. Don't worry about setting the flag for the error
774     case here; that is handled in the code for KET. */
775 nigel 77
776 nigel 93 case OP_CBRA:
777     case OP_SCBRA:
778     number = GET2(ecode, 1+LINK_SIZE);
779 nigel 77 offset = number << 1;
780    
781 ph10 475 #ifdef PCRE_DEBUG
782 nigel 93 printf("start bracket %d\n", number);
783     printf("subject=");
784 nigel 77 pchars(eptr, 16, TRUE, md);
785     printf("\n");
786     #endif
787    
788     if (offset < md->offset_max)
789     {
790     save_offset1 = md->offset_vector[offset];
791     save_offset2 = md->offset_vector[offset+1];
792     save_offset3 = md->offset_vector[md->offset_end - number];
793     save_capture_last = md->capture_last;
794    
795     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
796     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
797    
798 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
799 nigel 77 do
800     {
801 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
802     ims, eptrb, flags, RM1);
803 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
804 nigel 77 md->capture_last = save_capture_last;
805     ecode += GET(ecode, 1);
806     }
807     while (*ecode == OP_ALT);
808    
809     DPRINTF(("bracket %d failed\n", number));
810    
811     md->offset_vector[offset] = save_offset1;
812     md->offset_vector[offset+1] = save_offset2;
813     md->offset_vector[md->offset_end - number] = save_offset3;
814    
815 ph10 510 if (rrc != MATCH_THEN) md->mark = markptr;
816 nigel 77 RRETURN(MATCH_NOMATCH);
817     }
818    
819 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
820     as a non-capturing bracket. */
821 nigel 77
822 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
823     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
824    
825 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
826 nigel 77
827 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
829    
830 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
831     final alternative within the brackets, we would return the result of a
832     recursive call to match() whatever happened. We can reduce stack usage by
833 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
834     is set.*/
835 nigel 77
836 nigel 93 case OP_BRA:
837     case OP_SBRA:
838     DPRINTF(("start non-capturing bracket\n"));
839     flags = (op >= OP_SBRA)? match_cbegroup : 0;
840 nigel 91 for (;;)
841 nigel 77 {
842 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
843 nigel 93 {
844 ph10 197 if (flags == 0) /* Not a possibly empty group */
845     {
846     ecode += _pcre_OP_lengths[*ecode];
847     DPRINTF(("bracket 0 tail recursion\n"));
848     goto TAIL_RECURSE;
849     }
850    
851     /* Possibly empty group; can't use tail recursion. */
852    
853     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
854     eptrb, flags, RM48);
855 ph10 512 if (rrc == MATCH_NOMATCH) md->mark = markptr;
856     RRETURN(rrc);
857 nigel 93 }
858 nigel 91
859     /* For non-final alternatives, continue the loop for a NOMATCH result;
860     otherwise return. */
861    
862 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
863     eptrb, flags, RM2);
864 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
865 nigel 77 ecode += GET(ecode, 1);
866     }
867 nigel 91 /* Control never reaches here. */
868 nigel 77
869     /* Conditional group: compilation checked that there are no more than
870     two branches. If the condition is false, skipping the first branch takes us
871     past the end if there is only one branch, but that's OK because that is
872 nigel 91 exactly what going to the ket would do. As there is only one branch to be
873     obeyed, we can use tail recursion to avoid using another stack frame. */
874 nigel 77
875     case OP_COND:
876 nigel 93 case OP_SCOND:
877 ph10 399 codelink= GET(ecode, 1);
878 ph10 406
879 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
880     inserted between OP_COND and an assertion condition. */
881 ph10 392
882 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
883     {
884     if (pcre_callout != NULL)
885     {
886     pcre_callout_block cb;
887     cb.version = 1; /* Version 1 of the callout block */
888     cb.callout_number = ecode[LINK_SIZE+2];
889     cb.offset_vector = md->offset_vector;
890     cb.subject = (PCRE_SPTR)md->start_subject;
891     cb.subject_length = md->end_subject - md->start_subject;
892     cb.start_match = mstart - md->start_subject;
893     cb.current_position = eptr - md->start_subject;
894     cb.pattern_position = GET(ecode, LINK_SIZE + 3);
895     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
896     cb.capture_top = offset_top/2;
897     cb.capture_last = md->capture_last;
898     cb.callout_data = md->callout_data;
899 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
900 ph10 381 if (rrc < 0) RRETURN(rrc);
901     }
902     ecode += _pcre_OP_lengths[OP_CALLOUT];
903     }
904 ph10 392
905 ph10 399 condcode = ecode[LINK_SIZE+1];
906 ph10 406
907 ph10 381 /* Now see what the actual condition is */
908 ph10 392
909 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
910 nigel 77 {
911 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
912     {
913 ph10 461 condition = FALSE;
914     ecode += GET(ecode, 1);
915     }
916 ph10 459 else
917 ph10 461 {
918 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
919     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
920 ph10 461
921 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
922     false, but the test was set up by name, scan the table to see if the
923     name refers to any other numbers, and test them. The condition is true
924     if any one is set. */
925 ph10 461
926 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
927     {
928     uschar *slotA = md->name_table;
929     for (i = 0; i < md->name_count; i++)
930 ph10 461 {
931     if (GET2(slotA, 0) == recno) break;
932 ph10 459 slotA += md->name_entry_size;
933     }
934 ph10 461
935 ph10 459 /* Found a name for the number - there can be only one; duplicate
936     names for different numbers are allowed, but not vice versa. First
937     scan down for duplicates. */
938 ph10 461
939 ph10 459 if (i < md->name_count)
940 ph10 461 {
941 ph10 459 uschar *slotB = slotA;
942     while (slotB > md->name_table)
943     {
944     slotB -= md->name_entry_size;
945     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
946     {
947     condition = GET2(slotB, 0) == md->recursive->group_num;
948 ph10 461 if (condition) break;
949     }
950 ph10 459 else break;
951 ph10 461 }
952    
953 ph10 459 /* Scan up for duplicates */
954 ph10 461
955 ph10 459 if (!condition)
956 ph10 461 {
957 ph10 459 slotB = slotA;
958     for (i++; i < md->name_count; i++)
959     {
960     slotB += md->name_entry_size;
961     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
962     {
963     condition = GET2(slotB, 0) == md->recursive->group_num;
964     if (condition) break;
965 ph10 461 }
966 ph10 459 else break;
967 ph10 461 }
968     }
969 ph10 459 }
970 ph10 461 }
971    
972 ph10 459 /* Choose branch according to the condition */
973 ph10 461
974 ph10 459 ecode += condition? 3 : GET(ecode, 1);
975     }
976 ph10 461 }
977 nigel 93
978 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
979 nigel 93 {
980 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
981 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
982 ph10 461
983 ph10 459 /* If the numbered capture is unset, but the reference was by name,
984 ph10 461 scan the table to see if the name refers to any other numbers, and test
985     them. The condition is true if any one is set. This is tediously similar
986     to the code above, but not close enough to try to amalgamate. */
987    
988 ph10 459 if (!condition && condcode == OP_NCREF)
989     {
990 ph10 461 int refno = offset >> 1;
991 ph10 459 uschar *slotA = md->name_table;
992 ph10 461
993 ph10 459 for (i = 0; i < md->name_count; i++)
994 ph10 461 {
995     if (GET2(slotA, 0) == refno) break;
996 ph10 459 slotA += md->name_entry_size;
997     }
998 ph10 461
999     /* Found a name for the number - there can be only one; duplicate names
1000     for different numbers are allowed, but not vice versa. First scan down
1001 ph10 459 for duplicates. */
1002 ph10 461
1003 ph10 459 if (i < md->name_count)
1004 ph10 461 {
1005 ph10 459 uschar *slotB = slotA;
1006     while (slotB > md->name_table)
1007     {
1008     slotB -= md->name_entry_size;
1009     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1010     {
1011     offset = GET2(slotB, 0) << 1;
1012 ph10 461 condition = offset < offset_top &&
1013 ph10 459 md->offset_vector[offset] >= 0;
1014 ph10 461 if (condition) break;
1015     }
1016 ph10 459 else break;
1017 ph10 461 }
1018    
1019 ph10 459 /* Scan up for duplicates */
1020 ph10 461
1021 ph10 459 if (!condition)
1022 ph10 461 {
1023 ph10 459 slotB = slotA;
1024     for (i++; i < md->name_count; i++)
1025     {
1026     slotB += md->name_entry_size;
1027     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1028     {
1029     offset = GET2(slotB, 0) << 1;
1030 ph10 461 condition = offset < offset_top &&
1031 ph10 459 md->offset_vector[offset] >= 0;
1032 ph10 461 if (condition) break;
1033     }
1034 ph10 459 else break;
1035 ph10 461 }
1036     }
1037 ph10 459 }
1038 ph10 461 }
1039    
1040 ph10 459 /* Choose branch according to the condition */
1041    
1042 nigel 93 ecode += condition? 3 : GET(ecode, 1);
1043 nigel 77 }
1044    
1045 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
1046 nigel 93 {
1047     condition = FALSE;
1048     ecode += GET(ecode, 1);
1049     }
1050    
1051 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
1052 nigel 93 the final argument match_condassert causes it to stop at the end of an
1053     assertion. */
1054 nigel 77
1055     else
1056     {
1057 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1058     match_condassert, RM3);
1059 nigel 77 if (rrc == MATCH_MATCH)
1060     {
1061 nigel 93 condition = TRUE;
1062     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1063 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1064     }
1065 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1066 nigel 77 {
1067     RRETURN(rrc); /* Need braces because of following else */
1068     }
1069 nigel 93 else
1070     {
1071     condition = FALSE;
1072 ph10 399 ecode += codelink;
1073 nigel 93 }
1074     }
1075 nigel 91
1076 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1077 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1078     match_cbegroup is required for an unlimited repeat of a possibly empty
1079     group. If the second alternative doesn't exist, we can just plough on. */
1080 nigel 91
1081 nigel 93 if (condition || *ecode == OP_ALT)
1082     {
1083 nigel 91 ecode += 1 + LINK_SIZE;
1084 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1085     {
1086     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1087     RRETURN(rrc);
1088     }
1089     else /* Group must match something */
1090     {
1091     flags = 0;
1092     goto TAIL_RECURSE;
1093     }
1094 nigel 77 }
1095 ph10 395 else /* Condition false & no alternative */
1096 nigel 93 {
1097     ecode += 1 + LINK_SIZE;
1098     }
1099     break;
1100 nigel 77
1101 ph10 461
1102 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1103     to close any currently open capturing brackets. */
1104 ph10 461
1105 ph10 447 case OP_CLOSE:
1106 ph10 461 number = GET2(ecode, 1);
1107 ph10 447 offset = number << 1;
1108 ph10 461
1109 ph10 475 #ifdef PCRE_DEBUG
1110 ph10 447 printf("end bracket %d at *ACCEPT", number);
1111     printf("\n");
1112     #endif
1113 nigel 77
1114 ph10 447 md->capture_last = number;
1115     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1116     {
1117     md->offset_vector[offset] =
1118     md->offset_vector[md->offset_end - number];
1119     md->offset_vector[offset+1] = eptr - md->start_subject;
1120     if (offset_top <= offset) offset_top = offset + 2;
1121     }
1122     ecode += 3;
1123 ph10 461 break;
1124 ph10 447
1125    
1126 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1127     recursion, we should restore the offsets appropriately and continue from
1128     after the call. */
1129 nigel 77
1130 ph10 210 case OP_ACCEPT:
1131 nigel 77 case OP_END:
1132     if (md->recursive != NULL && md->recursive->group_num == 0)
1133     {
1134     recursion_info *rec = md->recursive;
1135 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1136 nigel 77 md->recursive = rec->prevrec;
1137     memmove(md->offset_vector, rec->offset_save,
1138     rec->saved_max * sizeof(int));
1139 ph10 461 offset_top = rec->save_offset_top;
1140 nigel 77 ims = original_ims;
1141     ecode = rec->after_call;
1142     break;
1143     }
1144    
1145 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1146     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1147     the subject. In both cases, backtracking will then try other alternatives,
1148     if any. */
1149 ph10 443
1150 ph10 442 if (eptr == mstart &&
1151     (md->notempty ||
1152 ph10 443 (md->notempty_atstart &&
1153 ph10 442 mstart == md->start_subject + md->start_offset)))
1154 ph10 510 MRRETURN(MATCH_NOMATCH);
1155 ph10 443
1156 ph10 442 /* Otherwise, we have a match. */
1157 nigel 77
1158 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1159     md->end_offset_top = offset_top; /* and how many extracts were taken */
1160 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1161 nigel 77
1162 ph10 512 /* For some reason, the macros don't work properly if an expression is
1163     given as the argument to MRRETURN when the heap is in use. */
1164    
1165     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1166     MRRETURN(rrc);
1167    
1168 nigel 77 /* Change option settings */
1169    
1170     case OP_OPT:
1171     ims = ecode[1];
1172     ecode += 2;
1173     DPRINTF(("ims set to %02lx\n", ims));
1174     break;
1175    
1176     /* Assertion brackets. Check the alternative branches in turn - the
1177     matching won't pass the KET for an assertion. If any one branch matches,
1178     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1179     start of each branch to move the current point backwards, so the code at
1180     this level is identical to the lookahead case. */
1181    
1182     case OP_ASSERT:
1183     case OP_ASSERTBACK:
1184     do
1185     {
1186 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1187     RM4);
1188 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1189 ph10 500 {
1190     mstart = md->start_match_ptr; /* In case \K reset it */
1191     break;
1192 ph10 501 }
1193 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1194 nigel 77 ecode += GET(ecode, 1);
1195     }
1196     while (*ecode == OP_ALT);
1197 ph10 510 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1198 nigel 77
1199     /* If checking an assertion for a condition, return MATCH_MATCH. */
1200    
1201     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1202    
1203     /* Continue from after the assertion, updating the offsets high water
1204     mark, since extracts may have been taken during the assertion. */
1205    
1206     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1207     ecode += 1 + LINK_SIZE;
1208     offset_top = md->end_offset_top;
1209     continue;
1210    
1211 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1212 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1213 ph10 473 branches. */
1214 nigel 77
1215     case OP_ASSERT_NOT:
1216     case OP_ASSERTBACK_NOT:
1217     do
1218     {
1219 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1220     RM5);
1221 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1222 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1223     {
1224     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1225 ph10 482 break;
1226     }
1227 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1228 nigel 77 ecode += GET(ecode,1);
1229     }
1230     while (*ecode == OP_ALT);
1231    
1232     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1233    
1234     ecode += 1 + LINK_SIZE;
1235     continue;
1236    
1237     /* Move the subject pointer back. This occurs only at the start of
1238     each branch of a lookbehind assertion. If we are too close to the start to
1239     move back, this match function fails. When working with UTF-8 we move
1240     back a number of characters, not bytes. */
1241    
1242     case OP_REVERSE:
1243     #ifdef SUPPORT_UTF8
1244     if (utf8)
1245     {
1246 nigel 93 i = GET(ecode, 1);
1247     while (i-- > 0)
1248 nigel 77 {
1249     eptr--;
1250 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1251 ph10 207 BACKCHAR(eptr);
1252 nigel 77 }
1253     }
1254     else
1255     #endif
1256    
1257     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1258    
1259     {
1260 nigel 93 eptr -= GET(ecode, 1);
1261 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1262 nigel 77 }
1263    
1264 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1265 nigel 77
1266 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1267 nigel 77 ecode += 1 + LINK_SIZE;
1268     break;
1269    
1270     /* The callout item calls an external function, if one is provided, passing
1271     details of the match so far. This is mainly for debugging, though the
1272     function is able to force a failure. */
1273    
1274     case OP_CALLOUT:
1275     if (pcre_callout != NULL)
1276     {
1277     pcre_callout_block cb;
1278     cb.version = 1; /* Version 1 of the callout block */
1279     cb.callout_number = ecode[1];
1280     cb.offset_vector = md->offset_vector;
1281 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1282 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
1283 ph10 168 cb.start_match = mstart - md->start_subject;
1284 nigel 77 cb.current_position = eptr - md->start_subject;
1285     cb.pattern_position = GET(ecode, 2);
1286     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1287     cb.capture_top = offset_top/2;
1288     cb.capture_last = md->capture_last;
1289     cb.callout_data = md->callout_data;
1290 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1291 nigel 77 if (rrc < 0) RRETURN(rrc);
1292     }
1293     ecode += 2 + 2*LINK_SIZE;
1294     break;
1295    
1296     /* Recursion either matches the current regex, or some subexpression. The
1297     offset data is the offset to the starting bracket from the start of the
1298     whole pattern. (This is so that it works from duplicated subpatterns.)
1299    
1300     If there are any capturing brackets started but not finished, we have to
1301     save their starting points and reinstate them after the recursion. However,
1302     we don't know how many such there are (offset_top records the completed
1303     total) so we just have to save all the potential data. There may be up to
1304     65535 such values, which is too large to put on the stack, but using malloc
1305     for small numbers seems expensive. As a compromise, the stack is used when
1306     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1307     is used. If the malloc fails, the offset data cannot be saved, so the
1308     recursion is not attempted and PCRE_ERROR_NOMEMORY is returned from this
1309     call instead.
1310    
1311     There are also other values that have to be saved. We use a chained
1312     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1313     for the original version of this logic. */
1314    
1315     case OP_RECURSE:
1316     {
1317     callpat = md->start_code + GET(ecode, 1);
1318 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1319     GET2(callpat, 1 + LINK_SIZE);
1320 nigel 77
1321     /* Add to "recursing stack" */
1322    
1323     new_recursive.prevrec = md->recursive;
1324     md->recursive = &new_recursive;
1325    
1326     /* Find where to continue from afterwards */
1327    
1328     ecode += 1 + LINK_SIZE;
1329     new_recursive.after_call = ecode;
1330    
1331     /* Now save the offset data. */
1332    
1333     new_recursive.saved_max = md->offset_end;
1334     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1335     new_recursive.offset_save = stacksave;
1336     else
1337     {
1338     new_recursive.offset_save =
1339     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1340     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1341     }
1342    
1343     memcpy(new_recursive.offset_save, md->offset_vector,
1344     new_recursive.saved_max * sizeof(int));
1345 ph10 461 new_recursive.save_offset_top = offset_top;
1346 nigel 77
1347     /* OK, now we can do the recursion. For each top-level alternative we
1348     restore the offset and recursion data. */
1349    
1350     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1351 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1352 nigel 77 do
1353     {
1354 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1355     md, ims, eptrb, flags, RM6);
1356 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1357 nigel 77 {
1358 nigel 87 DPRINTF(("Recursion matched\n"));
1359 nigel 77 md->recursive = new_recursive.prevrec;
1360     if (new_recursive.offset_save != stacksave)
1361     (pcre_free)(new_recursive.offset_save);
1362 ph10 510 MRRETURN(MATCH_MATCH);
1363 nigel 77 }
1364 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1365 nigel 87 {
1366     DPRINTF(("Recursion gave error %d\n", rrc));
1367 ph10 400 if (new_recursive.offset_save != stacksave)
1368     (pcre_free)(new_recursive.offset_save);
1369 nigel 87 RRETURN(rrc);
1370     }
1371 nigel 77
1372     md->recursive = &new_recursive;
1373     memcpy(md->offset_vector, new_recursive.offset_save,
1374     new_recursive.saved_max * sizeof(int));
1375     callpat += GET(callpat, 1);
1376     }
1377     while (*callpat == OP_ALT);
1378    
1379     DPRINTF(("Recursion didn't match\n"));
1380     md->recursive = new_recursive.prevrec;
1381     if (new_recursive.offset_save != stacksave)
1382     (pcre_free)(new_recursive.offset_save);
1383 ph10 510 MRRETURN(MATCH_NOMATCH);
1384 nigel 77 }
1385     /* Control never reaches here */
1386    
1387     /* "Once" brackets are like assertion brackets except that after a match,
1388     the point in the subject string is not moved back. Thus there can never be
1389     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1390     Check the alternative branches in turn - the matching won't pass the KET
1391     for this kind of subpattern. If any one branch matches, we carry on as at
1392 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1393     the start-of-match value in case it was changed by \K. */
1394 nigel 77
1395     case OP_ONCE:
1396 nigel 91 prev = ecode;
1397     saved_eptr = eptr;
1398    
1399     do
1400 nigel 77 {
1401 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1402 ph10 511 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1403 ph10 500 {
1404     mstart = md->start_match_ptr;
1405     break;
1406 ph10 501 }
1407 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1408 nigel 91 ecode += GET(ecode,1);
1409     }
1410     while (*ecode == OP_ALT);
1411 nigel 77
1412 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1413 nigel 77
1414 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1415 nigel 77
1416 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1417     mark, since extracts may have been taken. */
1418 nigel 77
1419 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1420 nigel 77
1421 nigel 91 offset_top = md->end_offset_top;
1422     eptr = md->end_match_ptr;
1423 nigel 77
1424 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1425     happens for a repeating ket if no characters were matched in the group.
1426     This is the forcible breaking of infinite loops as implemented in Perl
1427     5.005. If there is an options reset, it will get obeyed in the normal
1428     course of events. */
1429 nigel 77
1430 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1431     {
1432     ecode += 1+LINK_SIZE;
1433     break;
1434     }
1435 nigel 77
1436 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1437     preceding bracket, in the appropriate order. The second "call" of match()
1438     uses tail recursion, to avoid using another stack frame. We need to reset
1439     any options that changed within the bracket before re-running it, so
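1440     check the next opcode. NOTE(review): the code below reads the OP_OPT data byte as ecode[4], which equals ecode[2+LINK_SIZE] only when LINK_SIZE is 2 - confirm for other link sizes. */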
1441 nigel 77
1442 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1443     {
1444     ims = (ims & ~PCRE_IMS) | ecode[4];
1445     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1446     }
1447 nigel 77
1448 nigel 91 if (*ecode == OP_KETRMIN)
1449     {
1450 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1451 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1452     ecode = prev;
1453 ph10 197 flags = 0;
1454 nigel 91 goto TAIL_RECURSE;
1455 nigel 77 }
1456 nigel 91 else /* OP_KETRMAX */
1457     {
1458 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1459 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1460     ecode += 1 + LINK_SIZE;
1461 ph10 197 flags = 0;
1462 nigel 91 goto TAIL_RECURSE;
1463     }
1464     /* Control never gets here */
1465 nigel 77
1466     /* An alternation is the end of a branch; scan along to find the end of the
1467     bracketed group and go to there. */
1468    
1469     case OP_ALT:
1470     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1471     break;
1472    
1473 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1474     indicating that it may occur zero times. It may repeat infinitely, or not
1475     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1476     with fixed upper repeat limits are compiled as a number of copies, with the
1477     optional ones preceded by BRAZERO or BRAMINZERO. */
1478 nigel 77
1479     case OP_BRAZERO:
1480     {
1481     next = ecode+1;
1482 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1483 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1484     do next += GET(next,1); while (*next == OP_ALT);
1485 nigel 93 ecode = next + 1 + LINK_SIZE;
1486 nigel 77 }
1487     break;
1488    
1489     case OP_BRAMINZERO:
1490     {
1491     next = ecode+1;
1492 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1493 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1494 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1495     ecode++;
1496     }
1497     break;
1498    
1499 ph10 335 case OP_SKIPZERO:
1500     {
1501     next = ecode+1;
1502     do next += GET(next,1); while (*next == OP_ALT);
1503     ecode = next + 1 + LINK_SIZE;
1504     }
1505     break;
1506    
1507 nigel 93 /* End of a group, repeated or non-repeating. */
1508 nigel 77
1509     case OP_KET:
1510     case OP_KETRMIN:
1511     case OP_KETRMAX:
1512 nigel 91 prev = ecode - GET(ecode, 1);
1513 nigel 77
1514 nigel 93 /* If this was a group that remembered the subject start, in order to break
1515     infinite repeats of empty string matches, retrieve the subject start from
1516     the chain. Otherwise, set it NULL. */
1517 nigel 77
1518 nigel 93 if (*prev >= OP_SBRA)
1519     {
1520     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1521     eptrb = eptrb->epb_prev; /* Backup to previous group */
1522     }
1523     else saved_eptr = NULL;
1524 nigel 77
1525 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1526     matching and return MATCH_MATCH, but record the current high water mark for
1527     use by positive assertions. We also need to record the match start in case
1528     it was changed by \K. */
1529 nigel 93
1530 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1531     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1532     *prev == OP_ONCE)
1533     {
1534     md->end_match_ptr = eptr; /* For ONCE */
1535     md->end_offset_top = offset_top;
1536 ph10 500 md->start_match_ptr = mstart;
1537 ph10 510 MRRETURN(MATCH_MATCH);
1538 nigel 91 }
1539 nigel 77
1540 nigel 93 /* For capturing groups we have to check the group number back at the start
1541     and if necessary complete handling an extraction by setting the offsets and
1542     bumping the high water mark. Note that whole-pattern recursion is coded as
1543     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1544     when the OP_END is reached. Other recursion is handled here. */
1545 nigel 77
1546 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1547 nigel 91 {
1548 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1549 nigel 91 offset = number << 1;
1550 ph10 461
1551 ph10 475 #ifdef PCRE_DEBUG
1552 nigel 91 printf("end bracket %d", number);
1553     printf("\n");
1554 nigel 77 #endif
1555    
1556 nigel 93 md->capture_last = number;
1557     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1558 nigel 91 {
1559 nigel 93 md->offset_vector[offset] =
1560     md->offset_vector[md->offset_end - number];
1561     md->offset_vector[offset+1] = eptr - md->start_subject;
1562     if (offset_top <= offset) offset_top = offset + 2;
1563     }
1564 nigel 77
1565 nigel 93 /* Handle a recursively called group. Restore the offsets
1566     appropriately and continue from after the call. */
1567 nigel 77
1568 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1569     {
1570     recursion_info *rec = md->recursive;
1571     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1572     md->recursive = rec->prevrec;
1573     memcpy(md->offset_vector, rec->offset_save,
1574     rec->saved_max * sizeof(int));
1575 ph10 461 offset_top = rec->save_offset_top;
1576 nigel 93 ecode = rec->after_call;
1577     ims = original_ims;
1578     break;
1579 nigel 77 }
1580 nigel 91 }
1581 nigel 77
1582 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1583     flags, in case they got changed during the group. */
1584 nigel 77
1585 nigel 91 ims = original_ims;
1586     DPRINTF(("ims reset to %02lx\n", ims));
1587 nigel 77
1588 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1589     happens for a repeating ket if no characters were matched in the group.
1590     This is the forcible breaking of infinite loops as implemented in Perl
1591     5.005. If there is an options reset, it will get obeyed in the normal
1592     course of events. */
1593 nigel 77
1594 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1595     {
1596     ecode += 1 + LINK_SIZE;
1597     break;
1598     }
1599 nigel 77
1600 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1601     preceding bracket, in the appropriate order. In the second case, we can use
1602 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1603     unlimited repeat of a group that can match an empty string. */
1604 nigel 77
1605 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1606    
1607 nigel 91 if (*ecode == OP_KETRMIN)
1608     {
1609 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1610 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1611 ph10 197 if (flags != 0) /* Could match an empty string */
1612     {
1613     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1614     RRETURN(rrc);
1615     }
1616 nigel 91 ecode = prev;
1617     goto TAIL_RECURSE;
1618 nigel 77 }
1619 nigel 91 else /* OP_KETRMAX */
1620     {
1621 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1622 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1623     ecode += 1 + LINK_SIZE;
1624 ph10 197 flags = 0;
1625 nigel 91 goto TAIL_RECURSE;
1626     }
1627     /* Control never gets here */
1628 nigel 77
1629     /* Start of subject unless notbol, or after internal newline if multiline */
1630    
1631     case OP_CIRC:
1632 ph10 510 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1633 nigel 77 if ((ims & PCRE_MULTILINE) != 0)
1634     {
1635 nigel 91 if (eptr != md->start_subject &&
1636 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1637 ph10 510 MRRETURN(MATCH_NOMATCH);
1638 nigel 77 ecode++;
1639     break;
1640     }
1641     /* ... else fall through */
1642    
1643     /* Start of subject assertion */
1644    
1645     case OP_SOD:
1646 ph10 510 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1647 nigel 77 ecode++;
1648     break;
1649    
1650     /* Start of match assertion */
1651    
1652     case OP_SOM:
1653 ph10 510 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1654 nigel 77 ecode++;
1655     break;
1656 ph10 172
1657 ph10 168 /* Reset the start of match point */
1658 ph10 172
1659 ph10 168 case OP_SET_SOM:
1660     mstart = eptr;
1661 ph10 172 ecode++;
1662     break;
1663 nigel 77
1664     /* Assert before internal newline if multiline, or before a terminating
1665     newline unless endonly is set, else end of subject unless noteol is set. */
1666    
1667     case OP_DOLL:
1668     if ((ims & PCRE_MULTILINE) != 0)
1669     {
1670     if (eptr < md->end_subject)
1671 ph10 510 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1672 nigel 77 else
1673 ph10 510 { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1674 nigel 77 ecode++;
1675     break;
1676     }
1677     else
1678     {
1679 ph10 510 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1680 nigel 77 if (!md->endonly)
1681     {
1682 nigel 91 if (eptr != md->end_subject &&
1683 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1684 ph10 510 MRRETURN(MATCH_NOMATCH);
1685 nigel 77 ecode++;
1686     break;
1687     }
1688     }
1689 nigel 91 /* ... else fall through for endonly */
1690 nigel 77
1691     /* End of subject assertion (\z) */
1692    
1693     case OP_EOD:
1694 ph10 510 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1695 nigel 77 ecode++;
1696     break;
1697    
1698     /* End of subject or ending \n assertion (\Z) */
1699    
1700     case OP_EODN:
1701 nigel 91 if (eptr != md->end_subject &&
1702 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1703 ph10 510 MRRETURN(MATCH_NOMATCH);
1704 nigel 77 ecode++;
1705     break;
1706    
1707     /* Word boundary assertions */
1708    
1709     case OP_NOT_WORD_BOUNDARY:
1710     case OP_WORD_BOUNDARY:
1711     {
1712    
1713     /* Find out if the previous and current characters are "word" characters.
1714     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1715 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1716 ph10 435 partial matching. */
1717 nigel 77
1718     #ifdef SUPPORT_UTF8
1719     if (utf8)
1720     {
1721     if (eptr == md->start_subject) prev_is_word = FALSE; else
1722     {
1723 ph10 409 USPTR lastptr = eptr - 1;
1724 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1725 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1726 nigel 77 GETCHAR(c, lastptr);
1727     prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1728     }
1729 ph10 443 if (eptr >= md->end_subject)
1730 nigel 77 {
1731 ph10 443 SCHECK_PARTIAL();
1732     cur_is_word = FALSE;
1733 ph10 428 }
1734     else
1735     {
1736 nigel 77 GETCHAR(c, eptr);
1737     cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1738     }
1739     }
1740     else
1741     #endif
1742    
1743 ph10 428 /* Not in UTF-8 mode */
1744 nigel 77
1745     {
1746 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1747     {
1748 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1749 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1750     }
1751 ph10 443 if (eptr >= md->end_subject)
1752 ph10 428 {
1753 ph10 443 SCHECK_PARTIAL();
1754     cur_is_word = FALSE;
1755 ph10 428 }
1756     else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1757 nigel 77 }
1758    
1759     /* Now see if the situation is what we want */
1760    
1761     if ((*ecode++ == OP_WORD_BOUNDARY)?
1762     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1763 ph10 510 MRRETURN(MATCH_NOMATCH);
1764 nigel 77 }
1765     break;
1766    
1767     /* Match a single character type; inline for speed */
1768    
1769     case OP_ANY:
1770 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1771 ph10 345 /* Fall through */
1772    
1773 ph10 341 case OP_ALLANY:
1774 ph10 443 if (eptr++ >= md->end_subject)
1775 ph10 428 {
1776 ph10 443 SCHECK_PARTIAL();
1777 ph10 510 MRRETURN(MATCH_NOMATCH);
1778 ph10 443 }
1779 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1780 nigel 77 ecode++;
1781     break;
1782    
1783     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1784     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1785    
1786     case OP_ANYBYTE:
1787 ph10 443 if (eptr++ >= md->end_subject)
1788 ph10 428 {
1789 ph10 443 SCHECK_PARTIAL();
1790 ph10 510 MRRETURN(MATCH_NOMATCH);
1791 ph10 443 }
1792 nigel 77 ecode++;
1793     break;
1794    
1795     case OP_NOT_DIGIT:
1796 ph10 443 if (eptr >= md->end_subject)
1797 ph10 428 {
1798 ph10 443 SCHECK_PARTIAL();
1799 ph10 510 MRRETURN(MATCH_NOMATCH);
1800 ph10 443 }
1801 nigel 77 GETCHARINCTEST(c, eptr);
1802     if (
1803     #ifdef SUPPORT_UTF8
1804     c < 256 &&
1805     #endif
1806     (md->ctypes[c] & ctype_digit) != 0
1807     )
1808 ph10 510 MRRETURN(MATCH_NOMATCH);
1809 nigel 77 ecode++;
1810     break;
1811    
1812     case OP_DIGIT:
1813 ph10 443 if (eptr >= md->end_subject)
1814 ph10 428 {
1815 ph10 443 SCHECK_PARTIAL();
1816 ph10 510 MRRETURN(MATCH_NOMATCH);
1817 ph10 443 }
1818 nigel 77 GETCHARINCTEST(c, eptr);
1819     if (
1820     #ifdef SUPPORT_UTF8
1821     c >= 256 ||
1822     #endif
1823     (md->ctypes[c] & ctype_digit) == 0
1824     )
1825 ph10 510 MRRETURN(MATCH_NOMATCH);
1826 nigel 77 ecode++;
1827     break;
1828    
1829     case OP_NOT_WHITESPACE:
1830 ph10 443 if (eptr >= md->end_subject)
1831 ph10 428 {
1832 ph10 443 SCHECK_PARTIAL();
1833 ph10 510 MRRETURN(MATCH_NOMATCH);
1834 ph10 443 }
1835 nigel 77 GETCHARINCTEST(c, eptr);
1836     if (
1837     #ifdef SUPPORT_UTF8
1838     c < 256 &&
1839     #endif
1840     (md->ctypes[c] & ctype_space) != 0
1841     )
1842 ph10 510 MRRETURN(MATCH_NOMATCH);
1843 nigel 77 ecode++;
1844     break;
1845    
1846     case OP_WHITESPACE:
1847 ph10 443 if (eptr >= md->end_subject)
1848 ph10 428 {
1849 ph10 443 SCHECK_PARTIAL();
1850 ph10 510 MRRETURN(MATCH_NOMATCH);
1851 ph10 443 }
1852 nigel 77 GETCHARINCTEST(c, eptr);
1853     if (
1854     #ifdef SUPPORT_UTF8
1855     c >= 256 ||
1856     #endif
1857     (md->ctypes[c] & ctype_space) == 0
1858     )
1859 ph10 510 MRRETURN(MATCH_NOMATCH);
1860 nigel 77 ecode++;
1861     break;
1862    
1863     case OP_NOT_WORDCHAR:
1864 ph10 443 if (eptr >= md->end_subject)
1865 ph10 428 {
1866 ph10 443 SCHECK_PARTIAL();
1867 ph10 510 MRRETURN(MATCH_NOMATCH);
1868 ph10 443 }
1869 nigel 77 GETCHARINCTEST(c, eptr);
1870     if (
1871     #ifdef SUPPORT_UTF8
1872     c < 256 &&
1873     #endif
1874     (md->ctypes[c] & ctype_word) != 0
1875     )
1876 ph10 510 MRRETURN(MATCH_NOMATCH);
1877 nigel 77 ecode++;
1878     break;
1879    
1880     case OP_WORDCHAR:
1881 ph10 443 if (eptr >= md->end_subject)
1882 ph10 428 {
1883 ph10 443 SCHECK_PARTIAL();
1884 ph10 510 MRRETURN(MATCH_NOMATCH);
1885 ph10 443 }
1886 nigel 77 GETCHARINCTEST(c, eptr);
1887     if (
1888     #ifdef SUPPORT_UTF8
1889     c >= 256 ||
1890     #endif
1891     (md->ctypes[c] & ctype_word) == 0
1892     )
1893 ph10 510 MRRETURN(MATCH_NOMATCH);
1894 nigel 77 ecode++;
1895     break;
1896    
1897 nigel 93 case OP_ANYNL:
1898 ph10 443 if (eptr >= md->end_subject)
1899 ph10 428 {
1900 ph10 443 SCHECK_PARTIAL();
1901 ph10 510 MRRETURN(MATCH_NOMATCH);
1902 ph10 443 }
1903 nigel 93 GETCHARINCTEST(c, eptr);
1904     switch(c)
1905     {
1906 ph10 510 default: MRRETURN(MATCH_NOMATCH);
1907 nigel 93 case 0x000d:
1908     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1909     break;
1910 ph10 231
1911 nigel 93 case 0x000a:
1912 ph10 231 break;
1913    
1914 nigel 93 case 0x000b:
1915     case 0x000c:
1916     case 0x0085:
1917     case 0x2028:
1918     case 0x2029:
1919 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1920 nigel 93 break;
1921     }
1922     ecode++;
1923     break;
1924    
1925 ph10 178 case OP_NOT_HSPACE:
1926 ph10 443 if (eptr >= md->end_subject)
1927 ph10 428 {
1928 ph10 443 SCHECK_PARTIAL();
1929 ph10 510 MRRETURN(MATCH_NOMATCH);
1930 ph10 443 }
1931 ph10 178 GETCHARINCTEST(c, eptr);
1932     switch(c)
1933     {
1934     default: break;
1935     case 0x09: /* HT */
1936     case 0x20: /* SPACE */
1937     case 0xa0: /* NBSP */
1938     case 0x1680: /* OGHAM SPACE MARK */
1939     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1940     case 0x2000: /* EN QUAD */
1941     case 0x2001: /* EM QUAD */
1942     case 0x2002: /* EN SPACE */
1943     case 0x2003: /* EM SPACE */
1944     case 0x2004: /* THREE-PER-EM SPACE */
1945     case 0x2005: /* FOUR-PER-EM SPACE */
1946     case 0x2006: /* SIX-PER-EM SPACE */
1947     case 0x2007: /* FIGURE SPACE */
1948     case 0x2008: /* PUNCTUATION SPACE */
1949     case 0x2009: /* THIN SPACE */
1950     case 0x200A: /* HAIR SPACE */
1951     case 0x202f: /* NARROW NO-BREAK SPACE */
1952     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1953     case 0x3000: /* IDEOGRAPHIC SPACE */
1954 ph10 510 MRRETURN(MATCH_NOMATCH);
1955 ph10 178 }
1956     ecode++;
1957     break;
1958    
1959     case OP_HSPACE:
1960 ph10 443 if (eptr >= md->end_subject)
1961 ph10 428 {
1962 ph10 443 SCHECK_PARTIAL();
1963 ph10 510 MRRETURN(MATCH_NOMATCH);
1964 ph10 443 }
1965 ph10 178 GETCHARINCTEST(c, eptr);
1966     switch(c)
1967     {
1968 ph10 510 default: MRRETURN(MATCH_NOMATCH);
1969 ph10 178 case 0x09: /* HT */
1970     case 0x20: /* SPACE */
1971     case 0xa0: /* NBSP */
1972     case 0x1680: /* OGHAM SPACE MARK */
1973     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1974     case 0x2000: /* EN QUAD */
1975     case 0x2001: /* EM QUAD */
1976     case 0x2002: /* EN SPACE */
1977     case 0x2003: /* EM SPACE */
1978     case 0x2004: /* THREE-PER-EM SPACE */
1979     case 0x2005: /* FOUR-PER-EM SPACE */
1980     case 0x2006: /* SIX-PER-EM SPACE */
1981     case 0x2007: /* FIGURE SPACE */
1982     case 0x2008: /* PUNCTUATION SPACE */
1983     case 0x2009: /* THIN SPACE */
1984     case 0x200A: /* HAIR SPACE */
1985     case 0x202f: /* NARROW NO-BREAK SPACE */
1986     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1987     case 0x3000: /* IDEOGRAPHIC SPACE */
1988     break;
1989     }
1990     ecode++;
1991     break;
1992    
1993     case OP_NOT_VSPACE:
1994 ph10 443 if (eptr >= md->end_subject)
1995 ph10 428 {
1996 ph10 443 SCHECK_PARTIAL();
1997 ph10 510 MRRETURN(MATCH_NOMATCH);
1998 ph10 443 }
1999 ph10 178 GETCHARINCTEST(c, eptr);
2000     switch(c)
2001     {
2002     default: break;
2003     case 0x0a: /* LF */
2004     case 0x0b: /* VT */
2005     case 0x0c: /* FF */
2006     case 0x0d: /* CR */
2007     case 0x85: /* NEL */
2008     case 0x2028: /* LINE SEPARATOR */
2009     case 0x2029: /* PARAGRAPH SEPARATOR */
2010 ph10 510 MRRETURN(MATCH_NOMATCH);
2011 ph10 178 }
2012     ecode++;
2013     break;
2014    
2015     case OP_VSPACE:
2016 ph10 443 if (eptr >= md->end_subject)
2017 ph10 428 {
2018 ph10 443 SCHECK_PARTIAL();
2019 ph10 510 MRRETURN(MATCH_NOMATCH);
2020 ph10 443 }
2021 ph10 178 GETCHARINCTEST(c, eptr);
2022     switch(c)
2023     {
2024 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2025 ph10 178 case 0x0a: /* LF */
2026     case 0x0b: /* VT */
2027     case 0x0c: /* FF */
2028     case 0x0d: /* CR */
2029     case 0x85: /* NEL */
2030     case 0x2028: /* LINE SEPARATOR */
2031     case 0x2029: /* PARAGRAPH SEPARATOR */
2032     break;
2033     }
2034     ecode++;
2035     break;
2036    
2037 nigel 77 #ifdef SUPPORT_UCP
2038     /* Check the next character by Unicode property. We will get here only
2039     if the support is in the binary; otherwise a compile-time error occurs. */
2040    
2041     case OP_PROP:
2042     case OP_NOTPROP:
2043 ph10 443 if (eptr >= md->end_subject)
2044 ph10 428 {
2045 ph10 443 SCHECK_PARTIAL();
2046 ph10 510 MRRETURN(MATCH_NOMATCH);
2047 ph10 443 }
2048 nigel 77 GETCHARINCTEST(c, eptr);
2049     {
2050 ph10 384 const ucd_record *prop = GET_UCD(c);
2051 nigel 77
2052 nigel 87 switch(ecode[1])
2053     {
2054     case PT_ANY:
2055 ph10 510 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2056 nigel 87 break;
2057 nigel 77
2058 nigel 87 case PT_LAMP:
2059 ph10 349 if ((prop->chartype == ucp_Lu ||
2060     prop->chartype == ucp_Ll ||
2061     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2062 ph10 510 MRRETURN(MATCH_NOMATCH);
2063 ph10 517 break;
2064 nigel 87
2065     case PT_GC:
2066 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2067 ph10 510 MRRETURN(MATCH_NOMATCH);
2068 nigel 87 break;
2069    
2070     case PT_PC:
2071 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2072 ph10 510 MRRETURN(MATCH_NOMATCH);
2073 nigel 87 break;
2074    
2075     case PT_SC:
2076 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2077 ph10 510 MRRETURN(MATCH_NOMATCH);
2078 nigel 87 break;
2079 ph10 517
2080     /* These are specials */
2081    
2082     case PT_ALNUM:
2083     if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2084     _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2085     MRRETURN(MATCH_NOMATCH);
2086     break;
2087    
2088     case PT_SPACE: /* Perl space */
2089     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2090     c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2091     == (op == OP_NOTPROP))
2092     MRRETURN(MATCH_NOMATCH);
2093     break;
2094    
2095     case PT_PXSPACE: /* POSIX space */
2096     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2097     c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2098     c == CHAR_FF || c == CHAR_CR)
2099     == (op == OP_NOTPROP))
2100     MRRETURN(MATCH_NOMATCH);
2101     break;
2102 nigel 87
2103 ph10 517 case PT_WORD:
2104     if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2105     _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2106     c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2107     MRRETURN(MATCH_NOMATCH);
2108     break;
2109    
2110     /* This should never occur */
2111    
2112 nigel 87 default:
2113     RRETURN(PCRE_ERROR_INTERNAL);
2114 nigel 77 }
2115 nigel 87
2116     ecode += 3;
2117 nigel 77 }
2118     break;
2119    
2120     /* Match an extended Unicode sequence. We will get here only if the support
2121     is in the binary; otherwise a compile-time error occurs. */
2122    
2123     case OP_EXTUNI:
2124 ph10 443 if (eptr >= md->end_subject)
2125 ph10 428 {
2126 ph10 443 SCHECK_PARTIAL();
2127 ph10 510 MRRETURN(MATCH_NOMATCH);
2128 ph10 443 }
2129 nigel 77 GETCHARINCTEST(c, eptr);
2130     {
2131 ph10 349 int category = UCD_CATEGORY(c);
2132 ph10 510 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2133 nigel 77 while (eptr < md->end_subject)
2134     {
2135     int len = 1;
2136     if (!utf8) c = *eptr; else
2137     {
2138     GETCHARLEN(c, eptr, len);
2139     }
2140 ph10 349 category = UCD_CATEGORY(c);
2141 nigel 77 if (category != ucp_M) break;
2142     eptr += len;
2143     }
2144     }
2145     ecode++;
2146     break;
2147     #endif
2148    
2149    
2150     /* Match a back reference, possibly repeatedly. Look past the end of the
2151     item to see if there is repeat information following. The code is similar
2152     to that for character classes, but repeated for efficiency. Then obey
2153     similar code to character type repeats - written out again for speed.
2154     However, if the referenced string is the empty string, always treat
2155     it as matched, any number of times (otherwise there could be infinite
2156     loops). */
2157    
2158     case OP_REF:
2159     {
2160     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2161 ph10 345 ecode += 3;
2162    
2163 ph10 336 /* If the reference is unset, there are two possibilities:
2164 ph10 345
2165 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
2166     than the amount of subject left; this ensures that every attempt at a
2167     match fails. We can't just fail here, because of the possibility of
2168     quantifiers with zero minima.
2169 ph10 345
2170     (b) If the JavaScript compatibility flag is set, set the length to zero
2171     so that the back reference matches an empty string.
2172    
2173     Otherwise, set the length to the length of what was matched by the
2174 ph10 336 referenced subpattern. */
2175 ph10 345
2176 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
2177 ph10 345 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2178 ph10 336 else
2179     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2180 nigel 77
2181     /* Set up for repetition, or handle the non-repeated case */
2182    
2183     switch (*ecode)
2184     {
2185     case OP_CRSTAR:
2186     case OP_CRMINSTAR:
2187     case OP_CRPLUS:
2188     case OP_CRMINPLUS:
2189     case OP_CRQUERY:
2190     case OP_CRMINQUERY:
2191     c = *ecode++ - OP_CRSTAR;
2192     minimize = (c & 1) != 0;
2193     min = rep_min[c]; /* Pick up values from tables; */
2194     max = rep_max[c]; /* zero for max => infinity */
2195     if (max == 0) max = INT_MAX;
2196     break;
2197    
2198     case OP_CRRANGE:
2199     case OP_CRMINRANGE:
2200     minimize = (*ecode == OP_CRMINRANGE);
2201     min = GET2(ecode, 1);
2202     max = GET2(ecode, 3);
2203     if (max == 0) max = INT_MAX;
2204     ecode += 5;
2205     break;
2206    
2207     default: /* No repeat follows */
2208 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
2209 ph10 428 {
2210 ph10 443 CHECK_PARTIAL();
2211 ph10 510 MRRETURN(MATCH_NOMATCH);
2212 ph10 443 }
2213 nigel 77 eptr += length;
2214     continue; /* With the main loop */
2215     }
2216    
2217     /* If the length of the reference is zero, just continue with the
2218     main loop. */
2219 ph10 443
2220 nigel 77 if (length == 0) continue;
2221    
2222     /* First, ensure the minimum number of matches are present. We get back
2223     the length of the reference string explicitly rather than passing the
2224     address of eptr, so that eptr can be a register variable. */
2225    
2226     for (i = 1; i <= min; i++)
2227     {
2228 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
2229 ph10 426 {
2230 ph10 427 CHECK_PARTIAL();
2231 ph10 510 MRRETURN(MATCH_NOMATCH);
2232 ph10 427 }
2233 nigel 77 eptr += length;
2234     }
2235    
2236     /* If min = max, continue at the same level without recursion.
2237     They are not both allowed to be zero. */
2238    
2239     if (min == max) continue;
2240    
2241     /* If minimizing, keep trying and advancing the pointer */
2242    
2243     if (minimize)
2244     {
2245     for (fi = min;; fi++)
2246     {
2247 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2248 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2249 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2250 ph10 428 if (!match_ref(offset, eptr, length, md, ims))
2251 ph10 426 {
2252 ph10 427 CHECK_PARTIAL();
2253 ph10 510 MRRETURN(MATCH_NOMATCH);
2254 ph10 427 }
2255 nigel 77 eptr += length;
2256     }
2257     /* Control never gets here */
2258     }
2259    
2260     /* If maximizing, find the longest string and work backwards */
2261    
2262     else
2263     {
2264     pp = eptr;
2265     for (i = min; i < max; i++)
2266     {
2267 ph10 463 if (!match_ref(offset, eptr, length, md, ims))
2268 ph10 462 {
2269 ph10 463 CHECK_PARTIAL();
2270 ph10 462 break;
2271 ph10 463 }
2272 nigel 77 eptr += length;
2273     }
2274     while (eptr >= pp)
2275     {
2276 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2277 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2278     eptr -= length;
2279     }
2280 ph10 510 MRRETURN(MATCH_NOMATCH);
2281 nigel 77 }
2282     }
2283     /* Control never gets here */
2284    
2285     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2286     used when all the characters in the class have values in the range 0-255,
2287     and either the matching is caseful, or the characters are in the range
2288     0-127 when UTF-8 processing is enabled. The only difference between
2289     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2290     encountered.
2291    
2292     First, look past the end of the item to see if there is repeat information
2293     following. Then obey similar code to character type repeats - written out
2294     again for speed. */
2295    
2296     case OP_NCLASS:
2297     case OP_CLASS:
2298     {
2299     data = ecode + 1; /* Save for matching */
2300     ecode += 33; /* Advance past the item */
2301    
2302     switch (*ecode)
2303     {
2304     case OP_CRSTAR:
2305     case OP_CRMINSTAR:
2306     case OP_CRPLUS:
2307     case OP_CRMINPLUS:
2308     case OP_CRQUERY:
2309     case OP_CRMINQUERY:
2310     c = *ecode++ - OP_CRSTAR;
2311     minimize = (c & 1) != 0;
2312     min = rep_min[c]; /* Pick up values from tables; */
2313     max = rep_max[c]; /* zero for max => infinity */
2314     if (max == 0) max = INT_MAX;
2315     break;
2316    
2317     case OP_CRRANGE:
2318     case OP_CRMINRANGE:
2319     minimize = (*ecode == OP_CRMINRANGE);
2320     min = GET2(ecode, 1);
2321     max = GET2(ecode, 3);
2322     if (max == 0) max = INT_MAX;
2323     ecode += 5;
2324     break;
2325    
2326     default: /* No repeat follows */
2327     min = max = 1;
2328     break;
2329     }
2330    
2331     /* First, ensure the minimum number of matches are present. */
2332    
2333     #ifdef SUPPORT_UTF8
2334     /* UTF-8 mode */
2335     if (utf8)
2336     {
2337     for (i = 1; i <= min; i++)
2338     {
2339 ph10 427 if (eptr >= md->end_subject)
2340 ph10 426 {
2341 ph10 428 SCHECK_PARTIAL();
2342 ph10 510 MRRETURN(MATCH_NOMATCH);
2343 ph10 427 }
2344 nigel 77 GETCHARINC(c, eptr);
2345     if (c > 255)
2346     {
2347 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2348 nigel 77 }
2349     else
2350     {
2351 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2352 nigel 77 }
2353     }
2354     }
2355     else
2356     #endif
2357     /* Not UTF-8 mode */
2358     {
2359     for (i = 1; i <= min; i++)
2360     {
2361 ph10 427 if (eptr >= md->end_subject)
2362 ph10 426 {
2363 ph10 428 SCHECK_PARTIAL();
2364 ph10 510 MRRETURN(MATCH_NOMATCH);
2365 ph10 427 }
2366 nigel 77 c = *eptr++;
2367 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2368 nigel 77 }
2369     }
2370    
2371     /* If max == min we can continue with the main loop without the
2372     need to recurse. */
2373    
2374     if (min == max) continue;
2375    
2376     /* If minimizing, keep testing the rest of the expression and advancing
2377     the pointer while it matches the class. */
2378    
2379     if (minimize)
2380     {
2381     #ifdef SUPPORT_UTF8
2382     /* UTF-8 mode */
2383     if (utf8)
2384     {
2385     for (fi = min;; fi++)
2386     {
2387 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2388 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2389 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2390 ph10 427 if (eptr >= md->end_subject)
2391 ph10 426 {
2392 ph10 427 SCHECK_PARTIAL();
2393 ph10 510 MRRETURN(MATCH_NOMATCH);
2394 ph10 427 }
2395 nigel 77 GETCHARINC(c, eptr);
2396     if (c > 255)
2397     {
2398 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2399 nigel 77 }
2400     else
2401     {
2402 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2403 nigel 77 }
2404     }
2405     }
2406     else
2407     #endif
2408     /* Not UTF-8 mode */
2409     {
2410     for (fi = min;; fi++)
2411     {
2412 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2413 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2414 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2415 ph10 427 if (eptr >= md->end_subject)
2416 ph10 426 {
2417 ph10 427 SCHECK_PARTIAL();
2418 ph10 510 MRRETURN(MATCH_NOMATCH);
2419 ph10 427 }
2420 nigel 77 c = *eptr++;
2421 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2422 nigel 77 }
2423     }
2424     /* Control never gets here */
2425     }
2426    
2427     /* If maximizing, find the longest possible run, then work backwards. */
2428    
2429     else
2430     {
2431     pp = eptr;
2432    
2433     #ifdef SUPPORT_UTF8
2434     /* UTF-8 mode */
2435     if (utf8)
2436     {
2437     for (i = min; i < max; i++)
2438     {
2439     int len = 1;
2440 ph10 463 if (eptr >= md->end_subject)
2441 ph10 462 {
2442 ph10 463 SCHECK_PARTIAL();
2443 ph10 462 break;
2444 ph10 463 }
2445 nigel 77 GETCHARLEN(c, eptr, len);
2446     if (c > 255)
2447     {
2448     if (op == OP_CLASS) break;
2449     }
2450     else
2451     {
2452     if ((data[c/8] & (1 << (c&7))) == 0) break;
2453     }
2454     eptr += len;
2455     }
2456     for (;;)
2457     {
2458 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2459 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2460     if (eptr-- == pp) break; /* Stop if tried at original pos */
2461     BACKCHAR(eptr);
2462     }
2463     }
2464     else
2465     #endif
2466     /* Not UTF-8 mode */
2467     {
2468     for (i = min; i < max; i++)
2469     {
2470 ph10 463 if (eptr >= md->end_subject)
2471 ph10 462 {
2472 ph10 463 SCHECK_PARTIAL();
2473 ph10 462 break;
2474 ph10 463 }
2475 nigel 77 c = *eptr;
2476     if ((data[c/8] & (1 << (c&7))) == 0) break;
2477     eptr++;
2478     }
2479     while (eptr >= pp)
2480     {
2481 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2482 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2483 nigel 77 eptr--;
2484     }
2485     }
2486    
2487 ph10 510 MRRETURN(MATCH_NOMATCH);
2488 nigel 77 }
2489     }
2490     /* Control never gets here */
2491    
2492    
2493     /* Match an extended character class. This opcode is encountered only
2494 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2495     mode, because Unicode properties are supported in non-UTF-8 mode. */
2496 nigel 77
2497     #ifdef SUPPORT_UTF8
2498     case OP_XCLASS:
2499     {
2500     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2501     ecode += GET(ecode, 1); /* Advance past the item */
2502    
2503     switch (*ecode)
2504     {
2505     case OP_CRSTAR:
2506     case OP_CRMINSTAR:
2507     case OP_CRPLUS:
2508     case OP_CRMINPLUS:
2509     case OP_CRQUERY:
2510     case OP_CRMINQUERY:
2511     c = *ecode++ - OP_CRSTAR;
2512     minimize = (c & 1) != 0;
2513     min = rep_min[c]; /* Pick up values from tables; */
2514     max = rep_max[c]; /* zero for max => infinity */
2515     if (max == 0) max = INT_MAX;
2516     break;
2517    
2518     case OP_CRRANGE:
2519     case OP_CRMINRANGE:
2520     minimize = (*ecode == OP_CRMINRANGE);
2521     min = GET2(ecode, 1);
2522     max = GET2(ecode, 3);
2523     if (max == 0) max = INT_MAX;
2524     ecode += 5;
2525     break;
2526    
2527     default: /* No repeat follows */
2528     min = max = 1;
2529     break;
2530     }
2531    
2532     /* First, ensure the minimum number of matches are present. */
2533    
2534     for (i = 1; i <= min; i++)
2535     {
2536 ph10 427 if (eptr >= md->end_subject)
2537 ph10 426 {
2538     SCHECK_PARTIAL();
2539 ph10 510 MRRETURN(MATCH_NOMATCH);
2540 ph10 427 }
2541 ph10 384 GETCHARINCTEST(c, eptr);
2542 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2543 nigel 77 }
2544    
2545     /* If max == min we can continue with the main loop without the
2546     need to recurse. */
2547    
2548     if (min == max) continue;
2549    
2550     /* If minimizing, keep testing the rest of the expression and advancing
2551     the pointer while it matches the class. */
2552    
2553     if (minimize)
2554     {
2555     for (fi = min;; fi++)
2556     {
2557 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2558 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2559 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2560 ph10 427 if (eptr >= md->end_subject)
2561 ph10 426 {
2562 ph10 427 SCHECK_PARTIAL();
2563 ph10 510 MRRETURN(MATCH_NOMATCH);
2564 ph10 427 }
2565 ph10 384 GETCHARINCTEST(c, eptr);
2566 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2567 nigel 77 }
2568     /* Control never gets here */
2569     }
2570    
2571     /* If maximizing, find the longest possible run, then work backwards. */
2572    
2573     else
2574     {
2575     pp = eptr;
2576     for (i = min; i < max; i++)
2577     {
2578     int len = 1;
2579 ph10 463 if (eptr >= md->end_subject)
2580 ph10 462 {
2581 ph10 463 SCHECK_PARTIAL();
2582 ph10 462 break;
2583 ph10 463 }
2584 ph10 384 GETCHARLENTEST(c, eptr, len);
2585 nigel 77 if (!_pcre_xclass(c, data)) break;
2586     eptr += len;
2587     }
2588     for(;;)
2589     {
2590 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2591 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2592     if (eptr-- == pp) break; /* Stop if tried at original pos */
2593 ph10 214 if (utf8) BACKCHAR(eptr);
2594 nigel 77 }
2595 ph10 510 MRRETURN(MATCH_NOMATCH);
2596 nigel 77 }
2597    
2598     /* Control never gets here */
2599     }
2600     #endif /* End of XCLASS */
2601    
2602     /* Match a single character, casefully */
2603    
2604     case OP_CHAR:
2605     #ifdef SUPPORT_UTF8
2606     if (utf8)
2607     {
2608     length = 1;
2609     ecode++;
2610     GETCHARLEN(fc, ecode, length);
2611 ph10 443 if (length > md->end_subject - eptr)
2612 ph10 428 {
2613     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2614 ph10 510 MRRETURN(MATCH_NOMATCH);
2615 ph10 443 }
2616 ph10 510 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2617 nigel 77 }
2618     else
2619     #endif
2620    
2621     /* Non-UTF-8 mode */
2622     {
2623 ph10 443 if (md->end_subject - eptr < 1)
2624 ph10 428 {
2625     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2626 ph10 510 MRRETURN(MATCH_NOMATCH);
2627 ph10 443 }
2628 ph10 510 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2629 nigel 77 ecode += 2;
2630     }
2631     break;
2632    
2633     /* Match a single character, caselessly */
2634    
2635     case OP_CHARNC:
2636     #ifdef SUPPORT_UTF8
2637     if (utf8)
2638     {
2639     length = 1;
2640     ecode++;
2641     GETCHARLEN(fc, ecode, length);
2642    
2643 ph10 443 if (length > md->end_subject - eptr)
2644 ph10 428 {
2645     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2646 ph10 510 MRRETURN(MATCH_NOMATCH);
2647 ph10 443 }
2648 nigel 77
2649     /* If the pattern character's value is < 128, we have only one byte, and
2650     can use the fast lookup table. */
2651    
2652     if (fc < 128)
2653     {
2654 ph10 510 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2655 nigel 77 }
2656    
2657     /* Otherwise we must pick up the subject character */
2658    
2659     else
2660     {
2661 nigel 93 unsigned int dc;
2662 nigel 77 GETCHARINC(dc, eptr);
2663     ecode += length;
2664    
2665     /* If we have Unicode property support, we can use it to test the other
2666 nigel 87 case of the character, if there is one. */
2667 nigel 77
2668     if (fc != dc)
2669     {
2670     #ifdef SUPPORT_UCP
2671 ph10 349 if (dc != UCD_OTHERCASE(fc))
2672 nigel 77 #endif
2673 ph10 510 MRRETURN(MATCH_NOMATCH);
2674 nigel 77 }
2675     }
2676     }
2677     else
2678     #endif /* SUPPORT_UTF8 */
2679    
2680     /* Non-UTF-8 mode */
2681     {
2682 ph10 443 if (md->end_subject - eptr < 1)
2683 ph10 428 {
2684 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2685 ph10 510 MRRETURN(MATCH_NOMATCH);
2686 ph10 443 }
2687 ph10 510 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2688 nigel 77 ecode += 2;
2689     }
2690     break;
2691    
2692 nigel 93 /* Match a single character repeatedly. */
2693 nigel 77
2694     case OP_EXACT:
2695     min = max = GET2(ecode, 1);
2696     ecode += 3;
2697     goto REPEATCHAR;
2698    
2699 nigel 93 case OP_POSUPTO:
2700     possessive = TRUE;
2701     /* Fall through */
2702    
2703 nigel 77 case OP_UPTO:
2704     case OP_MINUPTO:
2705     min = 0;
2706     max = GET2(ecode, 1);
2707     minimize = *ecode == OP_MINUPTO;
2708     ecode += 3;
2709     goto REPEATCHAR;
2710    
2711 nigel 93 case OP_POSSTAR:
2712     possessive = TRUE;
2713     min = 0;
2714     max = INT_MAX;
2715     ecode++;
2716     goto REPEATCHAR;
2717    
2718     case OP_POSPLUS:
2719     possessive = TRUE;
2720     min = 1;
2721     max = INT_MAX;
2722     ecode++;
2723     goto REPEATCHAR;
2724    
2725     case OP_POSQUERY:
2726     possessive = TRUE;
2727     min = 0;
2728     max = 1;
2729     ecode++;
2730     goto REPEATCHAR;
2731    
2732 nigel 77 case OP_STAR:
2733     case OP_MINSTAR:
2734     case OP_PLUS:
2735     case OP_MINPLUS:
2736     case OP_QUERY:
2737     case OP_MINQUERY:
2738     c = *ecode++ - OP_STAR;
2739     minimize = (c & 1) != 0;
2740 ph10 443
2741 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2742     max = rep_max[c]; /* zero for max => infinity */
2743     if (max == 0) max = INT_MAX;
2744    
2745 ph10 426 /* Common code for all repeated single-character matches. */
2746 nigel 77
2747     REPEATCHAR:
2748     #ifdef SUPPORT_UTF8
2749     if (utf8)
2750     {
2751     length = 1;
2752     charptr = ecode;
2753     GETCHARLEN(fc, ecode, length);
2754     ecode += length;
2755    
2756     /* Handle multibyte character matching specially here. There is
2757     support for caseless matching if UCP support is present. */
2758    
2759     if (length > 1)
2760     {
2761     #ifdef SUPPORT_UCP
2762 nigel 93 unsigned int othercase;
2763 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2764 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2765 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2766 ph10 115 else oclength = 0;
2767 nigel 77 #endif /* SUPPORT_UCP */
2768    
2769     for (i = 1; i <= min; i++)
2770     {
2771 ph10 426 if (eptr <= md->end_subject - length &&
2772     memcmp(eptr, charptr, length) == 0) eptr += length;
2773 ph10 123 #ifdef SUPPORT_UCP
2774 ph10 426 else if (oclength > 0 &&
2775     eptr <= md->end_subject - oclength &&
2776     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2777     #endif /* SUPPORT_UCP */
2778 nigel 77 else
2779     {
2780 ph10 426 CHECK_PARTIAL();
2781 ph10 510 MRRETURN(MATCH_NOMATCH);
2782 nigel 77 }
2783     }
2784    
2785     if (min == max) continue;
2786    
2787     if (minimize)
2788     {
2789     for (fi = min;; fi++)
2790     {
2791 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2792 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2793 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2794 ph10 426 if (eptr <= md->end_subject - length &&
2795     memcmp(eptr, charptr, length) == 0) eptr += length;
2796 ph10 123 #ifdef SUPPORT_UCP
2797 ph10 426 else if (oclength > 0 &&
2798     eptr <= md->end_subject - oclength &&
2799     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2800     #endif /* SUPPORT_UCP */
2801 nigel 77 else
2802     {
2803 ph10 426 CHECK_PARTIAL();
2804 ph10 510 MRRETURN(MATCH_NOMATCH);
2805 nigel 77 }
2806     }
2807     /* Control never gets here */
2808     }
2809 nigel 93
2810     else /* Maximize */
2811 nigel 77 {
2812     pp = eptr;
2813     for (i = min; i < max; i++)
2814     {
2815 ph10 426 if (eptr <= md->end_subject - length &&
2816     memcmp(eptr, charptr, length) == 0) eptr += length;
2817 ph10 123 #ifdef SUPPORT_UCP
2818 ph10 426 else if (oclength > 0 &&
2819     eptr <= md->end_subject - oclength &&
2820     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2821     #endif /* SUPPORT_UCP */
2822 ph10 463 else
2823 ph10 462 {
2824 ph10 463 CHECK_PARTIAL();
2825 ph10 462 break;
2826 ph10 463 }
2827 nigel 77 }
2828 nigel 93
2829     if (possessive) continue;
2830 ph10 427
2831 ph10 120 for(;;)
2832 ph10 426 {
2833     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2834     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2835 ph10 510 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2836 ph10 115 #ifdef SUPPORT_UCP
2837 ph10 426 eptr--;
2838     BACKCHAR(eptr);
2839 ph10 123 #else /* without SUPPORT_UCP */
2840 ph10 426 eptr -= length;
2841 ph10 123 #endif /* SUPPORT_UCP */
2842 ph10 426 }
2843 nigel 77 }
2844     /* Control never gets here */
2845     }
2846    
2847     /* If the length of a UTF-8 character is 1, we fall through here, and
2848     obey the code as for non-UTF-8 characters below, though in this case the
2849     value of fc will always be < 128. */
2850     }
2851     else
2852     #endif /* SUPPORT_UTF8 */
2853    
2854     /* When not in UTF-8 mode, load a single-byte character. */
2855    
2856 ph10 426 fc = *ecode++;
2857 ph10 443
2858 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2859     may not be in UTF-8 mode. The code is duplicated for the caseless and
2860     caseful cases, for speed, since matching characters is likely to be quite
2861     common. First, ensure the minimum number of matches are present. If min =
2862     max, continue at the same level without recursing. Otherwise, if
2863     minimizing, keep trying the rest of the expression and advancing one
2864     matching character if failing, up to the maximum. Alternatively, if
2865     maximizing, find the maximum number of characters and work backwards. */
2866    
2867     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2868     max, eptr));
2869    
2870     if ((ims & PCRE_CASELESS) != 0)
2871     {
2872     fc = md->lcc[fc];
2873     for (i = 1; i <= min; i++)
2874 ph10 426 {
2875     if (eptr >= md->end_subject)
2876     {
2877     SCHECK_PARTIAL();
2878 ph10 510 MRRETURN(MATCH_NOMATCH);
2879 ph10 426 }
2880 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2881 ph10 426 }
2882 nigel 77 if (min == max) continue;
2883     if (minimize)
2884     {
2885     for (fi = min;; fi++)
2886     {
2887 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2888 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2889 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2890 ph10 426 if (eptr >= md->end_subject)
2891     {
2892 ph10 427 SCHECK_PARTIAL();
2893 ph10 510 MRRETURN(MATCH_NOMATCH);
2894 ph10 426 }
2895 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2896 nigel 77 }
2897     /* Control never gets here */
2898     }
2899 nigel 93 else /* Maximize */
2900 nigel 77 {
2901     pp = eptr;
2902     for (i = min; i < max; i++)
2903     {
2904 ph10 463 if (eptr >= md->end_subject)
2905 ph10 462 {
2906     SCHECK_PARTIAL();
2907     break;
2908 ph10 463 }
2909 ph10 462 if (fc != md->lcc[*eptr]) break;
2910 nigel 77 eptr++;
2911     }
2912 ph10 427
2913 nigel 93 if (possessive) continue;
2914 ph10 427
2915 nigel 77 while (eptr >= pp)
2916     {
2917 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2918 nigel 77 eptr--;
2919     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2920     }
2921 ph10 510 MRRETURN(MATCH_NOMATCH);
2922 nigel 77 }
2923     /* Control never gets here */
2924     }
2925    
2926     /* Caseful comparisons (single-byte only; multi-byte characters are handled above) */
2927    
2928     else
2929     {
2930 ph10 427 for (i = 1; i <= min; i++)
2931 ph10 426 {
2932     if (eptr >= md->end_subject)
2933     {
2934     SCHECK_PARTIAL();
2935 ph10 510 MRRETURN(MATCH_NOMATCH);
2936 ph10 426 }
2937 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2938 ph10 427 }
2939 ph10 443
2940 nigel 77 if (min == max) continue;
2941 ph10 443
2942 nigel 77 if (minimize)
2943     {
2944     for (fi = min;; fi++)
2945     {
2946 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2947 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2948 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2949 ph10 426 if (eptr >= md->end_subject)
2950 ph10 427 {
2951 ph10 426 SCHECK_PARTIAL();
2952 ph10 510 MRRETURN(MATCH_NOMATCH);
2953 ph10 427 }
2954 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2955 nigel 77 }
2956     /* Control never gets here */
2957     }
2958 nigel 93 else /* Maximize */
2959 nigel 77 {
2960     pp = eptr;
2961     for (i = min; i < max; i++)
2962     {
2963 ph10 463 if (eptr >= md->end_subject)
2964 ph10 462 {
2965 ph10 463 SCHECK_PARTIAL();
2966 ph10 462 break;
2967 ph10 463 }
2968 ph10 462 if (fc != *eptr) break;
2969 nigel 77 eptr++;
2970     }
2971 nigel 93 if (possessive) continue;
2972 ph10 443
2973 nigel 77 while (eptr >= pp)
2974     {
2975 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2976 nigel 77 eptr--;
2977     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2978     }
2979 ph10 510 MRRETURN(MATCH_NOMATCH);
2980 nigel 77 }
2981     }
2982     /* Control never gets here */
2983    
2984     /* Match a negated single one-byte character. The pattern character is a
2985     single byte, but the subject character being checked can be multibyte. */
2986    
2987     case OP_NOT:
2988 ph10 443 if (eptr >= md->end_subject)
2989 ph10 428 {
2990 ph10 443 SCHECK_PARTIAL();
2991 ph10 510 MRRETURN(MATCH_NOMATCH);
2992 ph10 443 }
2993 nigel 77 ecode++;
2994     GETCHARINCTEST(c, eptr);
2995     if ((ims & PCRE_CASELESS) != 0)
2996     {
2997     #ifdef SUPPORT_UTF8
2998     if (c < 256)
2999     #endif
3000     c = md->lcc[c];
3001 ph10 510 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3002 nigel 77 }
3003     else
3004     {
3005 ph10 510 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3006 nigel 77 }
3007     break;
3008    
3009     /* Match a negated single one-byte character repeatedly. This is almost a
3010     repeat of the code for a repeated single character, but I haven't found a
3011     nice way of commoning these up that doesn't require a test of the
3012     positive/negative option for each character match. Maybe that wouldn't add
3013     very much to the time taken, but character matching *is* what this is all
3014     about... */
3015    
3016     case OP_NOTEXACT:
3017     min = max = GET2(ecode, 1);
3018     ecode += 3;
3019     goto REPEATNOTCHAR;
3020    
3021     case OP_NOTUPTO:
3022     case OP_NOTMINUPTO:
3023     min = 0;
3024     max = GET2(ecode, 1);
3025     minimize = *ecode == OP_NOTMINUPTO;
3026     ecode += 3;
3027     goto REPEATNOTCHAR;
3028    
3029 nigel 93 case OP_NOTPOSSTAR:
3030     possessive = TRUE;
3031     min = 0;
3032     max = INT_MAX;
3033     ecode++;
3034     goto REPEATNOTCHAR;
3035    
3036     case OP_NOTPOSPLUS:
3037     possessive = TRUE;
3038     min = 1;
3039     max = INT_MAX;
3040     ecode++;
3041     goto REPEATNOTCHAR;
3042    
3043     case OP_NOTPOSQUERY:
3044     possessive = TRUE;
3045     min = 0;
3046     max = 1;
3047     ecode++;
3048     goto REPEATNOTCHAR;
3049    
3050     case OP_NOTPOSUPTO:
3051     possessive = TRUE;
3052     min = 0;
3053     max = GET2(ecode, 1);
3054     ecode += 3;
3055     goto REPEATNOTCHAR;
3056    
3057 nigel 77 case OP_NOTSTAR:
3058     case OP_NOTMINSTAR:
3059     case OP_NOTPLUS:
3060     case OP_NOTMINPLUS:
3061     case OP_NOTQUERY:
3062     case OP_NOTMINQUERY:
3063     c = *ecode++ - OP_NOTSTAR;
3064     minimize = (c & 1) != 0;
3065     min = rep_min[c]; /* Pick up values from tables; */
3066     max = rep_max[c]; /* zero for max => infinity */
3067     if (max == 0) max = INT_MAX;
3068    
3069 ph10 426 /* Common code for all repeated single-byte matches. */
3070 nigel 77
3071     REPEATNOTCHAR:
3072     fc = *ecode++;
3073    
3074     /* The code is duplicated for the caseless and caseful cases, for speed,
3075     since matching characters is likely to be quite common. First, ensure the
3076     minimum number of matches are present. If min = max, continue at the same
3077     level without recursing. Otherwise, if minimizing, keep trying the rest of
3078     the expression and advancing one matching character if failing, up to the
3079     maximum. Alternatively, if maximizing, find the maximum number of
3080     characters and work backwards. */
3081    
3082     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3083     max, eptr));
3084    
3085     if ((ims & PCRE_CASELESS) != 0)
3086     {
3087     fc = md->lcc[fc];
3088    
3089     #ifdef SUPPORT_UTF8
3090     /* UTF-8 mode */
3091     if (utf8)
3092     {
3093 nigel 93 register unsigned int d;
3094 nigel 77 for (i = 1; i <= min; i++)
3095     {
3096 ph10 426 if (eptr >= md->end_subject)
3097     {
3098     SCHECK_PARTIAL();
3099 ph10 510 MRRETURN(MATCH_NOMATCH);
3100 ph10 427 }
3101 nigel 77 GETCHARINC(d, eptr);
3102     if (d < 256) d = md->lcc[d];
3103 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3104 nigel 77 }
3105     }
3106     else
3107     #endif
3108    
3109     /* Not UTF-8 mode */
3110     {
3111     for (i = 1; i <= min; i++)
3112 ph10 426 {
3113     if (eptr >= md->end_subject)
3114     {
3115     SCHECK_PARTIAL();
3116 ph10 510 MRRETURN(MATCH_NOMATCH);
3117 ph10 427 }
3118 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3119 ph10 427 }
3120 nigel 77 }
3121    
3122     if (min == max) continue;
3123    
3124     if (minimize)
3125     {
3126     #ifdef SUPPORT_UTF8
3127     /* UTF-8 mode */
3128     if (utf8)
3129     {
3130 nigel 93 register unsigned int d;
3131 nigel 77 for (fi = min;; fi++)
3132     {
3133 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3134 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3135 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3136 ph10 427 if (eptr >= md->end_subject)
3137 ph10 426 {
3138 ph10 427 SCHECK_PARTIAL();
3139 ph10 510 MRRETURN(MATCH_NOMATCH);
3140 ph10 427 }
3141 nigel 77 GETCHARINC(d, eptr);
3142     if (d < 256) d = md->lcc[d];
3143 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3144 nigel 77 }
3145     }
3146     else
3147     #endif
3148     /* Not UTF-8 mode */
3149     {
3150     for (fi = min;; fi++)
3151     {
3152 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3153 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3154 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3155 ph10 426 if (eptr >= md->end_subject)
3156     {
3157     SCHECK_PARTIAL();
3158 ph10 510 MRRETURN(MATCH_NOMATCH);
3159 ph10 426 }
3160 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3161 nigel 77 }
3162     }
3163     /* Control never gets here */
3164     }
3165    
3166     /* Maximize case */
3167    
3168     else
3169     {
3170     pp = eptr;
3171    
3172     #ifdef SUPPORT_UTF8
3173     /* UTF-8 mode */
3174     if (utf8)
3175     {
3176 nigel 93 register unsigned int d;
3177 nigel 77 for (i = min; i < max; i++)
3178     {
3179     int len = 1;
3180 ph10 463 if (eptr >= md->end_subject)
3181 ph10 462 {
3182 ph10 463 SCHECK_PARTIAL();
3183 ph10 462 break;
3184 ph10 463 }
3185 nigel 77 GETCHARLEN(d, eptr, len);
3186     if (d < 256) d = md->lcc[d];
3187     if (fc == d) break;
3188     eptr += len;
3189     }
3190 nigel 93 if (possessive) continue;
3191     for(;;)
3192 nigel 77 {
3193 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3194 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3195     if (eptr-- == pp) break; /* Stop if tried at original pos */
3196     BACKCHAR(eptr);
3197     }
3198     }
3199     else
3200     #endif
3201     /* Not UTF-8 mode */
3202     {
3203     for (i = min; i < max; i++)
3204     {
3205 ph10 463 if (eptr >= md->end_subject)
3206 ph10 462 {
3207     SCHECK_PARTIAL();
3208     break;
3209 ph10 463 }
3210 ph10 462 if (fc == md->lcc[*eptr]) break;
3211 nigel 77 eptr++;
3212     }
3213 nigel 93 if (possessive) continue;
3214 nigel 77 while (eptr >= pp)
3215     {
3216 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3217 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3218     eptr--;
3219     }
3220     }
3221    
3222 ph10 510 MRRETURN(MATCH_NOMATCH);
3223 nigel 77 }
3224     /* Control never gets here */
3225     }
3226    
3227     /* Caseful comparisons */
3228    
3229     else
3230     {
3231     #ifdef SUPPORT_UTF8
3232     /* UTF-8 mode */
3233     if (utf8)
3234     {
3235 nigel 93 register unsigned int d;
3236 nigel 77 for (i = 1; i <= min; i++)
3237     {
3238 ph10 426 if (eptr >= md->end_subject)
3239     {
3240     SCHECK_PARTIAL();
3241 ph10 510 MRRETURN(MATCH_NOMATCH);
3242 ph10 427 }
3243 nigel 77 GETCHARINC(d, eptr);
3244 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3245 nigel 77 }
3246     }
3247     else
3248     #endif
3249     /* Not UTF-8 mode */
3250     {
3251     for (i = 1; i <= min; i++)
3252 ph10 426 {
3253     if (eptr >= md->end_subject)
3254     {
3255     SCHECK_PARTIAL();
3256 ph10 510 MRRETURN(MATCH_NOMATCH);
3257 ph10 427 }
3258 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3259 ph10 427 }
3260 nigel 77 }
3261    
3262     if (min == max) continue;
3263    
3264     if (minimize)
3265     {
3266     #ifdef SUPPORT_UTF8
3267     /* UTF-8 mode */
3268     if (utf8)
3269     {
3270 nigel 93 register unsigned int d;
3271 nigel 77 for (fi = min;; fi++)
3272     {
3273 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3274 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3275 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3276 ph10 427 if (eptr >= md->end_subject)
3277 ph10 426 {
3278 ph10 427 SCHECK_PARTIAL();
3279 ph10 510 MRRETURN(MATCH_NOMATCH);
3280 ph10 427 }
3281 nigel 77 GETCHARINC(d, eptr);
3282 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3283 nigel 77 }
3284     }
3285     else
3286     #endif
3287     /* Not UTF-8 mode */
3288     {
3289     for (fi = min;; fi++)
3290     {
3291 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3292 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3293 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3294 ph10 426 if (eptr >= md->end_subject)
3295     {
3296     SCHECK_PARTIAL();
3297 ph10 510 MRRETURN(MATCH_NOMATCH);
3298 ph10 427 }
3299 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3300 nigel 77 }
3301     }
3302     /* Control never gets here */
3303     }
3304    
3305     /* Maximize case */
3306    
3307     else
3308     {
3309     pp = eptr;
3310    
3311     #ifdef SUPPORT_UTF8
3312     /* UTF-8 mode */
3313     if (utf8)
3314     {
3315 nigel 93 register unsigned int d;
3316 nigel 77 for (i = min; i < max; i++)
3317     {
3318     int len = 1;
3319 ph10 463 if (eptr >= md->end_subject)
3320 ph10 462 {
3321 ph10 463 SCHECK_PARTIAL();
3322 ph10 462 break;
3323 ph10 463 }
3324 nigel 77 GETCHARLEN(d, eptr, len);
3325     if (fc == d) break;
3326     eptr += len;
3327     }
3328 nigel 93 if (possessive) continue;
3329 nigel 77 for(;;)
3330     {
3331 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3332 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3333     if (eptr-- == pp) break; /* Stop if tried at original pos */
3334     BACKCHAR(eptr);
3335     }
3336     }
3337     else
3338     #endif
3339     /* Not UTF-8 mode */
3340     {
3341     for (i = min; i < max; i++)
3342     {
3343 ph10 463 if (eptr >= md->end_subject)
3344 ph10 462 {
3345 ph10 463 SCHECK_PARTIAL();
3346 ph10 462 break;
3347 ph10 463 }
3348 ph10 462 if (fc == *eptr) break;
3349 nigel 77 eptr++;
3350     }
3351 nigel 93 if (possessive) continue;
3352 nigel 77 while (eptr >= pp)
3353     {
3354 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3355 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3356     eptr--;
3357     }
3358     }
3359    
3360 ph10 510 MRRETURN(MATCH_NOMATCH);
3361 nigel 77 }
3362     }
3363     /* Control never gets here */
3364    
3365     /* Match a single character type repeatedly; several different opcodes
3366     share code. This is very similar to the code for single characters, but we
3367     repeat it in the interests of efficiency. */
3368    
3369     case OP_TYPEEXACT:
3370     min = max = GET2(ecode, 1);
3371     minimize = TRUE;
3372     ecode += 3;
3373     goto REPEATTYPE;
3374    
3375     case OP_TYPEUPTO:
3376     case OP_TYPEMINUPTO:
3377     min = 0;
3378     max = GET2(ecode, 1);
3379     minimize = *ecode == OP_TYPEMINUPTO;
3380     ecode += 3;
3381     goto REPEATTYPE;
3382    
3383 nigel 93 case OP_TYPEPOSSTAR:
3384     possessive = TRUE;
3385     min = 0;
3386     max = INT_MAX;
3387     ecode++;
3388     goto REPEATTYPE;
3389    
3390     case OP_TYPEPOSPLUS:
3391     possessive = TRUE;
3392     min = 1;
3393     max = INT_MAX;
3394     ecode++;
3395     goto REPEATTYPE;
3396    
3397     case OP_TYPEPOSQUERY:
3398     possessive = TRUE;
3399     min = 0;
3400     max = 1;
3401     ecode++;
3402     goto REPEATTYPE;
3403    
3404     case OP_TYPEPOSUPTO:
3405     possessive = TRUE;
3406     min = 0;
3407     max = GET2(ecode, 1);
3408     ecode += 3;
3409     goto REPEATTYPE;
3410    
3411 nigel 77 case OP_TYPESTAR:
3412     case OP_TYPEMINSTAR:
3413     case OP_TYPEPLUS:
3414     case OP_TYPEMINPLUS:
3415     case OP_TYPEQUERY:
3416     case OP_TYPEMINQUERY:
3417     c = *ecode++ - OP_TYPESTAR;
3418     minimize = (c & 1) != 0;
3419     min = rep_min[c]; /* Pick up values from tables; */
3420     max = rep_max[c]; /* zero for max => infinity */
3421     if (max == 0) max = INT_MAX;
3422    
3423     /* Common code for all repeated single character type matches. Note that
3424     in UTF-8 mode, '.' matches a character of any length, but for the other
3425     character types, the valid characters are all one-byte long. */
3426    
3427     REPEATTYPE:
3428     ctype = *ecode++; /* Code for the character type */
3429    
3430     #ifdef SUPPORT_UCP
3431     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3432     {
3433     prop_fail_result = ctype == OP_NOTPROP;
3434     prop_type = *ecode++;
3435 nigel 87 prop_value = *ecode++;
3436 nigel 77 }
3437     else prop_type = -1;
3438     #endif
3439    
3440     /* First, ensure the minimum number of matches are present. Use inline
3441     code for maximizing the speed, and do the type test once at the start
3442 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3443 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3444     and single-bytes. */
3445    
3446     if (min > 0)
3447     {
3448     #ifdef SUPPORT_UCP
3449 nigel 87 if (prop_type >= 0)
3450 nigel 77 {
3451 nigel 87 switch(prop_type)
3452 nigel 77 {
3453 nigel 87 case PT_ANY:
3454 ph10 510 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3455 nigel 87 for (i = 1; i <= min; i++)
3456     {
3457 ph10 427 if (eptr >= md->end_subject)
3458 ph10 426 {
3459 ph10 427 SCHECK_PARTIAL();
3460 ph10 510 MRRETURN(MATCH_NOMATCH);
3461 ph10 427 }
3462 ph10 184 GETCHARINCTEST(c, eptr);
3463 nigel 87 }
3464     break;
3465    
3466     case PT_LAMP:
3467     for (i = 1; i <= min; i++)
3468     {
3469 ph10 427 if (eptr >= md->end_subject)
3470 ph10 426 {
3471 ph10 427 SCHECK_PARTIAL();
3472 ph10 510 MRRETURN(MATCH_NOMATCH);
3473 ph10 427 }
3474 ph10 184 GETCHARINCTEST(c, eptr);
3475 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3476 nigel 87 if ((prop_chartype == ucp_Lu ||
3477     prop_chartype == ucp_Ll ||
3478     prop_chartype == ucp_Lt) == prop_fail_result)
3479 ph10 510 MRRETURN(MATCH_NOMATCH);
3480 nigel 87 }
3481     break;
3482    
3483     case PT_GC:
3484     for (i = 1; i <= min; i++)
3485     {
3486 ph10 427 if (eptr >= md->end_subject)
3487 ph10 426 {
3488 ph10 427 SCHECK_PARTIAL();
3489 ph10 510 MRRETURN(MATCH_NOMATCH);
3490 ph10 427 }
3491 ph10 184 GETCHARINCTEST(c, eptr);
3492 ph10 349 prop_category = UCD_CATEGORY(c);
3493 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3494 ph10 510 MRRETURN(MATCH_NOMATCH);
3495 nigel 87 }
3496     break;
3497    
3498     case PT_PC:
3499     for (i = 1; i <= min; i++)
3500     {
3501 ph10 427 if (eptr >= md->end_subject)
3502 ph10 426 {
3503 ph10 427 SCHECK_PARTIAL();
3504 ph10 510 MRRETURN(MATCH_NOMATCH);
3505 ph10 427 }
3506 ph10 184 GETCHARINCTEST(c, eptr);
3507 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3508 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3509 ph10 510 MRRETURN(MATCH_NOMATCH);
3510 nigel 87 }
3511     break;
3512    
3513     case PT_SC:
3514     for (i = 1; i <= min; i++)
3515     {
3516 ph10 427 if (eptr >= md->end_subject)
3517 ph10 426 {
3518 ph10 427 SCHECK_PARTIAL();
3519 ph10 510 MRRETURN(MATCH_NOMATCH);
3520 ph10 427 }
3521 ph10 184 GETCHARINCTEST(c, eptr);
3522 ph10 349 prop_script = UCD_SCRIPT(c);
3523 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3524 ph10 510 MRRETURN(MATCH_NOMATCH);
3525 nigel 87 }
3526     break;
3527 ph10 517
3528     case PT_ALNUM:
3529     for (i = 1; i <= min; i++)
3530     {
3531     if (eptr >= md->end_subject)
3532     {
3533     SCHECK_PARTIAL();
3534     MRRETURN(MATCH_NOMATCH);
3535     }
3536     GETCHARINCTEST(c, eptr);
3537     prop_category = UCD_CATEGORY(c);
3538     if ((prop_category == ucp_L || prop_category == ucp_N)
3539     == prop_fail_result)
3540     MRRETURN(MATCH_NOMATCH);
3541     }
3542     break;
3543    
3544     case PT_SPACE: /* Perl space */
3545     for (i = 1; i <= min; i++)
3546     {
3547     if (eptr >= md->end_subject)
3548     {
3549     SCHECK_PARTIAL();
3550     MRRETURN(MATCH_NOMATCH);
3551     }
3552     GETCHARINCTEST(c, eptr);
3553     prop_category = UCD_CATEGORY(c);
3554     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3555     c == CHAR_FF || c == CHAR_CR)
3556     == prop_fail_result)
3557     MRRETURN(MATCH_NOMATCH);
3558     }
3559     break;
3560    
3561     case PT_PXSPACE: /* POSIX space */
3562     for (i = 1; i <= min; i++)
3563     {
3564     if (eptr >= md->end_subject)
3565     {
3566     SCHECK_PARTIAL();
3567     MRRETURN(MATCH_NOMATCH);
3568     }
3569     GETCHARINCTEST(c, eptr);
3570     prop_category = UCD_CATEGORY(c);
3571     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3572     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3573     == prop_fail_result)
3574     MRRETURN(MATCH_NOMATCH);
3575     }
3576     break;
3577    
3578     case PT_WORD:
3579     for (i = 1; i <= min; i++)
3580     {
3581     if (eptr >= md->end_subject)
3582     {
3583     SCHECK_PARTIAL();
3584     MRRETURN(MATCH_NOMATCH);
3585     }
3586     GETCHARINCTEST(c, eptr);
3587     prop_category = UCD_CATEGORY(c);
3588     if ((prop_category == ucp_L || prop_category == ucp_N ||
3589     c == CHAR_UNDERSCORE)
3590     == prop_fail_result)
3591     MRRETURN(MATCH_NOMATCH);
3592     }
3593     break;
3594    
3595     /* This should not occur */
3596 nigel 87
3597     default:
3598     RRETURN(PCRE_ERROR_INTERNAL);
3599 nigel 77 }
3600     }
3601    
3602     /* Match extended Unicode sequences. We will get here only if the
3603     support is in the binary; otherwise a compile-time error occurs. */
3604    
3605     else if (ctype == OP_EXTUNI)
3606     {
3607     for (i = 1; i <= min; i++)
3608     {
3609 ph10 427 if (eptr >= md->end_subject)
3610 ph10 426 {
3611 ph10 427 SCHECK_PARTIAL();
3612 ph10 510 MRRETURN(MATCH_NOMATCH);
3613 ph10 427 }
3614 nigel 77 GETCHARINCTEST(c, eptr);
3615 ph10 349 prop_category = UCD_CATEGORY(c);
3616 ph10 510 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3617 nigel 77 while (eptr < md->end_subject)
3618     {
3619     int len = 1;
3620 ph10 426 if (!utf8) c = *eptr;
3621     else { GETCHARLEN(c, eptr, len); }
3622 ph10 349 prop_category = UCD_CATEGORY(c);
3623 nigel 77 if (prop_category != ucp_M) break;
3624     eptr += len;
3625     }
3626     }
3627     }
3628    
3629     else
3630     #endif /* SUPPORT_UCP */
3631    
3632     /* Handle all other cases when the coding is UTF-8 */
3633    
3634     #ifdef SUPPORT_UTF8
3635     if (utf8) switch(ctype)
3636     {
3637     case OP_ANY:
3638     for (i = 1; i <= min; i++)
3639     {
3640 ph10 426 if (eptr >= md->end_subject)
3641     {
3642 ph10 427 SCHECK_PARTIAL();
3643 ph10 510 MRRETURN(MATCH_NOMATCH);
3644 ph10 427 }
3645 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3646 nigel 91 eptr++;
3647 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3648     }
3649     break;
3650    
3651 ph10 341 case OP_ALLANY:
3652     for (i = 1; i <= min; i++)
3653     {
3654 ph10 427 if (eptr >= md->end_subject)
3655 ph10 426 {
3656     SCHECK_PARTIAL();
3657 ph10 510 MRRETURN(MATCH_NOMATCH);
3658 ph10 427 }
3659 ph10 341 eptr++;
3660     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3661     }
3662     break;
3663    
3664 nigel 77 case OP_ANYBYTE:
3665 ph10 510 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3666 nigel 77 eptr += min;
3667     break;
3668    
3669 nigel 93 case OP_ANYNL:
3670     for (i = 1; i <= min; i++)
3671     {
3672 ph10 427 if (eptr >= md->end_subject)
3673 ph10 426 {
3674     SCHECK_PARTIAL();
3675 ph10 510 MRRETURN(MATCH_NOMATCH);
3676 ph10 427 }
3677 nigel 93 GETCHARINC(c, eptr);
3678     switch(c)
3679     {
3680 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3681 nigel 93 case 0x000d:
3682     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3683     break;
3684 ph10 231
3685 nigel 93 case 0x000a:
3686 ph10 231 break;
3687    
3688 nigel 93 case 0x000b:
3689     case 0x000c:
3690     case 0x0085:
3691     case 0x2028:
3692     case 0x2029:
3693 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3694 nigel 93 break;
3695     }
3696     }
3697     break;
3698    
3699 ph10 178 case OP_NOT_HSPACE:
3700     for (i = 1; i <= min; i++)
3701     {
3702 ph10 427 if (eptr >= md->end_subject)
3703 ph10 426 {
3704     SCHECK_PARTIAL();
3705 ph10 510 MRRETURN(MATCH_NOMATCH);
3706 ph10 427 }
3707 ph10 178 GETCHARINC(c, eptr);
3708     switch(c)
3709     {
3710     default: break;
3711     case 0x09: /* HT */
3712     case 0x20: /* SPACE */
3713     case 0xa0: /* NBSP */
3714     case 0x1680: /* OGHAM SPACE MARK */
3715     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3716     case 0x2000: /* EN QUAD */
3717     case 0x2001: /* EM QUAD */
3718     case 0x2002: /* EN SPACE */
3719     case 0x2003: /* EM SPACE */
3720     case 0x2004: /* THREE-PER-EM SPACE */
3721     case 0x2005: /* FOUR-PER-EM SPACE */
3722     case 0x2006: /* SIX-PER-EM SPACE */
3723     case 0x2007: /* FIGURE SPACE */
3724     case 0x2008: /* PUNCTUATION SPACE */
3725     case 0x2009: /* THIN SPACE */
3726     case 0x200A: /* HAIR SPACE */
3727     case 0x202f: /* NARROW NO-BREAK SPACE */
3728     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3729     case 0x3000: /* IDEOGRAPHIC SPACE */
3730 ph10 510 MRRETURN(MATCH_NOMATCH);
3731 ph10 178 }
3732     }
3733     break;
3734 ph10 182
3735 ph10 178 case OP_HSPACE:
3736     for (i = 1; i <= min; i++)
3737     {
3738 ph10 427 if (eptr >= md->end_subject)
3739 ph10 426 {
3740 ph10 427 SCHECK_PARTIAL();
3741 ph10 510 MRRETURN(MATCH_NOMATCH);
3742 ph10 427 }
3743 ph10 178 GETCHARINC(c, eptr);
3744     switch(c)
3745     {
3746 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3747 ph10 178 case 0x09: /* HT */
3748     case 0x20: /* SPACE */
3749     case 0xa0: /* NBSP */
3750     case 0x1680: /* OGHAM SPACE MARK */
3751     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3752     case 0x2000: /* EN QUAD */
3753     case 0x2001: /* EM QUAD */
3754     case 0x2002: /* EN SPACE */
3755     case 0x2003: /* EM SPACE */
3756     case 0x2004: /* THREE-PER-EM SPACE */
3757     case 0x2005: /* FOUR-PER-EM SPACE */
3758     case 0x2006: /* SIX-PER-EM SPACE */
3759     case 0x2007: /* FIGURE SPACE */
3760     case 0x2008: /* PUNCTUATION SPACE */
3761     case 0x2009: /* THIN SPACE */
3762     case 0x200A: /* HAIR SPACE */
3763     case 0x202f: /* NARROW NO-BREAK SPACE */
3764     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3765     case 0x3000: /* IDEOGRAPHIC SPACE */
3766     break;
3767     }
3768     }
3769     break;
3770 ph10 182
3771 ph10 178 case OP_NOT_VSPACE:
3772     for (i = 1; i <= min; i++)
3773     {
3774 ph10 427 if (eptr >= md->end_subject)
3775 ph10 426 {
3776 ph10 427 SCHECK_PARTIAL();
3777 ph10 510 MRRETURN(MATCH_NOMATCH);
3778 ph10 427 }
3779 ph10 178 GETCHARINC(c, eptr);
3780     switch(c)
3781     {
3782     default: break;
3783     case 0x0a: /* LF */
3784     case 0x0b: /* VT */
3785     case 0x0c: /* FF */
3786     case 0x0d: /* CR */
3787     case 0x85: /* NEL */
3788     case 0x2028: /* LINE SEPARATOR */
3789     case 0x2029: /* PARAGRAPH SEPARATOR */
3790 ph10 510 MRRETURN(MATCH_NOMATCH);
3791 ph10 178 }
3792     }
3793     break;
3794 ph10 182
3795 ph10 178 case OP_VSPACE:
3796     for (i = 1; i <= min; i++)
3797     {
3798 ph10 427 if (eptr >= md->end_subject)
3799 ph10 426 {
3800 ph10 427 SCHECK_PARTIAL();
3801 ph10 510 MRRETURN(MATCH_NOMATCH);
3802 ph10 427 }
3803 ph10 178 GETCHARINC(c, eptr);
3804     switch(c)
3805     {
3806 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3807 ph10 178 case 0x0a: /* LF */
3808     case 0x0b: /* VT */
3809     case 0x0c: /* FF */
3810     case 0x0d: /* CR */
3811     case 0x85: /* NEL */
3812     case 0x2028: /* LINE SEPARATOR */
3813     case 0x2029: /* PARAGRAPH SEPARATOR */
3814 ph10 182 break;
3815 ph10 178 }
3816     }
3817     break;
3818    
3819 nigel 77 case OP_NOT_DIGIT:
3820     for (i = 1; i <= min; i++)
3821     {
3822 ph10 427 if (eptr >= md->end_subject)
3823 ph10 426 {
3824 ph10 427 SCHECK_PARTIAL();
3825 ph10 510 MRRETURN(MATCH_NOMATCH);
3826 ph10 427 }
3827 nigel 77 GETCHARINC(c, eptr);
3828     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3829 ph10 510 MRRETURN(MATCH_NOMATCH);
3830 nigel 77 }
3831     break;
3832    
3833     case OP_DIGIT:
3834     for (i = 1; i <= min; i++)
3835     {
3836 ph10 427 if (eptr >= md->end_subject)
3837 ph10 426 {
3838 ph10 427 SCHECK_PARTIAL();
3839 ph10 510 MRRETURN(MATCH_NOMATCH);
3840 ph10 427 }
3841 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3842 ph10 510 MRRETURN(MATCH_NOMATCH);
3843 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3844     }
3845     break;
3846    
3847     case OP_NOT_WHITESPACE:
3848     for (i = 1; i <= min; i++)
3849     {
3850 ph10 427 if (eptr >= md->end_subject)
3851 ph10 426 {
3852 ph10 427 SCHECK_PARTIAL();
3853 ph10 510 MRRETURN(MATCH_NOMATCH);
3854 ph10 427 }
3855 ph10 426 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3856 ph10 510 MRRETURN(MATCH_NOMATCH);
3857 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3858 nigel 77 }
3859     break;
3860    
3861     case OP_WHITESPACE:
3862     for (i = 1; i <= min; i++)
3863     {
3864 ph10 427 if (eptr >= md->end_subject)
3865 ph10 426 {
3866 ph10 427 SCHECK_PARTIAL();
3867 ph10 510 MRRETURN(MATCH_NOMATCH);
3868 ph10 427 }
3869 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3870 ph10 510 MRRETURN(MATCH_NOMATCH);
3871 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3872     }
3873     break;
3874    
3875     case OP_NOT_WORDCHAR:
3876     for (i = 1; i <= min; i++)
3877     {
3878 ph10 482 if (eptr >= md->end_subject)
3879     {
3880     SCHECK_PARTIAL();
3881 ph10 510 MRRETURN(MATCH_NOMATCH);
3882 ph10 482 }
3883     if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3884 ph10 510 MRRETURN(MATCH_NOMATCH);
3885 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3886 nigel 77 }
3887     break;
3888    
3889     case OP_WORDCHAR:
3890     for (i = 1; i <= min; i++)
3891     {
3892 ph10 427 if (eptr >= md->end_subject)
3893 ph10 426 {
3894 ph10 427 SCHECK_PARTIAL();
3895 ph10 510 MRRETURN(MATCH_NOMATCH);
3896 ph10 427 }
3897 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3898 ph10 510 MRRETURN(MATCH_NOMATCH);
3899 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3900     }
3901     break;
3902    
3903     default:
3904     RRETURN(PCRE_ERROR_INTERNAL);
3905     } /* End switch(ctype) */
3906    
3907     else
3908     #endif /* SUPPORT_UTF8 */
3909    
3910     /* Code for the non-UTF-8 case for minimum matching of operators other
3911 ph10 426 than OP_PROP and OP_NOTPROP. */
3912 nigel 77
3913     switch(ctype)
3914     {
3915     case OP_ANY:
3916 ph10 342 for (i = 1; i <= min; i++)
3917 nigel 77 {
3918 ph10 427 if (eptr >= md->end_subject)
3919 ph10 426 {
3920 ph10 427 SCHECK_PARTIAL();
3921 ph10 510 MRRETURN(MATCH_NOMATCH);
3922 ph10 427 }
3923 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3924 ph10 342 eptr++;
3925 nigel 77 }
3926     break;
3927    
3928 ph10 341 case OP_ALLANY:
3929 ph10 443 if (eptr > md->end_subject - min)
3930 ph10 428 {
3931 ph10 443 SCHECK_PARTIAL();
3932 ph10 510 MRRETURN(MATCH_NOMATCH);
3933 ph10 443 }
3934 ph10 341 eptr += min;
3935     break;
3936    
3937 nigel 77 case OP_ANYBYTE:
3938 ph10 443 if (eptr > md->end_subject - min)
3939 ph10 428 {
3940 ph10 443 SCHECK_PARTIAL();
3941 ph10 510 MRRETURN(MATCH_NOMATCH);
3942 ph10 443 }
3943 nigel 77 eptr += min;
3944     break;
3945