/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 530 - (hide annotations) (download)
Tue Jun 1 13:42:06 2010 UTC (4 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 185071 byte(s)
Added a lot of (int) casts to avoid compiler warnings in systems where      
size_t is 64-bit.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
/* Flag bits for the match() function. They are passed in its "flags"
argument and may be combined. */

#define match_condassert     0x01  /* Called to check a condition assertion */
#define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */

/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

/* Special internal returns from the match() function. Make them sufficiently
negative to avoid the external error codes. */

#define MATCH_ACCEPT       (-999)
#define MATCH_COMMIT       (-998)
#define MATCH_PRUNE        (-997)
#define MATCH_SKIP         (-996)
#define MATCH_SKIP_ARG     (-995)
#define MATCH_THEN         (-994)
80 ph10 210
/* This is a convenience macro for code that occurs many times. It stores the
current mark pointer in the match data block and then leaves via RRETURN. It
relies on "md" and "markptr" being in scope at the point of use. */

#define MRRETURN(ra) \
  { \
  md->mark = markptr; \
  RRETURN(ra); \
  }
88    
/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long. */

#define REC_STACK_SAVE_MAX 30
94    
/* Min and max values for the common repeats; for the maxima, 0 => infinity.
NOTE(review): the six entries look like they correspond to *, *?, +, +?, ?
and ?? in that order - confirm against the code that indexes these tables. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
99    
100    
101    
#ifdef PCRE_DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Print a sequence of chars in printable format, stopping at the end of the
subject if requested. Printable characters are written as themselves; all
others are written as \xhh escapes.

Arguments:
  p           points to characters
  length      number to print
  is_subject  TRUE if printing from within md->start_subject
  md          pointer to matching data block, if is_subject is TRUE

Returns:     nothing
*/

static void
pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
{
unsigned int c;

/* When printing from the subject, never run past its end. The pointer
difference is cast explicitly so that no narrowing warning is given on systems
where it is wider than int. */

if (is_subject && length > md->end_subject - p)
  length = (int)(md->end_subject - p);

while (length-- > 0)
  {
  if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
  }
}
#endif
128    
129    
130    
131     /*************************************************
132     * Match a back-reference *
133     *************************************************/
134    
135     /* If a back reference hasn't been set, the length that is passed is greater
136     than the number of characters left in the string, so the match fails.
137    
138     Arguments:
139     offset index into the offset vector
140     eptr points into the subject
141     length length to be matched
142     md points to match data block
143     ims the ims flags
144    
145     Returns: TRUE if matched
146     */
147    
148     static BOOL
149 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
150 nigel 77 unsigned long int ims)
151     {
152 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
153 nigel 77
154 ph10 475 #ifdef PCRE_DEBUG
155 nigel 77 if (eptr >= md->end_subject)
156     printf("matching subject <null>");
157     else
158     {
159     printf("matching subject ");
160     pchars(eptr, length, TRUE, md);
161     }
162     printf(" against backref ");
163     pchars(p, length, FALSE, md);
164     printf("\n");
165     #endif
166    
167     /* Always fail if not enough characters left */
168    
169     if (length > md->end_subject - eptr) return FALSE;
170    
171 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172     properly if Unicode properties are supported. Otherwise, we can check only
173     ASCII characters. */
174 nigel 77
175     if ((ims & PCRE_CASELESS) != 0)
176     {
177 ph10 354 #ifdef SUPPORT_UTF8
178     #ifdef SUPPORT_UCP
179     if (md->utf8)
180     {
181 ph10 358 USPTR endptr = eptr + length;
182 ph10 354 while (eptr < endptr)
183     {
184 ph10 358 int c, d;
185 ph10 354 GETCHARINC(c, eptr);
186     GETCHARINC(d, p);
187     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188 ph10 358 }
189     }
190 ph10 354 else
191     #endif
192     #endif
193    
194     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195     is no UCP support. */
196 ph10 358
197 nigel 77 while (length-- > 0)
198 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199 nigel 77 }
200 ph10 358
201 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
202     are in UTF-8 mode. */
203 ph10 358
204 nigel 77 else
205     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
207     return TRUE;
208     }
209    
210    
211    
212     /***************************************************************************
213     ****************************************************************************
214     RECURSION IN THE match() FUNCTION
215    
216 nigel 87 The match() function is highly recursive, though not every recursive call
217     increases the recursive depth. Nevertheless, some regular expressions can cause
218     it to recurse to a great depth. I was writing for Unix, so I just let it call
219     itself recursively. This uses the stack for saving everything that has to be
220     saved for a recursive call. On Unix, the stack can be large, and this works
221     fine.
222 nigel 77
223 nigel 87 It turns out that on some non-Unix-like systems there are problems with
224     programs that use a lot of stack. (This despite the fact that every last chip
225     has oodles of memory these days, and techniques for extending the stack have
226     been known for decades.) So....
227 nigel 77
228     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229     calls by keeping local variables that need to be preserved in blocks of memory
230 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
231 nigel 77 achieve this so that the actual code doesn't look very different to what it
232     always used to.
233 ph10 164
234 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
235 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
236     Switzer, the use of longjmp() has been abolished, at the cost of having to
237     provide a unique number for each call to RMATCH. There is no way of generating
238     a sequence of numbers at compile time in C. I have given them names, to make
239     them stand out more clearly.
240    
241     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
244     don't have indeterminate values; this has meant that the frame size can be
245 ph10 164 reduced because the result can be "passed back" by straight setting of the
246     variable instead of being passed in the frame.
247 nigel 77 ****************************************************************************
248     ***************************************************************************/
249    
/* Numbers for RMATCH calls. Each use of RMATCH passes one of these as its
final argument. When this list is changed, the code at HEAP_RETURN below must
be updated in sync. */

enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
       RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
       RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
       RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
       RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
       RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
       RM61,  RM62 };
260 ph10 164
261 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
262 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
263 ph10 501 actually used in this definition. */
264 nigel 77
265     #ifndef NO_RECURSE
266     #define REGISTER register
267 ph10 164
#ifdef PCRE_DEBUG

/* Debugging versions: report each call of match() and each return from it,
with the source line number. The result of the call is left in "rrc". */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(ra) \
  { \
  printf("match() returned %d from line %d ", ra, __LINE__); \
  return ra; \
  }

#else

/* Production versions: a plain recursive call with the result left in "rrc",
and a plain return. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
#define RRETURN(ra) return ra

#endif
285    
286 nigel 77 #else
287    
288    
289 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
290     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291     argument of match(), which never changes. */
292 nigel 77
293     #define REGISTER
294    
/* Heap-based "recursive call". A new frame is obtained from
pcre_stack_malloc(), the return point (rw) is remembered in the current frame,
the arguments are copied into the new frame, and control jumps to
HEAP_RECURSE. The label L_<rw> marks where execution continues once the
"called" frame has finished. The "rd" argument is not referenced.

If the new frame cannot be allocated, the current frame gives up with
PCRE_ERROR_NOMEMORY (the public out-of-memory error code) by way of RRETURN,
instead of writing through a NULL pointer. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xmarkptr = markptr;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
315    
/* Heap-based "return". The current frame is released with pcre_stack_free()
and the previous frame becomes current. If there is a previous frame, the
result is left in "rrc" and control jumps to HEAP_RETURN; otherwise this was
the top-level frame and the result is returned from the function itself. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  (pcre_stack_free)(oldframe);\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
328    
329    
330     /* Structure for remembering the local variables in a private frame */
331    
332     typedef struct heapframe {
333     struct heapframe *Xprevframe;
334    
335     /* Function arguments that may change */
336    
337 ph10 409 USPTR Xeptr;
338 nigel 77 const uschar *Xecode;
339 ph10 409 USPTR Xmstart;
340 ph10 501 USPTR Xmarkptr;
341 nigel 77 int Xoffset_top;
342     long int Xims;
343     eptrblock *Xeptrb;
344     int Xflags;
345 nigel 91 unsigned int Xrdepth;
346 nigel 77
347     /* Function local variables */
348    
349 ph10 409 USPTR Xcallpat;
350 ph10 406 #ifdef SUPPORT_UTF8
351 ph10 409 USPTR Xcharptr;
352 ph10 406 #endif
353 ph10 409 USPTR Xdata;
354     USPTR Xnext;
355     USPTR Xpp;
356     USPTR Xprev;
357     USPTR Xsaved_eptr;
358 nigel 77
359     recursion_info Xnew_recursive;
360    
361     BOOL Xcur_is_word;
362     BOOL Xcondition;
363     BOOL Xprev_is_word;
364    
365     unsigned long int Xoriginal_ims;
366    
367     #ifdef SUPPORT_UCP
368     int Xprop_type;
369 nigel 87 int Xprop_value;
370 nigel 77 int Xprop_fail_result;
371     int Xprop_category;
372     int Xprop_chartype;
373 nigel 87 int Xprop_script;
374 ph10 123 int Xoclength;
375     uschar Xocchars[8];
376 nigel 77 #endif
377    
378 ph10 403 int Xcodelink;
379 nigel 77 int Xctype;
380 nigel 93 unsigned int Xfc;
381 nigel 77 int Xfi;
382     int Xlength;
383     int Xmax;
384     int Xmin;
385     int Xnumber;
386     int Xoffset;
387     int Xop;
388     int Xsave_capture_last;
389     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
390     int Xstacksave[REC_STACK_SAVE_MAX];
391    
392     eptrblock Xnewptrb;
393    
394 ph10 164 /* Where to jump back to */
395 nigel 77
396 ph10 164 int Xwhere;
397 ph10 165
398 nigel 77 } heapframe;
399    
400     #endif
401    
402    
403     /***************************************************************************
404     ***************************************************************************/
405    
406    
407    
408     /*************************************************
409     * Match from current position *
410     *************************************************/
411    
412 nigel 93 /* This function is called recursively in many circumstances. Whenever it
413 nigel 77 returns a negative (error) response, the outer incarnation must also return the
414 ph10 426 same response. */
415 nigel 77
/* These macros pack up tests that are used for partial matching, and which
appear several times in the code. We set the "hit end" flag if the pointer is
at the end of the subject and also past the start of the match (i.e.
something has been matched). For hard partial matching (md->partial greater
than 1), we then return PCRE_ERROR_PARTIAL immediately. The second macro is
used when we already know we are past the end of the subject, so it omits the
end-of-subject test. Both rely on "md", "eptr", "mstart" and "markptr" being
in scope. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
    {\
    md->hitend = TRUE;\
    if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
    }

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > mstart)\
    {\
    md->hitend = TRUE;\
    if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
    }
436 ph10 426
437 ph10 427
438 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
439     the md structure (e.g. utf8, end_subject) into individual variables to improve
440 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
441     made performance worse.
442    
443     Arguments:
444 nigel 93 eptr pointer to current character in subject
445     ecode pointer to current position in compiled code
446 ph10 168 mstart pointer to the current match start position (can be modified
447 ph10 172 by encountering \K)
448 ph10 501 markptr pointer to the most recent MARK name, or NULL
449 nigel 77 offset_top current top pointer
450     md pointer to "static" info for the match
451     ims current /i, /m, and /s options
452     eptrb pointer to chain of blocks containing eptr at start of
453     brackets - for testing for empty matches
454     flags can contain
455     match_condassert - this is an assertion condition
456 nigel 93 match_cbegroup - this is the start of an unlimited repeat
457     group that can match an empty string
458 nigel 87 rdepth the recursion depth
459 nigel 77
460     Returns: MATCH_MATCH if matched ) these values are >= 0
461     MATCH_NOMATCH if failed to match )
462 ph10 510 a negative MATCH_xxx value for PRUNE, SKIP, etc
463 nigel 77 a negative PCRE_ERROR_xxx value if aborted by an error condition
464 nigel 87 (e.g. stopped by repeated call or recursion limit)
465 nigel 77 */
466    
467     static int
468 ph10 510 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
469     const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
470 ph10 501 eptrblock *eptrb, int flags, unsigned int rdepth)
471 nigel 77 {
472     /* These variables do not need to be preserved over recursion in this function,
473 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
474     "register" because they are used a lot in loops. */
475 nigel 77
476 nigel 91 register int rrc; /* Returns from recursive calls */
477     register int i; /* Used for loops not involving calls to RMATCH() */
478 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
479 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
480 nigel 77
481 nigel 93 BOOL minimize, possessive; /* Quantifier options */
482 ph10 403 int condcode;
483 nigel 93
484 nigel 77 /* When recursion is not being used, all "local" variables that have to be
485     preserved over calls to RMATCH() are part of a "frame" which is obtained from
486     heap storage. Set up the top-level frame here; others are obtained from the
487     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
488    
489     #ifdef NO_RECURSE
490     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
491     frame->Xprevframe = NULL; /* Marks the top level */
492    
493     /* Copy in the original argument variables */
494    
495     frame->Xeptr = eptr;
496     frame->Xecode = ecode;
497 ph10 168 frame->Xmstart = mstart;
498 ph10 501 frame->Xmarkptr = markptr;
499 nigel 77 frame->Xoffset_top = offset_top;
500     frame->Xims = ims;
501     frame->Xeptrb = eptrb;
502     frame->Xflags = flags;
503 nigel 87 frame->Xrdepth = rdepth;
504 nigel 77
505     /* This is where control jumps back to to effect "recursion" */
506    
507     HEAP_RECURSE:
508    
509     /* Macros make the argument variables come from the current frame */
510    
511     #define eptr frame->Xeptr
512     #define ecode frame->Xecode
513 ph10 168 #define mstart frame->Xmstart
514 ph10 501 #define markptr frame->Xmarkptr
515 nigel 77 #define offset_top frame->Xoffset_top
516     #define ims frame->Xims
517     #define eptrb frame->Xeptrb
518     #define flags frame->Xflags
519 nigel 87 #define rdepth frame->Xrdepth
520 nigel 77
521     /* Ditto for the local variables */
522    
523     #ifdef SUPPORT_UTF8
524     #define charptr frame->Xcharptr
525     #endif
526     #define callpat frame->Xcallpat
527 ph10 403 #define codelink frame->Xcodelink
528 nigel 77 #define data frame->Xdata
529     #define next frame->Xnext
530     #define pp frame->Xpp
531     #define prev frame->Xprev
532     #define saved_eptr frame->Xsaved_eptr
533    
534     #define new_recursive frame->Xnew_recursive
535    
536     #define cur_is_word frame->Xcur_is_word
537     #define condition frame->Xcondition
538     #define prev_is_word frame->Xprev_is_word
539    
540     #define original_ims frame->Xoriginal_ims
541    
542     #ifdef SUPPORT_UCP
543     #define prop_type frame->Xprop_type
544 nigel 87 #define prop_value frame->Xprop_value
545 nigel 77 #define prop_fail_result frame->Xprop_fail_result
546     #define prop_category frame->Xprop_category
547     #define prop_chartype frame->Xprop_chartype
548 nigel 87 #define prop_script frame->Xprop_script
549 ph10 115 #define oclength frame->Xoclength
550     #define occhars frame->Xocchars
551 nigel 77 #endif
552    
553     #define ctype frame->Xctype
554     #define fc frame->Xfc
555     #define fi frame->Xfi
556     #define length frame->Xlength
557     #define max frame->Xmax
558     #define min frame->Xmin
559     #define number frame->Xnumber
560     #define offset frame->Xoffset
561     #define op frame->Xop
562     #define save_capture_last frame->Xsave_capture_last
563     #define save_offset1 frame->Xsave_offset1
564     #define save_offset2 frame->Xsave_offset2
565     #define save_offset3 frame->Xsave_offset3
566     #define stacksave frame->Xstacksave
567    
568     #define newptrb frame->Xnewptrb
569    
570     /* When recursion is being used, local variables are allocated on the stack and
571     get preserved during recursion in the normal way. In this environment, fi and
572     i, and fc and c, can be the same variables. */
573    
574 nigel 93 #else /* NO_RECURSE not defined */
575 nigel 77 #define fi i
576     #define fc c
577    
578    
579 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
580     const uschar *charptr; /* in small blocks of the code. My normal */
581     #endif /* style of coding would have declared */
582     const uschar *callpat; /* them within each of those blocks. */
583     const uschar *data; /* However, in order to accommodate the */
584     const uschar *next; /* version of this code that uses an */
585     USPTR pp; /* external "stack" implemented on the */
586     const uschar *prev; /* heap, it is easier to declare them all */
587     USPTR saved_eptr; /* here, so the declarations can be cut */
588     /* out in a block. The only declarations */
589     recursion_info new_recursive; /* within blocks below are for variables */
590     /* that do not have to be preserved over */
591     BOOL cur_is_word; /* a recursive call to RMATCH(). */
592     BOOL condition;
593 nigel 77 BOOL prev_is_word;
594    
595     unsigned long int original_ims;
596    
597     #ifdef SUPPORT_UCP
598     int prop_type;
599 nigel 87 int prop_value;
600 nigel 77 int prop_fail_result;
601     int prop_category;
602     int prop_chartype;
603 nigel 87 int prop_script;
604 ph10 115 int oclength;
605     uschar occhars[8];
606 nigel 77 #endif
607    
608 ph10 399 int codelink;
609 nigel 77 int ctype;
610     int length;
611     int max;
612     int min;
613     int number;
614     int offset;
615     int op;
616     int save_capture_last;
617     int save_offset1, save_offset2, save_offset3;
618     int stacksave[REC_STACK_SAVE_MAX];
619    
620     eptrblock newptrb;
621 nigel 93 #endif /* NO_RECURSE */
622 nigel 77
623     /* These statements are here to stop the compiler complaining about uninitialized
624     variables. */
625    
626     #ifdef SUPPORT_UCP
627 nigel 87 prop_value = 0;
628 nigel 77 prop_fail_result = 0;
629     #endif
630    
631 nigel 93
632 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
633     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
634     used. Thanks to Ian Taylor for noticing this possibility and sending the
635     original patch. */
636    
637     TAIL_RECURSE:
638    
639 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
640     are specified by the macro RMATCH and RRETURN is used to return. When
641     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
642 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
643 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
644     complicated macro. It has to be used in one particular way. This shouldn't,
645     however, impact performance when true recursion is being used. */
646 nigel 77
647 ph10 164 #ifdef SUPPORT_UTF8
648     utf8 = md->utf8; /* Local copy of the flag */
649     #else
650     utf8 = FALSE;
651     #endif
652    
653 nigel 87 /* First check that we haven't called match() too many times, or that we
654     haven't exceeded the recursive call limit. */
655    
656 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
657 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
658 nigel 77
659     original_ims = ims; /* Save for resetting on ')' */
660 nigel 91
661 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
662     string, the match_cbegroup flag is set. When this is the case, add the current
663     subject pointer to the chain of such remembered pointers, to be checked when we
664     hit the closing ket, in order to break infinite loops that match no characters.
665 ph10 197 When match() is called in other circumstances, don't add to the chain. The
666     match_cbegroup flag must NOT be used with tail recursion, because the memory
667     block that is used is on the stack, so a new one may be required for each
668     match(). */
669 nigel 77
670 nigel 93 if ((flags & match_cbegroup) != 0)
671 nigel 77 {
672 ph10 197 newptrb.epb_saved_eptr = eptr;
673     newptrb.epb_prev = eptrb;
674     eptrb = &newptrb;
675 nigel 77 }
676    
677 nigel 93 /* Now start processing the opcodes. */
678 nigel 77
679     for (;;)
680     {
681 nigel 93 minimize = possessive = FALSE;
682 nigel 77 op = *ecode;
683 ph10 443
684 nigel 93 switch(op)
685     {
686 ph10 510 case OP_MARK:
687     markptr = ecode + 2;
688     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
689 ph10 512 ims, eptrb, flags, RM55);
690    
691     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
692     argument, and we must check whether that argument matches this MARK's
693     argument. It is passed back in md->start_match_ptr (an overloading of that
694     variable). If it does match, we reset that variable to the current subject
695     position and return MATCH_SKIP. Otherwise, pass back the return code
696 ph10 510 unaltered. */
697 ph10 512
698     if (rrc == MATCH_SKIP_ARG &&
699 ph10 510 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
700     {
701     md->start_match_ptr = eptr;
702     RRETURN(MATCH_SKIP);
703     }
704    
705 ph10 512 if (md->mark == NULL) md->mark = markptr;
706 ph10 510 RRETURN(rrc);
707    
708 ph10 210 case OP_FAIL:
709 ph10 510 MRRETURN(MATCH_NOMATCH);
710 ph10 211
711 ph10 510 case OP_COMMIT:
712     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
713     ims, eptrb, flags, RM52);
714     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
715     MRRETURN(MATCH_COMMIT);
716    
717 ph10 210 case OP_PRUNE:
718     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
719     ims, eptrb, flags, RM51);
720     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
721 ph10 510 MRRETURN(MATCH_PRUNE);
722 ph10 211
723 ph10 510 case OP_PRUNE_ARG:
724     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
725 ph10 512 ims, eptrb, flags, RM56);
726 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
727 ph10 510 md->mark = ecode + 2;
728     RRETURN(MATCH_PRUNE);
729 ph10 211
730 ph10 210 case OP_SKIP:
731     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
732     ims, eptrb, flags, RM53);
733     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
734 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
735 ph10 510 MRRETURN(MATCH_SKIP);
736 ph10 211
737 ph10 510 case OP_SKIP_ARG:
738     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
739 ph10 512 ims, eptrb, flags, RM57);
740 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
741 ph10 512
742     /* Pass back the current skip name by overloading md->start_match_ptr and
743     returning the special MATCH_SKIP_ARG return code. This will either be
744     caught by a matching MARK, or get to the top, where it is treated the same
745 ph10 510 as PRUNE. */
746 ph10 512
747 ph10 510 md->start_match_ptr = ecode + 2;
748 ph10 512 RRETURN(MATCH_SKIP_ARG);
749    
750 ph10 210 case OP_THEN:
751     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
752 ph10 212 ims, eptrb, flags, RM54);
753 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
754 ph10 510 MRRETURN(MATCH_THEN);
755    
756     case OP_THEN_ARG:
757     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
758 ph10 512 ims, eptrb, flags, RM58);
759 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
760     md->mark = ecode + 2;
761 ph10 212 RRETURN(MATCH_THEN);
762 ph10 211
763 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
764     the current subject position in the working slot at the top of the vector.
765     We mustn't change the current values of the data slot, because they may be
766     set from a previous iteration of this group, and be referred to by a
767     reference inside the group.
768 nigel 77
769 nigel 93 If the bracket fails to match, we need to restore this value and also the
770     values of the final offsets, in case they were set by a previous iteration
771     of the same bracket.
772 nigel 77
773 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
774     a non-capturing bracket. Don't worry about setting the flag for the error
775     case here; that is handled in the code for KET. */
776 nigel 77
777 nigel 93 case OP_CBRA:
778     case OP_SCBRA:
779     number = GET2(ecode, 1+LINK_SIZE);
780 nigel 77 offset = number << 1;
781    
782 ph10 475 #ifdef PCRE_DEBUG
783 nigel 93 printf("start bracket %d\n", number);
784     printf("subject=");
785 nigel 77 pchars(eptr, 16, TRUE, md);
786     printf("\n");
787     #endif
788    
789     if (offset < md->offset_max)
790     {
791     save_offset1 = md->offset_vector[offset];
792     save_offset2 = md->offset_vector[offset+1];
793     save_offset3 = md->offset_vector[md->offset_end - number];
794     save_capture_last = md->capture_last;
795    
796     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
797 ph10 530 md->offset_vector[md->offset_end - number] =
798     (int)(eptr - md->start_subject);
799 nigel 77
800 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
801 nigel 77 do
802     {
803 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
804     ims, eptrb, flags, RM1);
805 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
806 nigel 77 md->capture_last = save_capture_last;
807     ecode += GET(ecode, 1);
808     }
809     while (*ecode == OP_ALT);
810    
811     DPRINTF(("bracket %d failed\n", number));
812    
813     md->offset_vector[offset] = save_offset1;
814     md->offset_vector[offset+1] = save_offset2;
815     md->offset_vector[md->offset_end - number] = save_offset3;
816    
817 ph10 510 if (rrc != MATCH_THEN) md->mark = markptr;
818 nigel 77 RRETURN(MATCH_NOMATCH);
819     }
820    
821 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
822     as a non-capturing bracket. */
823 nigel 77
824 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
825     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
826    
827 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
828 nigel 77
829 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
830     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
831    
832 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
833     final alternative within the brackets, we would return the result of a
834     recursive call to match() whatever happened. We can reduce stack usage by
835 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
836     is set.*/
837 nigel 77
838 nigel 93 case OP_BRA:
839     case OP_SBRA:
840     DPRINTF(("start non-capturing bracket\n"));
841     flags = (op >= OP_SBRA)? match_cbegroup : 0;
842 nigel 91 for (;;)
843 nigel 77 {
844 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
845 nigel 93 {
846 ph10 197 if (flags == 0) /* Not a possibly empty group */
847     {
848     ecode += _pcre_OP_lengths[*ecode];
849     DPRINTF(("bracket 0 tail recursion\n"));
850     goto TAIL_RECURSE;
851     }
852    
853     /* Possibly empty group; can't use tail recursion. */
854    
855     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
856     eptrb, flags, RM48);
857 ph10 512 if (rrc == MATCH_NOMATCH) md->mark = markptr;
858     RRETURN(rrc);
859 nigel 93 }
860 nigel 91
861     /* For non-final alternatives, continue the loop for a NOMATCH result;
862     otherwise return. */
863    
864 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
865     eptrb, flags, RM2);
866 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
867 nigel 77 ecode += GET(ecode, 1);
868     }
869 nigel 91 /* Control never reaches here. */
870 nigel 77
871     /* Conditional group: compilation checked that there are no more than
872     two branches. If the condition is false, skipping the first branch takes us
873     past the end if there is only one branch, but that's OK because that is
874 nigel 91 exactly what going to the ket would do. As there is only one branch to be
875     obeyed, we can use tail recursion to avoid using another stack frame. */
876 nigel 77
877     case OP_COND:
878 nigel 93 case OP_SCOND:
879 ph10 399 codelink= GET(ecode, 1);
880 ph10 406
881 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
882     inserted between OP_COND and an assertion condition. */
883 ph10 392
884 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
885     {
886     if (pcre_callout != NULL)
887     {
888     pcre_callout_block cb;
889     cb.version = 1; /* Version 1 of the callout block */
890     cb.callout_number = ecode[LINK_SIZE+2];
891     cb.offset_vector = md->offset_vector;
892     cb.subject = (PCRE_SPTR)md->start_subject;
893 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
894     cb.start_match = (int)(mstart - md->start_subject);
895     cb.current_position = (int)(eptr - md->start_subject);
896 ph10 381 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
897     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
898     cb.capture_top = offset_top/2;
899     cb.capture_last = md->capture_last;
900     cb.callout_data = md->callout_data;
901 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
902 ph10 381 if (rrc < 0) RRETURN(rrc);
903     }
904     ecode += _pcre_OP_lengths[OP_CALLOUT];
905     }
906 ph10 392
907 ph10 399 condcode = ecode[LINK_SIZE+1];
908 ph10 406
909 ph10 381 /* Now see what the actual condition is */
910 ph10 392
911 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
912 nigel 77 {
913 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
914     {
915 ph10 461 condition = FALSE;
916     ecode += GET(ecode, 1);
917     }
918 ph10 459 else
919 ph10 461 {
920 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
921     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
922 ph10 461
923 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
924     false, but the test was set up by name, scan the table to see if the
925     name refers to any other numbers, and test them. The condition is true
926     if any one is set. */
927 ph10 461
928 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
929     {
930     uschar *slotA = md->name_table;
931     for (i = 0; i < md->name_count; i++)
932 ph10 461 {
933     if (GET2(slotA, 0) == recno) break;
934 ph10 459 slotA += md->name_entry_size;
935     }
936 ph10 461
937 ph10 459 /* Found a name for the number - there can be only one; duplicate
938     names for different numbers are allowed, but not vice versa. First
939     scan down for duplicates. */
940 ph10 461
941 ph10 459 if (i < md->name_count)
942 ph10 461 {
943 ph10 459 uschar *slotB = slotA;
944     while (slotB > md->name_table)
945     {
946     slotB -= md->name_entry_size;
947     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
948     {
949     condition = GET2(slotB, 0) == md->recursive->group_num;
950 ph10 461 if (condition) break;
951     }
952 ph10 459 else break;
953 ph10 461 }
954    
955 ph10 459 /* Scan up for duplicates */
956 ph10 461
957 ph10 459 if (!condition)
958 ph10 461 {
959 ph10 459 slotB = slotA;
960     for (i++; i < md->name_count; i++)
961     {
962     slotB += md->name_entry_size;
963     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
964     {
965     condition = GET2(slotB, 0) == md->recursive->group_num;
966     if (condition) break;
967 ph10 461 }
968 ph10 459 else break;
969 ph10 461 }
970     }
971 ph10 459 }
972 ph10 461 }
973    
974 ph10 459 /* Choose branch according to the condition */
975 ph10 461
976 ph10 459 ecode += condition? 3 : GET(ecode, 1);
977     }
978 ph10 461 }
979 nigel 93
980 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
981 nigel 93 {
982 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
983 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
984 ph10 461
985 ph10 459 /* If the numbered capture is unset, but the reference was by name,
986 ph10 461 scan the table to see if the name refers to any other numbers, and test
987     them. The condition is true if any one is set. This is tediously similar
988     to the code above, but not close enough to try to amalgamate. */
989    
990 ph10 459 if (!condition && condcode == OP_NCREF)
991     {
992 ph10 461 int refno = offset >> 1;
993 ph10 459 uschar *slotA = md->name_table;
994 ph10 461
995 ph10 459 for (i = 0; i < md->name_count; i++)
996 ph10 461 {
997     if (GET2(slotA, 0) == refno) break;
998 ph10 459 slotA += md->name_entry_size;
999     }
1000 ph10 461
1001     /* Found a name for the number - there can be only one; duplicate names
1002     for different numbers are allowed, but not vice versa. First scan down
1003 ph10 459 for duplicates. */
1004 ph10 461
1005 ph10 459 if (i < md->name_count)
1006 ph10 461 {
1007 ph10 459 uschar *slotB = slotA;
1008     while (slotB > md->name_table)
1009     {
1010     slotB -= md->name_entry_size;
1011     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1012     {
1013     offset = GET2(slotB, 0) << 1;
1014 ph10 461 condition = offset < offset_top &&
1015 ph10 459 md->offset_vector[offset] >= 0;
1016 ph10 461 if (condition) break;
1017     }
1018 ph10 459 else break;
1019 ph10 461 }
1020    
1021 ph10 459 /* Scan up for duplicates */
1022 ph10 461
1023 ph10 459 if (!condition)
1024 ph10 461 {
1025 ph10 459 slotB = slotA;
1026     for (i++; i < md->name_count; i++)
1027     {
1028     slotB += md->name_entry_size;
1029     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1030     {
1031     offset = GET2(slotB, 0) << 1;
1032 ph10 461 condition = offset < offset_top &&
1033 ph10 459 md->offset_vector[offset] >= 0;
1034 ph10 461 if (condition) break;
1035     }
1036 ph10 459 else break;
1037 ph10 461 }
1038     }
1039 ph10 459 }
1040 ph10 461 }
1041    
1042 ph10 459 /* Choose branch according to the condition */
1043    
1044 nigel 93 ecode += condition? 3 : GET(ecode, 1);
1045 nigel 77 }
1046    
1047 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
1048 nigel 93 {
1049     condition = FALSE;
1050     ecode += GET(ecode, 1);
1051     }
1052    
1053 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
1054 nigel 93 the final argument match_condassert causes it to stop at the end of an
1055     assertion. */
1056 nigel 77
1057     else
1058     {
1059 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1060     match_condassert, RM3);
1061 nigel 77 if (rrc == MATCH_MATCH)
1062     {
1063 nigel 93 condition = TRUE;
1064     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1065 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1066     }
1067 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1068 nigel 77 {
1069     RRETURN(rrc); /* Need braces because of following else */
1070     }
1071 nigel 93 else
1072     {
1073     condition = FALSE;
1074 ph10 399 ecode += codelink;
1075 nigel 93 }
1076     }
1077 nigel 91
1078 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1079 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1080     match_cbegroup is required for an unlimited repeat of a possibly empty
1081     group. If the second alternative doesn't exist, we can just plough on. */
1082 nigel 91
1083 nigel 93 if (condition || *ecode == OP_ALT)
1084     {
1085 nigel 91 ecode += 1 + LINK_SIZE;
1086 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1087     {
1088     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1089     RRETURN(rrc);
1090     }
1091     else /* Group must match something */
1092     {
1093     flags = 0;
1094     goto TAIL_RECURSE;
1095     }
1096 nigel 77 }
1097 ph10 395 else /* Condition false & no alternative */
1098 nigel 93 {
1099     ecode += 1 + LINK_SIZE;
1100     }
1101     break;
1102 nigel 77
1103 ph10 461
1104 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1105     to close any currently open capturing brackets. */
1106 ph10 461
1107 ph10 447 case OP_CLOSE:
1108 ph10 461 number = GET2(ecode, 1);
1109 ph10 447 offset = number << 1;
1110 ph10 461
1111 ph10 475 #ifdef PCRE_DEBUG
1112 ph10 447 printf("end bracket %d at *ACCEPT", number);
1113     printf("\n");
1114     #endif
1115 nigel 77
1116 ph10 447 md->capture_last = number;
1117     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1118     {
1119     md->offset_vector[offset] =
1120     md->offset_vector[md->offset_end - number];
1121 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1122 ph10 447 if (offset_top <= offset) offset_top = offset + 2;
1123     }
1124     ecode += 3;
1125 ph10 461 break;
1126 ph10 447
1127    
1128 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1129     recursion, we should restore the offsets appropriately and continue from
1130     after the call. */
1131 nigel 77
1132 ph10 210 case OP_ACCEPT:
1133 nigel 77 case OP_END:
1134     if (md->recursive != NULL && md->recursive->group_num == 0)
1135     {
1136     recursion_info *rec = md->recursive;
1137 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1138 nigel 77 md->recursive = rec->prevrec;
1139     memmove(md->offset_vector, rec->offset_save,
1140     rec->saved_max * sizeof(int));
1141 ph10 461 offset_top = rec->save_offset_top;
1142 nigel 77 ims = original_ims;
1143     ecode = rec->after_call;
1144     break;
1145     }
1146    
1147 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1148     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1149     the subject. In both cases, backtracking will then try other alternatives,
1150     if any. */
1151 ph10 443
1152 ph10 442 if (eptr == mstart &&
1153     (md->notempty ||
1154 ph10 443 (md->notempty_atstart &&
1155 ph10 442 mstart == md->start_subject + md->start_offset)))
1156 ph10 510 MRRETURN(MATCH_NOMATCH);
1157 ph10 443
1158 ph10 442 /* Otherwise, we have a match. */
1159 nigel 77
1160 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1161     md->end_offset_top = offset_top; /* and how many extracts were taken */
1162 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1163 nigel 77
1164 ph10 512 /* For some reason, the macros don't work properly if an expression is
1165     given as the argument to MRRETURN when the heap is in use. */
1166    
1167     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1168     MRRETURN(rrc);
1169    
1170 nigel 77 /* Change option settings */
1171    
1172     case OP_OPT:
1173     ims = ecode[1];
1174     ecode += 2;
1175     DPRINTF(("ims set to %02lx\n", ims));
1176     break;
1177    
1178     /* Assertion brackets. Check the alternative branches in turn - the
1179     matching won't pass the KET for an assertion. If any one branch matches,
1180     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1181     start of each branch to move the current point backwards, so the code at
1182     this level is identical to the lookahead case. */
1183    
1184     case OP_ASSERT:
1185     case OP_ASSERTBACK:
1186     do
1187     {
1188 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1189     RM4);
1190 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1191 ph10 500 {
1192     mstart = md->start_match_ptr; /* In case \K reset it */
1193     break;
1194 ph10 501 }
1195 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1196 nigel 77 ecode += GET(ecode, 1);
1197     }
1198     while (*ecode == OP_ALT);
1199 ph10 510 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1200 nigel 77
1201     /* If checking an assertion for a condition, return MATCH_MATCH. */
1202    
1203     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1204    
1205     /* Continue from after the assertion, updating the offsets high water
1206     mark, since extracts may have been taken during the assertion. */
1207    
1208     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1209     ecode += 1 + LINK_SIZE;
1210     offset_top = md->end_offset_top;
1211     continue;
1212    
1213 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1214 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1215 ph10 473 branches. */
1216 nigel 77
1217     case OP_ASSERT_NOT:
1218     case OP_ASSERTBACK_NOT:
1219     do
1220     {
1221 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1222     RM5);
1223 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1224 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1225     {
1226     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1227 ph10 482 break;
1228     }
1229 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1230 nigel 77 ecode += GET(ecode,1);
1231     }
1232     while (*ecode == OP_ALT);
1233    
1234     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1235    
1236     ecode += 1 + LINK_SIZE;
1237     continue;
1238    
1239     /* Move the subject pointer back. This occurs only at the start of
1240     each branch of a lookbehind assertion. If we are too close to the start to
1241     move back, this match function fails. When working with UTF-8 we move
1242     back a number of characters, not bytes. */
1243    
1244     case OP_REVERSE:
1245     #ifdef SUPPORT_UTF8
1246     if (utf8)
1247     {
1248 nigel 93 i = GET(ecode, 1);
1249     while (i-- > 0)
1250 nigel 77 {
1251     eptr--;
1252 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1253 ph10 207 BACKCHAR(eptr);
1254 nigel 77 }
1255     }
1256     else
1257     #endif
1258    
1259     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1260    
1261     {
1262 nigel 93 eptr -= GET(ecode, 1);
1263 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1264 nigel 77 }
1265    
1266 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1267 nigel 77
1268 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1269 nigel 77 ecode += 1 + LINK_SIZE;
1270     break;
1271    
1272     /* The callout item calls an external function, if one is provided, passing
1273     details of the match so far. This is mainly for debugging, though the
1274     function is able to force a failure. */
1275    
1276     case OP_CALLOUT:
1277     if (pcre_callout != NULL)
1278     {
1279     pcre_callout_block cb;
1280     cb.version = 1; /* Version 1 of the callout block */
1281     cb.callout_number = ecode[1];
1282     cb.offset_vector = md->offset_vector;
1283 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1284 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
1285     cb.start_match = (int)(mstart - md->start_subject);
1286     cb.current_position = (int)(eptr - md->start_subject);
1287 nigel 77 cb.pattern_position = GET(ecode, 2);
1288     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1289     cb.capture_top = offset_top/2;
1290     cb.capture_last = md->capture_last;
1291     cb.callout_data = md->callout_data;
1292 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1293 nigel 77 if (rrc < 0) RRETURN(rrc);
1294     }
1295     ecode += 2 + 2*LINK_SIZE;
1296     break;
1297    
1298     /* Recursion either matches the current regex, or some subexpression. The
1299     offset data is the offset to the starting bracket from the start of the
1300     whole pattern. (This is so that it works from duplicated subpatterns.)
1301    
1302     If there are any capturing brackets started but not finished, we have to
1303     save their starting points and reinstate them after the recursion. However,
1304     we don't know how many such there are (offset_top records the completed
1305     total) so we just have to save all the potential data. There may be up to
1306     65535 such values, which is too large to put on the stack, but using malloc
1307     for small numbers seems expensive. As a compromise, the stack is used when
1308     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1309     is used. If the malloc fails, the offsets cannot be saved, so the
1310     recursion is not attempted and PCRE_ERROR_NOMEMORY is returned from this
1311     call of match().
1312    
1313     There are also other values that have to be saved. We use a chained
1314     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1315     for the original version of this logic. */
1316    
1317     case OP_RECURSE:
1318     {
1319     callpat = md->start_code + GET(ecode, 1);
1320 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1321     GET2(callpat, 1 + LINK_SIZE);
1322 nigel 77
1323     /* Add to "recursing stack" */
1324    
1325     new_recursive.prevrec = md->recursive;
1326     md->recursive = &new_recursive;
1327    
1328     /* Find where to continue from afterwards */
1329    
1330     ecode += 1 + LINK_SIZE;
1331     new_recursive.after_call = ecode;
1332    
1333     /* Now save the offset data. */
1334    
1335     new_recursive.saved_max = md->offset_end;
1336     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1337     new_recursive.offset_save = stacksave;
1338     else
1339     {
1340     new_recursive.offset_save =
1341     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1342     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1343     }
1344    
1345     memcpy(new_recursive.offset_save, md->offset_vector,
1346     new_recursive.saved_max * sizeof(int));
1347 ph10 461 new_recursive.save_offset_top = offset_top;
1348 nigel 77
1349     /* OK, now we can do the recursion. For each top-level alternative we
1350     restore the offset and recursion data. */
1351    
1352     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1353 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1354 nigel 77 do
1355     {
1356 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1357     md, ims, eptrb, flags, RM6);
1358 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1359 nigel 77 {
1360 nigel 87 DPRINTF(("Recursion matched\n"));
1361 nigel 77 md->recursive = new_recursive.prevrec;
1362     if (new_recursive.offset_save != stacksave)
1363     (pcre_free)(new_recursive.offset_save);
1364 ph10 510 MRRETURN(MATCH_MATCH);
1365 nigel 77 }
1366 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1367 nigel 87 {
1368     DPRINTF(("Recursion gave error %d\n", rrc));
1369 ph10 400 if (new_recursive.offset_save != stacksave)
1370     (pcre_free)(new_recursive.offset_save);
1371 nigel 87 RRETURN(rrc);
1372     }
1373 nigel 77
1374     md->recursive = &new_recursive;
1375     memcpy(md->offset_vector, new_recursive.offset_save,
1376     new_recursive.saved_max * sizeof(int));
1377     callpat += GET(callpat, 1);
1378     }
1379     while (*callpat == OP_ALT);
1380    
1381     DPRINTF(("Recursion didn't match\n"));
1382     md->recursive = new_recursive.prevrec;
1383     if (new_recursive.offset_save != stacksave)
1384     (pcre_free)(new_recursive.offset_save);
1385 ph10 510 MRRETURN(MATCH_NOMATCH);
1386 nigel 77 }
1387     /* Control never reaches here */
1388    
1389     /* "Once" brackets are like assertion brackets except that after a match,
1390     the point in the subject string is not moved back. Thus there can never be
1391     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1392     Check the alternative branches in turn - the matching won't pass the KET
1393     for this kind of subpattern. If any one branch matches, we carry on as at
1394 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1395     the start-of-match value in case it was changed by \K. */
1396 nigel 77
1397     case OP_ONCE:
1398 nigel 91 prev = ecode;
1399     saved_eptr = eptr;
1400    
1401     do
1402 nigel 77 {
1403 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1404 ph10 511 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1405 ph10 500 {
1406     mstart = md->start_match_ptr;
1407     break;
1408 ph10 501 }
1409 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1410 nigel 91 ecode += GET(ecode,1);
1411     }
1412     while (*ecode == OP_ALT);
1413 nigel 77
1414 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1415 nigel 77
1416 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1417 nigel 77
1418 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1419     mark, since extracts may have been taken. */
1420 nigel 77
1421 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1422 nigel 77
1423 nigel 91 offset_top = md->end_offset_top;
1424     eptr = md->end_match_ptr;
1425 nigel 77
1426 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1427     happens for a repeating ket if no characters were matched in the group.
1428     This is the forcible breaking of infinite loops as implemented in Perl
1429     5.005. If there is an options reset, it will get obeyed in the normal
1430     course of events. */
1431 nigel 77
1432 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1433     {
1434     ecode += 1+LINK_SIZE;
1435     break;
1436     }
1437 nigel 77
1438 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1439     preceding bracket, in the appropriate order. The second "call" of match()
1440     uses tail recursion, to avoid using another stack frame. We need to reset
1441     any options that changed within the bracket before re-running it, so
1442     check the next opcode. */
1443 nigel 77
1444 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1445     {
1446     ims = (ims & ~PCRE_IMS) | ecode[4];
1447     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1448     }
1449 nigel 77
1450 nigel 91 if (*ecode == OP_KETRMIN)
1451     {
1452 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1453 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1454     ecode = prev;
1455 ph10 197 flags = 0;
1456 nigel 91 goto TAIL_RECURSE;
1457 nigel 77 }
1458 nigel 91 else /* OP_KETRMAX */
1459     {
1460 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1461 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1462     ecode += 1 + LINK_SIZE;
1463 ph10 197 flags = 0;
1464 nigel 91 goto TAIL_RECURSE;
1465     }
1466     /* Control never gets here */
1467 nigel 77
1468     /* An alternation is the end of a branch; scan along to find the end of the
1469     bracketed group and go to there. */
1470    
1471     case OP_ALT:
1472     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1473     break;
1474    
1475 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1476     indicating that it may occur zero times. It may repeat infinitely, or not
1477     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1478     with fixed upper repeat limits are compiled as a number of copies, with the
1479     optional ones preceded by BRAZERO or BRAMINZERO. */
1480 nigel 77
1481     case OP_BRAZERO:
1482     {
1483     next = ecode+1;
1484 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1485 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1486     do next += GET(next,1); while (*next == OP_ALT);
1487 nigel 93 ecode = next + 1 + LINK_SIZE;
1488 nigel 77 }
1489     break;
1490    
1491     case OP_BRAMINZERO:
1492     {
1493     next = ecode+1;
1494 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1495 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1496 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1497     ecode++;
1498     }
1499     break;
1500    
1501 ph10 335 case OP_SKIPZERO:
1502     {
1503     next = ecode+1;
1504     do next += GET(next,1); while (*next == OP_ALT);
1505     ecode = next + 1 + LINK_SIZE;
1506     }
1507     break;
1508    
1509 nigel 93 /* End of a group, repeated or non-repeating. */
1510 nigel 77
1511     case OP_KET:
1512     case OP_KETRMIN:
1513     case OP_KETRMAX:
1514 nigel 91 prev = ecode - GET(ecode, 1);
1515 nigel 77
1516 nigel 93 /* If this was a group that remembered the subject start, in order to break
1517     infinite repeats of empty string matches, retrieve the subject start from
1518     the chain. Otherwise, set it NULL. */
1519 nigel 77
1520 nigel 93 if (*prev >= OP_SBRA)
1521     {
1522     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1523     eptrb = eptrb->epb_prev; /* Backup to previous group */
1524     }
1525     else saved_eptr = NULL;
1526 nigel 77
1527 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1528     matching and return MATCH_MATCH, but record the current high water mark for
1529     use by positive assertions. We also need to record the match start in case
1530     it was changed by \K. */
1531 nigel 93
1532 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1533     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1534     *prev == OP_ONCE)
1535     {
1536     md->end_match_ptr = eptr; /* For ONCE */
1537     md->end_offset_top = offset_top;
1538 ph10 500 md->start_match_ptr = mstart;
1539 ph10 510 MRRETURN(MATCH_MATCH);
1540 nigel 91 }
1541 nigel 77
1542 nigel 93 /* For capturing groups we have to check the group number back at the start
1543     and if necessary complete handling an extraction by setting the offsets and
1544     bumping the high water mark. Note that whole-pattern recursion is coded as
1545     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1546     when the OP_END is reached. Other recursion is handled here. */
1547 nigel 77
1548 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1549 nigel 91 {
1550 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1551 nigel 91 offset = number << 1;
1552 ph10 461
1553 ph10 475 #ifdef PCRE_DEBUG
1554 nigel 91 printf("end bracket %d", number);
1555     printf("\n");
1556 nigel 77 #endif
1557    
1558 nigel 93 md->capture_last = number;
1559     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1560 nigel 91 {
1561 nigel 93 md->offset_vector[offset] =
1562     md->offset_vector[md->offset_end - number];
1563 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1564 nigel 93 if (offset_top <= offset) offset_top = offset + 2;
1565     }
1566 nigel 77
1567 nigel 93 /* Handle a recursively called group. Restore the offsets
1568     appropriately and continue from after the call. */
1569 nigel 77
1570 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1571     {
1572     recursion_info *rec = md->recursive;
1573     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1574     md->recursive = rec->prevrec;
1575     memcpy(md->offset_vector, rec->offset_save,
1576     rec->saved_max * sizeof(int));
1577 ph10 461 offset_top = rec->save_offset_top;
1578 nigel 93 ecode = rec->after_call;
1579     ims = original_ims;
1580     break;
1581 nigel 77 }
1582 nigel 91 }
1583 nigel 77
1584 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1585     flags, in case they got changed during the group. */
1586 nigel 77
1587 nigel 91 ims = original_ims;
1588     DPRINTF(("ims reset to %02lx\n", ims));
1589 nigel 77
1590 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1591     happens for a repeating ket if no characters were matched in the group.
1592     This is the forcible breaking of infinite loops as implemented in Perl
1593     5.005. If there is an options reset, it will get obeyed in the normal
1594     course of events. */
1595 nigel 77
1596 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1597     {
1598     ecode += 1 + LINK_SIZE;
1599     break;
1600     }
1601 nigel 77
1602 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1603     preceding bracket, in the appropriate order. In the second case, we can use
1604 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1605     unlimited repeat of a group that can match an empty string. */
1606 nigel 77
1607 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1608    
1609 nigel 91 if (*ecode == OP_KETRMIN)
1610     {
1611 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1612 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1613 ph10 197 if (flags != 0) /* Could match an empty string */
1614     {
1615     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1616     RRETURN(rrc);
1617     }
1618 nigel 91 ecode = prev;
1619     goto TAIL_RECURSE;
1620 nigel 77 }
1621 nigel 91 else /* OP_KETRMAX */
1622     {
1623 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1624 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1625     ecode += 1 + LINK_SIZE;
1626 ph10 197 flags = 0;
1627 nigel 91 goto TAIL_RECURSE;
1628     }
1629     /* Control never gets here */
1630 nigel 77
1631     /* Start of subject unless notbol, or after internal newline if multiline */
1632    
1633     case OP_CIRC:
1634 ph10 510 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1635 nigel 77 if ((ims & PCRE_MULTILINE) != 0)
1636     {
1637 nigel 91 if (eptr != md->start_subject &&
1638 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1639 ph10 510 MRRETURN(MATCH_NOMATCH);
1640 nigel 77 ecode++;
1641     break;
1642     }
1643     /* ... else fall through */
1644    
1645     /* Start of subject assertion */
1646    
1647     case OP_SOD:
1648 ph10 510 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1649 nigel 77 ecode++;
1650     break;
1651    
1652     /* Start of match assertion */
1653    
1654     case OP_SOM:
1655 ph10 510 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1656 nigel 77 ecode++;
1657     break;
1658 ph10 172
1659 ph10 168 /* Reset the start of match point */
1660 ph10 172
1661 ph10 168 case OP_SET_SOM:
1662     mstart = eptr;
1663 ph10 172 ecode++;
1664     break;
1665 nigel 77
1666     /* Assert before internal newline if multiline, or before a terminating
1667     newline unless endonly is set, else end of subject unless noteol is set. */
1668    
1669     case OP_DOLL:
1670     if ((ims & PCRE_MULTILINE) != 0)
1671     {
1672     if (eptr < md->end_subject)
1673 ph10 510 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1674 nigel 77 else
1675 ph10 510 { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1676 nigel 77 ecode++;
1677     break;
1678     }
1679     else
1680     {
1681 ph10 510 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1682 nigel 77 if (!md->endonly)
1683     {
1684 nigel 91 if (eptr != md->end_subject &&
1685 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1686 ph10 510 MRRETURN(MATCH_NOMATCH);
1687 nigel 77 ecode++;
1688     break;
1689     }
1690     }
1691 nigel 91 /* ... else fall through for endonly */
1692 nigel 77
1693     /* End of subject assertion (\z) */
1694    
1695     case OP_EOD:
1696 ph10 510 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1697 nigel 77 ecode++;
1698     break;
1699    
1700     /* End of subject or ending \n assertion (\Z) */
1701    
1702     case OP_EODN:
1703 nigel 91 if (eptr != md->end_subject &&
1704 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1705 ph10 510 MRRETURN(MATCH_NOMATCH);
1706 nigel 77 ecode++;
1707     break;
1708    
1709     /* Word boundary assertions */
1710    
1711     case OP_NOT_WORD_BOUNDARY:
1712     case OP_WORD_BOUNDARY:
1713     {
1714    
1715     /* Find out if the previous and current characters are "word" characters.
1716     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1717 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1718 ph10 435 partial matching. */
1719 nigel 77
1720     #ifdef SUPPORT_UTF8
1721     if (utf8)
1722     {
1723 ph10 518 /* Get status of previous character */
1724 ph10 527
1725 nigel 77 if (eptr == md->start_subject) prev_is_word = FALSE; else
1726     {
1727 ph10 409 USPTR lastptr = eptr - 1;
1728 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1729 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1730 nigel 77 GETCHAR(c, lastptr);
1731 ph10 527 #ifdef SUPPORT_UCP
1732 ph10 518 if (md->use_ucp)
1733     {
1734     if (c == '_') prev_is_word = TRUE; else
1735 ph10 527 {
1736 ph10 518 int cat = UCD_CATEGORY(c);
1737     prev_is_word = (cat == ucp_L || cat == ucp_N);
1738 ph10 527 }
1739     }
1740     else
1741     #endif
1742 nigel 77 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1743     }
1744 ph10 527
1745 ph10 518 /* Get status of next character */
1746 ph10 527
1747 ph10 443 if (eptr >= md->end_subject)
1748 nigel 77 {
1749 ph10 443 SCHECK_PARTIAL();
1750     cur_is_word = FALSE;
1751 ph10 428 }
1752     else
1753     {
1754 nigel 77 GETCHAR(c, eptr);
1755 ph10 527 #ifdef SUPPORT_UCP
1756 ph10 518 if (md->use_ucp)
1757     {
1758     if (c == '_') cur_is_word = TRUE; else
1759 ph10 527 {
1760 ph10 518 int cat = UCD_CATEGORY(c);
1761     cur_is_word = (cat == ucp_L || cat == ucp_N);
1762 ph10 527 }
1763     }
1764     else
1765     #endif
1766 nigel 77 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1767     }
1768     }
1769     else
1770     #endif
1771    
1772 ph10 527 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1773 ph10 518 consistency with the behaviour of \w we do use it in this case. */
1774 nigel 77
1775     {
1776 ph10 518 /* Get status of previous character */
1777 ph10 527
1778 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1779     {
1780 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1781 ph10 527 #ifdef SUPPORT_UCP
1782 ph10 518 if (md->use_ucp)
1783     {
1784 ph10 527 c = eptr[-1];
1785 ph10 518 if (c == '_') prev_is_word = TRUE; else
1786 ph10 527 {
1787 ph10 518 int cat = UCD_CATEGORY(c);
1788     prev_is_word = (cat == ucp_L || cat == ucp_N);
1789 ph10 527 }
1790     }
1791     else
1792     #endif
1793 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1794     }
1795 ph10 527
1796 ph10 518 /* Get status of next character */
1797 ph10 527
1798 ph10 443 if (eptr >= md->end_subject)
1799 ph10 428 {
1800 ph10 443 SCHECK_PARTIAL();
1801     cur_is_word = FALSE;
1802 ph10 428 }
1803 ph10 527 else
1804     #ifdef SUPPORT_UCP
1805 ph10 518 if (md->use_ucp)
1806     {
1807 ph10 527 c = *eptr;
1808 ph10 518 if (c == '_') cur_is_word = TRUE; else
1809 ph10 527 {
1810 ph10 518 int cat = UCD_CATEGORY(c);
1811     cur_is_word = (cat == ucp_L || cat == ucp_N);
1812 ph10 527 }
1813     }
1814     else
1815     #endif
1816 ph10 518 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1817 nigel 77 }
1818    
1819     /* Now see if the situation is what we want */
1820    
1821     if ((*ecode++ == OP_WORD_BOUNDARY)?
1822     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1823 ph10 510 MRRETURN(MATCH_NOMATCH);
1824 nigel 77 }
1825     break;
1826    
1827     /* Match a single character type; inline for speed */
1828    
1829     case OP_ANY:
1830 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1831 ph10 345 /* Fall through */
1832    
1833 ph10 341 case OP_ALLANY:
1834 ph10 443 if (eptr++ >= md->end_subject)
1835 ph10 428 {
1836 ph10 443 SCHECK_PARTIAL();
1837 ph10 510 MRRETURN(MATCH_NOMATCH);
1838 ph10 443 }
1839 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1840 nigel 77 ecode++;
1841     break;
1842    
1843     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1844     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1845    
1846     case OP_ANYBYTE:
1847 ph10 443 if (eptr++ >= md->end_subject)
1848 ph10 428 {
1849 ph10 443 SCHECK_PARTIAL();
1850 ph10 510 MRRETURN(MATCH_NOMATCH);
1851 ph10 443 }
1852 nigel 77 ecode++;
1853     break;
1854    
1855     case OP_NOT_DIGIT:
1856 ph10 443 if (eptr >= md->end_subject)
1857 ph10 428 {
1858 ph10 443 SCHECK_PARTIAL();
1859 ph10 510 MRRETURN(MATCH_NOMATCH);
1860 ph10 443 }
1861 nigel 77 GETCHARINCTEST(c, eptr);
1862     if (
1863     #ifdef SUPPORT_UTF8
1864     c < 256 &&
1865     #endif
1866     (md->ctypes[c] & ctype_digit) != 0
1867     )
1868 ph10 510 MRRETURN(MATCH_NOMATCH);
1869 nigel 77 ecode++;
1870     break;
1871    
1872     case OP_DIGIT:
1873 ph10 443 if (eptr >= md->end_subject)
1874 ph10 428 {
1875 ph10 443 SCHECK_PARTIAL();
1876 ph10 510 MRRETURN(MATCH_NOMATCH);
1877 ph10 443 }
1878 nigel 77 GETCHARINCTEST(c, eptr);
1879     if (
1880     #ifdef SUPPORT_UTF8
1881     c >= 256 ||
1882     #endif
1883     (md->ctypes[c] & ctype_digit) == 0
1884     )
1885 ph10 510 MRRETURN(MATCH_NOMATCH);
1886 nigel 77 ecode++;
1887     break;
1888    
1889     case OP_NOT_WHITESPACE:
1890 ph10 443 if (eptr >= md->end_subject)
1891 ph10 428 {
1892 ph10 443 SCHECK_PARTIAL();
1893 ph10 510 MRRETURN(MATCH_NOMATCH);
1894 ph10 443 }
1895 nigel 77 GETCHARINCTEST(c, eptr);
1896     if (
1897     #ifdef SUPPORT_UTF8
1898     c < 256 &&
1899     #endif
1900     (md->ctypes[c] & ctype_space) != 0
1901     )
1902 ph10 510 MRRETURN(MATCH_NOMATCH);
1903 nigel 77 ecode++;
1904     break;
1905    
1906     case OP_WHITESPACE:
1907 ph10 443 if (eptr >= md->end_subject)
1908 ph10 428 {
1909 ph10 443 SCHECK_PARTIAL();
1910 ph10 510 MRRETURN(MATCH_NOMATCH);
1911 ph10 443 }
1912 nigel 77 GETCHARINCTEST(c, eptr);
1913     if (
1914     #ifdef SUPPORT_UTF8
1915     c >= 256 ||
1916     #endif
1917     (md->ctypes[c] & ctype_space) == 0
1918     )
1919 ph10 510 MRRETURN(MATCH_NOMATCH);
1920 nigel 77 ecode++;
1921     break;
1922    
1923     case OP_NOT_WORDCHAR:
1924 ph10 443 if (eptr >= md->end_subject)
1925 ph10 428 {
1926 ph10 443 SCHECK_PARTIAL();
1927 ph10 510 MRRETURN(MATCH_NOMATCH);
1928 ph10 443 }
1929 nigel 77 GETCHARINCTEST(c, eptr);
1930     if (
1931     #ifdef SUPPORT_UTF8
1932     c < 256 &&
1933     #endif
1934     (md->ctypes[c] & ctype_word) != 0
1935     )
1936 ph10 510 MRRETURN(MATCH_NOMATCH);
1937 nigel 77 ecode++;
1938     break;
1939    
1940     case OP_WORDCHAR:
1941 ph10 443 if (eptr >= md->end_subject)
1942 ph10 428 {
1943 ph10 443 SCHECK_PARTIAL();
1944 ph10 510 MRRETURN(MATCH_NOMATCH);
1945 ph10 443 }
1946 nigel 77 GETCHARINCTEST(c, eptr);
1947     if (
1948     #ifdef SUPPORT_UTF8
1949     c >= 256 ||
1950     #endif
1951     (md->ctypes[c] & ctype_word) == 0
1952     )
1953 ph10 510 MRRETURN(MATCH_NOMATCH);
1954 nigel 77 ecode++;
1955     break;
1956    
1957 nigel 93 case OP_ANYNL:
1958 ph10 443 if (eptr >= md->end_subject)
1959 ph10 428 {
1960 ph10 443 SCHECK_PARTIAL();
1961 ph10 510 MRRETURN(MATCH_NOMATCH);
1962 ph10 443 }
1963 nigel 93 GETCHARINCTEST(c, eptr);
1964     switch(c)
1965     {
1966 ph10 510 default: MRRETURN(MATCH_NOMATCH);
1967 nigel 93 case 0x000d:
1968     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1969     break;
1970 ph10 231
1971 nigel 93 case 0x000a:
1972 ph10 231 break;
1973    
1974 nigel 93 case 0x000b:
1975     case 0x000c:
1976     case 0x0085:
1977     case 0x2028:
1978     case 0x2029:
1979 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1980 nigel 93 break;
1981     }
1982     ecode++;
1983     break;
1984    
1985 ph10 178 case OP_NOT_HSPACE:
1986 ph10 443 if (eptr >= md->end_subject)
1987 ph10 428 {
1988 ph10 443 SCHECK_PARTIAL();
1989 ph10 510 MRRETURN(MATCH_NOMATCH);
1990 ph10 443 }
1991 ph10 178 GETCHARINCTEST(c, eptr);
1992     switch(c)
1993     {
1994     default: break;
1995     case 0x09: /* HT */
1996     case 0x20: /* SPACE */
1997     case 0xa0: /* NBSP */
1998     case 0x1680: /* OGHAM SPACE MARK */
1999     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2000     case 0x2000: /* EN QUAD */
2001     case 0x2001: /* EM QUAD */
2002     case 0x2002: /* EN SPACE */
2003     case 0x2003: /* EM SPACE */
2004     case 0x2004: /* THREE-PER-EM SPACE */
2005     case 0x2005: /* FOUR-PER-EM SPACE */
2006     case 0x2006: /* SIX-PER-EM SPACE */
2007     case 0x2007: /* FIGURE SPACE */
2008     case 0x2008: /* PUNCTUATION SPACE */
2009     case 0x2009: /* THIN SPACE */
2010     case 0x200A: /* HAIR SPACE */
2011     case 0x202f: /* NARROW NO-BREAK SPACE */
2012     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2013     case 0x3000: /* IDEOGRAPHIC SPACE */
2014 ph10 510 MRRETURN(MATCH_NOMATCH);
2015 ph10 178 }
2016     ecode++;
2017     break;
2018    
2019     case OP_HSPACE:
2020 ph10 443 if (eptr >= md->end_subject)
2021 ph10 428 {
2022 ph10 443 SCHECK_PARTIAL();
2023 ph10 510 MRRETURN(MATCH_NOMATCH);
2024 ph10 443 }
2025 ph10 178 GETCHARINCTEST(c, eptr);
2026     switch(c)
2027     {
2028 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2029 ph10 178 case 0x09: /* HT */
2030     case 0x20: /* SPACE */
2031     case 0xa0: /* NBSP */
2032     case 0x1680: /* OGHAM SPACE MARK */
2033     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2034     case 0x2000: /* EN QUAD */
2035     case 0x2001: /* EM QUAD */
2036     case 0x2002: /* EN SPACE */
2037     case 0x2003: /* EM SPACE */
2038     case 0x2004: /* THREE-PER-EM SPACE */
2039     case 0x2005: /* FOUR-PER-EM SPACE */
2040     case 0x2006: /* SIX-PER-EM SPACE */
2041     case 0x2007: /* FIGURE SPACE */
2042     case 0x2008: /* PUNCTUATION SPACE */
2043     case 0x2009: /* THIN SPACE */
2044     case 0x200A: /* HAIR SPACE */
2045     case 0x202f: /* NARROW NO-BREAK SPACE */
2046     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2047     case 0x3000: /* IDEOGRAPHIC SPACE */
2048     break;
2049     }
2050     ecode++;
2051     break;
2052    
2053     case OP_NOT_VSPACE:
2054 ph10 443 if (eptr >= md->end_subject)
2055 ph10 428 {
2056 ph10 443 SCHECK_PARTIAL();
2057 ph10 510 MRRETURN(MATCH_NOMATCH);
2058 ph10 443 }
2059 ph10 178 GETCHARINCTEST(c, eptr);
2060     switch(c)
2061     {
2062     default: break;
2063     case 0x0a: /* LF */
2064     case 0x0b: /* VT */
2065     case 0x0c: /* FF */
2066     case 0x0d: /* CR */
2067     case 0x85: /* NEL */
2068     case 0x2028: /* LINE SEPARATOR */
2069     case 0x2029: /* PARAGRAPH SEPARATOR */
2070 ph10 510 MRRETURN(MATCH_NOMATCH);
2071 ph10 178 }
2072     ecode++;
2073     break;
2074    
2075     case OP_VSPACE:
2076 ph10 443 if (eptr >= md->end_subject)
2077 ph10 428 {
2078 ph10 443 SCHECK_PARTIAL();
2079 ph10 510 MRRETURN(MATCH_NOMATCH);
2080 ph10 443 }
2081 ph10 178 GETCHARINCTEST(c, eptr);
2082     switch(c)
2083     {
2084 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2085 ph10 178 case 0x0a: /* LF */
2086     case 0x0b: /* VT */
2087     case 0x0c: /* FF */
2088     case 0x0d: /* CR */
2089     case 0x85: /* NEL */
2090     case 0x2028: /* LINE SEPARATOR */
2091     case 0x2029: /* PARAGRAPH SEPARATOR */
2092     break;
2093     }
2094     ecode++;
2095     break;
2096    
2097 nigel 77 #ifdef SUPPORT_UCP
2098     /* Check the next character by Unicode property. We will get here only
2099     if the support is in the binary; otherwise a compile-time error occurs. */
2100    
2101     case OP_PROP:
2102     case OP_NOTPROP:
2103 ph10 443 if (eptr >= md->end_subject)
2104 ph10 428 {
2105 ph10 443 SCHECK_PARTIAL();
2106 ph10 510 MRRETURN(MATCH_NOMATCH);
2107 ph10 443 }
2108 nigel 77 GETCHARINCTEST(c, eptr);
2109     {
2110 ph10 384 const ucd_record *prop = GET_UCD(c);
2111 nigel 77
2112 nigel 87 switch(ecode[1])
2113     {
2114     case PT_ANY:
2115 ph10 510 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2116 nigel 87 break;
2117 nigel 77
2118 nigel 87 case PT_LAMP:
2119 ph10 349 if ((prop->chartype == ucp_Lu ||
2120     prop->chartype == ucp_Ll ||
2121     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2122 ph10 510 MRRETURN(MATCH_NOMATCH);
2123 ph10 517 break;
2124 nigel 87
2125     case PT_GC:
2126 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2127 ph10 510 MRRETURN(MATCH_NOMATCH);
2128 nigel 87 break;
2129    
2130     case PT_PC:
2131 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2132 ph10 510 MRRETURN(MATCH_NOMATCH);
2133 nigel 87 break;
2134    
2135     case PT_SC:
2136 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2137 ph10 510 MRRETURN(MATCH_NOMATCH);
2138 nigel 87 break;
2139 ph10 527
2140 ph10 517 /* These are specials */
2141 ph10 527
2142 ph10 517 case PT_ALNUM:
2143     if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2144     _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2145     MRRETURN(MATCH_NOMATCH);
2146 ph10 527 break;
2147    
2148 ph10 517 case PT_SPACE: /* Perl space */
2149     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2150     c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2151     == (op == OP_NOTPROP))
2152     MRRETURN(MATCH_NOMATCH);
2153 ph10 527 break;
2154    
2155 ph10 517 case PT_PXSPACE: /* POSIX space */
2156     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2157 ph10 527 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2158 ph10 517 c == CHAR_FF || c == CHAR_CR)
2159     == (op == OP_NOTPROP))
2160     MRRETURN(MATCH_NOMATCH);
2161 ph10 527 break;
2162 nigel 87
2163 ph10 527 case PT_WORD:
2164 ph10 517 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2165 ph10 527 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2166 ph10 517 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2167     MRRETURN(MATCH_NOMATCH);
2168 ph10 527 break;
2169    
2170 ph10 517 /* This should never occur */
2171    
2172 nigel 87 default:
2173     RRETURN(PCRE_ERROR_INTERNAL);
2174 nigel 77 }
2175 nigel 87
2176     ecode += 3;
2177 nigel 77 }
2178     break;
2179    
2180     /* Match an extended Unicode sequence. We will get here only if the support
2181     is in the binary; otherwise a compile-time error occurs. */
2182    
2183     case OP_EXTUNI:
2184 ph10 443 if (eptr >= md->end_subject)
2185 ph10 428 {
2186 ph10 443 SCHECK_PARTIAL();
2187 ph10 510 MRRETURN(MATCH_NOMATCH);
2188 ph10 443 }
2189 nigel 77 GETCHARINCTEST(c, eptr);
2190     {
2191 ph10 349 int category = UCD_CATEGORY(c);
2192 ph10 510 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2193 nigel 77 while (eptr < md->end_subject)
2194     {
2195     int len = 1;
2196     if (!utf8) c = *eptr; else
2197     {
2198     GETCHARLEN(c, eptr, len);
2199     }
2200 ph10 349 category = UCD_CATEGORY(c);
2201 nigel 77 if (category != ucp_M) break;
2202     eptr += len;
2203     }
2204     }
2205     ecode++;
2206     break;
2207     #endif
2208    
2209    
2210     /* Match a back reference, possibly repeatedly. Look past the end of the
2211     item to see if there is repeat information following. The code is similar
2212     to that for character classes, but repeated for efficiency. Then obey
2213     similar code to character type repeats - written out again for speed.
2214     However, if the referenced string is the empty string, always treat
2215     it as matched, any number of times (otherwise there could be infinite
2216     loops). */
2217    
2218     case OP_REF:
2219     {
2220     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2221 ph10 345 ecode += 3;
2222    
2223 ph10 336 /* If the reference is unset, there are two possibilities:
2224 ph10 345
2225 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
2226     than the amount of subject left; this ensures that every attempt at a
2227     match fails. We can't just fail here, because of the possibility of
2228     quantifiers with zero minima.
2229 ph10 345
2230     (b) If the JavaScript compatibility flag is set, set the length to zero
2231     so that the back reference matches an empty string.
2232    
2233     Otherwise, set the length to the length of what was matched by the
2234 ph10 336 referenced subpattern. */
2235 ph10 345
2236 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
2237 ph10 530 length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2238 ph10 336 else
2239     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2240 nigel 77
2241     /* Set up for repetition, or handle the non-repeated case */
2242    
2243     switch (*ecode)
2244     {
2245     case OP_CRSTAR:
2246     case OP_CRMINSTAR:
2247     case OP_CRPLUS:
2248     case OP_CRMINPLUS:
2249     case OP_CRQUERY:
2250     case OP_CRMINQUERY:
2251     c = *ecode++ - OP_CRSTAR;
2252     minimize = (c & 1) != 0;
2253     min = rep_min[c]; /* Pick up values from tables; */
2254     max = rep_max[c]; /* zero for max => infinity */
2255     if (max == 0) max = INT_MAX;
2256     break;
2257    
2258     case OP_CRRANGE:
2259     case OP_CRMINRANGE:
2260     minimize = (*ecode == OP_CRMINRANGE);
2261     min = GET2(ecode, 1);
2262     max = GET2(ecode, 3);
2263     if (max == 0) max = INT_MAX;
2264     ecode += 5;
2265     break;
2266    
2267     default: /* No repeat follows */
2268 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
2269 ph10 428 {
2270 ph10 443 CHECK_PARTIAL();
2271 ph10 510 MRRETURN(MATCH_NOMATCH);
2272 ph10 443 }
2273 nigel 77 eptr += length;
2274     continue; /* With the main loop */
2275     }
2276    
2277     /* If the length of the reference is zero, just continue with the
2278     main loop. */
2279 ph10 443
2280 nigel 77 if (length == 0) continue;
2281    
2282     /* First, ensure the minimum number of matches are present. We get back
2283     the length of the reference string explicitly rather than passing the
2284     address of eptr, so that eptr can be a register variable. */
2285    
2286     for (i = 1; i <= min; i++)
2287     {
2288 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
2289 ph10 426 {
2290 ph10 427 CHECK_PARTIAL();
2291 ph10 510 MRRETURN(MATCH_NOMATCH);
2292 ph10 427 }
2293 nigel 77 eptr += length;
2294     }
2295    
2296     /* If min = max, continue at the same level without recursion.
2297     They are not both allowed to be zero. */
2298    
2299     if (min == max) continue;
2300    
2301     /* If minimizing, keep trying and advancing the pointer */
2302    
2303     if (minimize)
2304     {
2305     for (fi = min;; fi++)
2306     {
2307 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2308 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2309 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2310 ph10 428 if (!match_ref(offset, eptr, length, md, ims))
2311 ph10 426 {
2312 ph10 427 CHECK_PARTIAL();
2313 ph10 510 MRRETURN(MATCH_NOMATCH);
2314 ph10 427 }
2315 nigel 77 eptr += length;
2316     }
2317     /* Control never gets here */
2318     }
2319    
2320     /* If maximizing, find the longest string and work backwards */
2321    
2322     else
2323     {
2324     pp = eptr;
2325     for (i = min; i < max; i++)
2326     {
2327 ph10 463 if (!match_ref(offset, eptr, length, md, ims))
2328 ph10 462 {
2329 ph10 463 CHECK_PARTIAL();
2330 ph10 462 break;
2331 ph10 463 }
2332 nigel 77 eptr += length;
2333     }
2334     while (eptr >= pp)
2335     {
2336 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2337 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2338     eptr -= length;
2339     }
2340 ph10 510 MRRETURN(MATCH_NOMATCH);
2341 nigel 77 }
2342     }
2343     /* Control never gets here */
2344    
2345     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2346     used when all the characters in the class have values in the range 0-255,
2347     and either the matching is caseful, or the characters are in the range
2348     0-127 when UTF-8 processing is enabled. The only difference between
2349     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2350     encountered.
2351    
2352     First, look past the end of the item to see if there is repeat information
2353     following. Then obey similar code to character type repeats - written out
2354     again for speed. */
2355    
2356     case OP_NCLASS:
2357     case OP_CLASS:
2358     {
2359     data = ecode + 1; /* Save for matching */
2360     ecode += 33; /* Advance past the item */
2361    
2362     switch (*ecode)
2363     {
2364     case OP_CRSTAR:
2365     case OP_CRMINSTAR:
2366     case OP_CRPLUS:
2367     case OP_CRMINPLUS:
2368     case OP_CRQUERY:
2369     case OP_CRMINQUERY:
2370     c = *ecode++ - OP_CRSTAR;
2371     minimize = (c & 1) != 0;
2372     min = rep_min[c]; /* Pick up values from tables; */
2373     max = rep_max[c]; /* zero for max => infinity */
2374     if (max == 0) max = INT_MAX;
2375     break;
2376    
2377     case OP_CRRANGE:
2378     case OP_CRMINRANGE:
2379     minimize = (*ecode == OP_CRMINRANGE);
2380     min = GET2(ecode, 1);
2381     max = GET2(ecode, 3);
2382     if (max == 0) max = INT_MAX;
2383     ecode += 5;
2384     break;
2385    
2386     default: /* No repeat follows */
2387     min = max = 1;
2388     break;
2389     }
2390    
2391     /* First, ensure the minimum number of matches are present. */
2392    
2393     #ifdef SUPPORT_UTF8
2394     /* UTF-8 mode */
2395     if (utf8)
2396     {
2397     for (i = 1; i <= min; i++)
2398     {
2399 ph10 427 if (eptr >= md->end_subject)
2400 ph10 426 {
2401 ph10 428 SCHECK_PARTIAL();
2402 ph10 510 MRRETURN(MATCH_NOMATCH);
2403 ph10 427 }
2404 nigel 77 GETCHARINC(c, eptr);
2405     if (c > 255)
2406     {
2407 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2408 nigel 77 }
2409     else
2410     {
2411 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2412 nigel 77 }
2413     }
2414     }
2415     else
2416     #endif
2417     /* Not UTF-8 mode */
2418     {
2419     for (i = 1; i <= min; i++)
2420     {
2421 ph10 427 if (eptr >= md->end_subject)
2422 ph10 426 {
2423 ph10 428 SCHECK_PARTIAL();
2424 ph10 510 MRRETURN(MATCH_NOMATCH);
2425 ph10 427 }
2426 nigel 77 c = *eptr++;
2427 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2428 nigel 77 }
2429     }
2430    
2431     /* If max == min we can continue with the main loop without the
2432     need to recurse. */
2433    
2434     if (min == max) continue;
2435    
2436     /* If minimizing, keep testing the rest of the expression and advancing
2437     the pointer while it matches the class. */
2438    
2439     if (minimize)
2440     {
2441     #ifdef SUPPORT_UTF8
2442     /* UTF-8 mode */
2443     if (utf8)
2444     {
2445     for (fi = min;; fi++)
2446     {
2447 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2448 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2449 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2450 ph10 427 if (eptr >= md->end_subject)
2451 ph10 426 {
2452 ph10 427 SCHECK_PARTIAL();
2453 ph10 510 MRRETURN(MATCH_NOMATCH);
2454 ph10 427 }
2455 nigel 77 GETCHARINC(c, eptr);
2456     if (c > 255)
2457     {
2458 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2459 nigel 77 }
2460     else
2461     {
2462 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2463 nigel 77 }
2464     }
2465     }
2466     else
2467     #endif
2468     /* Not UTF-8 mode */
2469     {
2470     for (fi = min;; fi++)
2471     {
2472 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2473 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2474 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2475 ph10 427 if (eptr >= md->end_subject)
2476 ph10 426 {
2477 ph10 427 SCHECK_PARTIAL();
2478 ph10 510 MRRETURN(MATCH_NOMATCH);
2479 ph10 427 }
2480 nigel 77 c = *eptr++;
2481 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2482 nigel 77 }
2483     }
2484     /* Control never gets here */
2485     }
2486    
2487     /* If maximizing, find the longest possible run, then work backwards. */
2488    
2489     else
2490     {
2491     pp = eptr;
2492    
2493     #ifdef SUPPORT_UTF8
2494     /* UTF-8 mode */
2495     if (utf8)
2496     {
2497     for (i = min; i < max; i++)
2498     {
2499     int len = 1;
2500 ph10 463 if (eptr >= md->end_subject)
2501 ph10 462 {
2502 ph10 463 SCHECK_PARTIAL();
2503 ph10 462 break;
2504 ph10 463 }
2505 nigel 77 GETCHARLEN(c, eptr, len);
2506     if (c > 255)
2507     {
2508     if (op == OP_CLASS) break;
2509     }
2510     else
2511     {
2512     if ((data[c/8] & (1 << (c&7))) == 0) break;
2513     }
2514     eptr += len;
2515     }
2516     for (;;)
2517     {
2518 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2519 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2520     if (eptr-- == pp) break; /* Stop if tried at original pos */
2521     BACKCHAR(eptr);
2522     }
2523     }
2524     else
2525     #endif
2526     /* Not UTF-8 mode */
2527     {
2528     for (i = min; i < max; i++)
2529     {
2530 ph10 463 if (eptr >= md->end_subject)
2531 ph10 462 {
2532 ph10 463 SCHECK_PARTIAL();
2533 ph10 462 break;
2534 ph10 463 }
2535 nigel 77 c = *eptr;
2536     if ((data[c/8] & (1 << (c&7))) == 0) break;
2537     eptr++;
2538     }
2539     while (eptr >= pp)
2540     {
2541 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2542 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2543 nigel 77 eptr--;
2544     }
2545     }
2546    
2547 ph10 510 MRRETURN(MATCH_NOMATCH);
2548 nigel 77 }
2549     }
2550     /* Control never gets here */
2551    
2552    
2553     /* Match an extended character class. This opcode is encountered only
2554 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2555     mode, because Unicode properties are supported in non-UTF-8 mode. */
2556 nigel 77
2557     #ifdef SUPPORT_UTF8
2558     case OP_XCLASS:
2559     {
2560     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2561     ecode += GET(ecode, 1); /* Advance past the item */
2562    
2563     switch (*ecode)
2564     {
2565     case OP_CRSTAR:
2566     case OP_CRMINSTAR:
2567     case OP_CRPLUS:
2568     case OP_CRMINPLUS:
2569     case OP_CRQUERY:
2570     case OP_CRMINQUERY:
2571     c = *ecode++ - OP_CRSTAR;
2572     minimize = (c & 1) != 0;
2573     min = rep_min[c]; /* Pick up values from tables; */
2574     max = rep_max[c]; /* zero for max => infinity */
2575     if (max == 0) max = INT_MAX;
2576     break;
2577    
2578     case OP_CRRANGE:
2579     case OP_CRMINRANGE:
2580     minimize = (*ecode == OP_CRMINRANGE);
2581     min = GET2(ecode, 1);
2582     max = GET2(ecode, 3);
2583     if (max == 0) max = INT_MAX;
2584     ecode += 5;
2585     break;
2586    
2587     default: /* No repeat follows */
2588     min = max = 1;
2589     break;
2590     }
2591    
2592     /* First, ensure the minimum number of matches are present. */
2593    
2594     for (i = 1; i <= min; i++)
2595     {
2596 ph10 427 if (eptr >= md->end_subject)
2597 ph10 426 {
2598     SCHECK_PARTIAL();
2599 ph10 510 MRRETURN(MATCH_NOMATCH);
2600 ph10 427 }
2601 ph10 384 GETCHARINCTEST(c, eptr);
2602 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2603 nigel 77 }
2604    
2605     /* If max == min we can continue with the main loop without the
2606     need to recurse. */
2607    
2608     if (min == max) continue;
2609    
2610     /* If minimizing, keep testing the rest of the expression and advancing
2611     the pointer while it matches the class. */
2612    
2613     if (minimize)
2614     {
2615     for (fi = min;; fi++)
2616     {
2617 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2618 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2619 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2620 ph10 427 if (eptr >= md->end_subject)
2621 ph10 426 {
2622 ph10 427 SCHECK_PARTIAL();
2623 ph10 510 MRRETURN(MATCH_NOMATCH);
2624 ph10 427 }
2625 ph10 384 GETCHARINCTEST(c, eptr);
2626 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2627 nigel 77 }
2628     /* Control never gets here */
2629     }
2630    
2631     /* If maximizing, find the longest possible run, then work backwards. */
2632    
2633     else
2634     {
2635     pp = eptr;
2636     for (i = min; i < max; i++)
2637     {
2638     int len = 1;
2639 ph10 463 if (eptr >= md->end_subject)
2640 ph10 462 {
2641 ph10 463 SCHECK_PARTIAL();
2642 ph10 462 break;
2643 ph10 463 }
2644 ph10 384 GETCHARLENTEST(c, eptr, len);
2645 nigel 77 if (!_pcre_xclass(c, data)) break;
2646     eptr += len;
2647     }
2648     for(;;)
2649     {
2650 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2651 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2652     if (eptr-- == pp) break; /* Stop if tried at original pos */
2653 ph10 214 if (utf8) BACKCHAR(eptr);
2654 nigel 77 }
2655 ph10 510 MRRETURN(MATCH_NOMATCH);
2656 nigel 77 }
2657    
2658     /* Control never gets here */
2659     }
2660     #endif /* End of XCLASS */
2661    
2662     /* Match a single character, casefully */
2663    
2664     case OP_CHAR:
2665     #ifdef SUPPORT_UTF8
2666     if (utf8)
2667     {
2668     length = 1;
2669     ecode++;
2670     GETCHARLEN(fc, ecode, length);
2671 ph10 443 if (length > md->end_subject - eptr)
2672 ph10 428 {
2673     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2674 ph10 510 MRRETURN(MATCH_NOMATCH);
2675 ph10 443 }
2676 ph10 510 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2677 nigel 77 }
2678     else
2679     #endif
2680    
2681     /* Non-UTF-8 mode */
2682     {
2683 ph10 443 if (md->end_subject - eptr < 1)
2684 ph10 428 {
2685     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2686 ph10 510 MRRETURN(MATCH_NOMATCH);
2687 ph10 443 }
2688 ph10 510 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2689 nigel 77 ecode += 2;
2690     }
2691     break;
2692    
2693     /* Match a single character, caselessly */
2694    
2695     case OP_CHARNC:
2696     #ifdef SUPPORT_UTF8
2697     if (utf8)
2698     {
2699     length = 1;
2700     ecode++;
2701     GETCHARLEN(fc, ecode, length);
2702    
2703 ph10 443 if (length > md->end_subject - eptr)
2704 ph10 428 {
2705     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2706 ph10 510 MRRETURN(MATCH_NOMATCH);
2707 ph10 443 }
2708 nigel 77
2709     /* If the pattern character's value is < 128, we have only one byte, and
2710     can use the fast lookup table. */
2711    
2712     if (fc < 128)
2713     {
2714 ph10 510 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2715 nigel 77 }
2716    
2717     /* Otherwise we must pick up the subject character */
2718    
2719     else
2720     {
2721 nigel 93 unsigned int dc;
2722 nigel 77 GETCHARINC(dc, eptr);
2723     ecode += length;
2724    
2725     /* If we have Unicode property support, we can use it to test the other
2726 nigel 87 case of the character, if there is one. */
2727 nigel 77
2728     if (fc != dc)
2729     {
2730     #ifdef SUPPORT_UCP
2731 ph10 349 if (dc != UCD_OTHERCASE(fc))
2732 nigel 77 #endif
2733 ph10 510 MRRETURN(MATCH_NOMATCH);
2734 nigel 77 }
2735     }
2736     }
2737     else
2738     #endif /* SUPPORT_UTF8 */
2739    
2740     /* Non-UTF-8 mode */
2741     {
2742 ph10 443 if (md->end_subject - eptr < 1)
2743 ph10 428 {
2744 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2745 ph10 510 MRRETURN(MATCH_NOMATCH);
2746 ph10 443 }
2747 ph10 510 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2748 nigel 77 ecode += 2;
2749     }
2750     break;
2751    
2752 nigel 93 /* Match a single character repeatedly. */
2753 nigel 77
2754     case OP_EXACT:
2755     min = max = GET2(ecode, 1);
2756     ecode += 3;
2757     goto REPEATCHAR;
2758    
2759 nigel 93 case OP_POSUPTO:
2760     possessive = TRUE;
2761     /* Fall through */
2762    
2763 nigel 77 case OP_UPTO:
2764     case OP_MINUPTO:
2765     min = 0;
2766     max = GET2(ecode, 1);
2767     minimize = *ecode == OP_MINUPTO;
2768     ecode += 3;
2769     goto REPEATCHAR;
2770    
2771 nigel 93 case OP_POSSTAR:
2772     possessive = TRUE;
2773     min = 0;
2774     max = INT_MAX;
2775     ecode++;
2776     goto REPEATCHAR;
2777    
2778     case OP_POSPLUS:
2779     possessive = TRUE;
2780     min = 1;
2781     max = INT_MAX;
2782     ecode++;
2783     goto REPEATCHAR;
2784    
2785     case OP_POSQUERY:
2786     possessive = TRUE;
2787     min = 0;
2788     max = 1;
2789     ecode++;
2790     goto REPEATCHAR;
2791    
2792 nigel 77 case OP_STAR:
2793     case OP_MINSTAR:
2794     case OP_PLUS:
2795     case OP_MINPLUS:
2796     case OP_QUERY:
2797     case OP_MINQUERY:
2798     c = *ecode++ - OP_STAR;
2799     minimize = (c & 1) != 0;
2800 ph10 443
2801 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2802     max = rep_max[c]; /* zero for max => infinity */
2803     if (max == 0) max = INT_MAX;
2804    
2805 ph10 426 /* Common code for all repeated single-character matches. */
2806 nigel 77
2807     REPEATCHAR:
2808     #ifdef SUPPORT_UTF8
2809     if (utf8)
2810     {
2811     length = 1;
2812     charptr = ecode;
2813     GETCHARLEN(fc, ecode, length);
2814     ecode += length;
2815    
2816     /* Handle multibyte character matching specially here. There is
2817     support for caseless matching if UCP support is present. */
2818    
2819     if (length > 1)
2820     {
2821     #ifdef SUPPORT_UCP
2822 nigel 93 unsigned int othercase;
2823 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2824 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2825 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2826 ph10 115 else oclength = 0;
2827 nigel 77 #endif /* SUPPORT_UCP */
2828    
2829     for (i = 1; i <= min; i++)
2830     {
2831 ph10 426 if (eptr <= md->end_subject - length &&
2832     memcmp(eptr, charptr, length) == 0) eptr += length;
2833 ph10 123 #ifdef SUPPORT_UCP
2834 ph10 426 else if (oclength > 0 &&
2835     eptr <= md->end_subject - oclength &&
2836     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2837     #endif /* SUPPORT_UCP */
2838 nigel 77 else
2839     {
2840 ph10 426 CHECK_PARTIAL();
2841 ph10 510 MRRETURN(MATCH_NOMATCH);
2842 nigel 77 }
2843     }
2844    
2845     if (min == max) continue;
2846    
2847     if (minimize)
2848     {
2849     for (fi = min;; fi++)
2850     {
2851 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2852 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2853 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2854 ph10 426 if (eptr <= md->end_subject - length &&
2855     memcmp(eptr, charptr, length) == 0) eptr += length;
2856 ph10 123 #ifdef SUPPORT_UCP
2857 ph10 426 else if (oclength > 0 &&
2858     eptr <= md->end_subject - oclength &&
2859     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2860     #endif /* SUPPORT_UCP */
2861 nigel 77 else
2862     {
2863 ph10 426 CHECK_PARTIAL();
2864 ph10 510 MRRETURN(MATCH_NOMATCH);
2865 nigel 77 }
2866     }
2867     /* Control never gets here */
2868     }
2869 nigel 93
2870     else /* Maximize */
2871 nigel 77 {
2872     pp = eptr;
2873     for (i = min; i < max; i++)
2874     {
2875 ph10 426 if (eptr <= md->end_subject - length &&
2876     memcmp(eptr, charptr, length) == 0) eptr += length;
2877 ph10 123 #ifdef SUPPORT_UCP
2878 ph10 426 else if (oclength > 0 &&
2879     eptr <= md->end_subject - oclength &&
2880     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2881     #endif /* SUPPORT_UCP */
2882 ph10 463 else
2883 ph10 462 {
2884 ph10 463 CHECK_PARTIAL();
2885 ph10 462 break;
2886 ph10 463 }
2887 nigel 77 }
2888 nigel 93
2889     if (possessive) continue;
2890 ph10 427
2891 ph10 120 for(;;)
2892 ph10 426 {
2893     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2894     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2895 ph10 510 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2896 ph10 115 #ifdef SUPPORT_UCP
2897 ph10 426 eptr--;
2898     BACKCHAR(eptr);
2899 ph10 123 #else /* without SUPPORT_UCP */
2900 ph10 426 eptr -= length;
2901 ph10 123 #endif /* SUPPORT_UCP */
2902 ph10 426 }
2903 nigel 77 }
2904     /* Control never gets here */
2905     }
2906    
2907     /* If the length of a UTF-8 character is 1, we fall through here, and
2908     obey the code as for non-UTF-8 characters below, though in this case the
2909     value of fc will always be < 128. */
2910     }
2911     else
2912     #endif /* SUPPORT_UTF8 */
2913    
2914     /* When not in UTF-8 mode, load a single-byte character. */
2915    
2916 ph10 426 fc = *ecode++;
2917 ph10 443
2918 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2919     may not be in UTF-8 mode. The code is duplicated for the caseless and
2920     caseful cases, for speed, since matching characters is likely to be quite
2921     common. First, ensure the minimum number of matches are present. If min =
2922     max, continue at the same level without recursing. Otherwise, if
2923     minimizing, keep trying the rest of the expression and advancing one
2924     matching character if failing, up to the maximum. Alternatively, if
2925     maximizing, find the maximum number of characters and work backwards. */
2926    
2927     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2928     max, eptr));
2929    
2930     if ((ims & PCRE_CASELESS) != 0)
2931     {
2932     fc = md->lcc[fc];
2933     for (i = 1; i <= min; i++)
2934 ph10 426 {
2935     if (eptr >= md->end_subject)
2936     {
2937     SCHECK_PARTIAL();
2938 ph10 510 MRRETURN(MATCH_NOMATCH);
2939 ph10 426 }
2940 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2941 ph10 426 }
2942 nigel 77 if (min == max) continue;
2943     if (minimize)
2944     {
2945     for (fi = min;; fi++)
2946     {
2947 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2948 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2949 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2950 ph10 426 if (eptr >= md->end_subject)
2951     {
2952 ph10 427 SCHECK_PARTIAL();
2953 ph10 510 MRRETURN(MATCH_NOMATCH);
2954 ph10 426 }
2955 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2956 nigel 77 }
2957     /* Control never gets here */
2958     }
2959 nigel 93 else /* Maximize */
2960 nigel 77 {
2961     pp = eptr;
2962     for (i = min; i < max; i++)
2963     {
2964 ph10 463 if (eptr >= md->end_subject)
2965 ph10 462 {
2966     SCHECK_PARTIAL();
2967     break;
2968 ph10 463 }
2969 ph10 462 if (fc != md->lcc[*eptr]) break;
2970 nigel 77 eptr++;
2971     }
2972 ph10 427
2973 nigel 93 if (possessive) continue;
2974 ph10 427
2975 nigel 77 while (eptr >= pp)
2976     {
2977 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2978 nigel 77 eptr--;
2979     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2980     }
2981 ph10 510 MRRETURN(MATCH_NOMATCH);
2982 nigel 77 }
2983     /* Control never gets here */
2984     }
2985    
2986     /* Caseful comparisons (includes all multi-byte characters) */
2987    
2988     else
2989     {
2990 ph10 427 for (i = 1; i <= min; i++)
2991 ph10 426 {
2992     if (eptr >= md->end_subject)
2993     {
2994     SCHECK_PARTIAL();
2995 ph10 510 MRRETURN(MATCH_NOMATCH);
2996 ph10 426 }
2997 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2998 ph10 427 }
2999 ph10 443
3000 nigel 77 if (min == max) continue;
3001 ph10 443
3002 nigel 77 if (minimize)
3003     {
3004     for (fi = min;; fi++)
3005     {
3006 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3007 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3008 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3009 ph10 426 if (eptr >= md->end_subject)
3010 ph10 427 {
3011 ph10 426 SCHECK_PARTIAL();
3012 ph10 510 MRRETURN(MATCH_NOMATCH);
3013 ph10 427 }
3014 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3015 nigel 77 }
3016     /* Control never gets here */
3017     }
3018 nigel 93 else /* Maximize */
3019 nigel 77 {
3020     pp = eptr;
3021     for (i = min; i < max; i++)
3022     {
3023 ph10 463 if (eptr >= md->end_subject)
3024 ph10 462 {
3025 ph10 463 SCHECK_PARTIAL();
3026 ph10 462 break;
3027 ph10 463 }
3028 ph10 462 if (fc != *eptr) break;
3029 nigel 77 eptr++;
3030     }
3031 nigel 93 if (possessive) continue;
3032 ph10 443
3033 nigel 77 while (eptr >= pp)
3034     {
3035 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3036 nigel 77 eptr--;
3037     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3038     }
3039 ph10 510 MRRETURN(MATCH_NOMATCH);
3040 nigel 77 }
3041     }
3042     /* Control never gets here */
3043    
3044     /* Match a negated single one-byte character. The character we are
3045     checking can be multibyte. */
3046    
3047     case OP_NOT:
3048 ph10 443 if (eptr >= md->end_subject)
3049 ph10 428 {
3050 ph10 443 SCHECK_PARTIAL();
3051 ph10 510 MRRETURN(MATCH_NOMATCH);
3052 ph10 443 }
3053 nigel 77 ecode++;
3054     GETCHARINCTEST(c, eptr);
3055     if ((ims & PCRE_CASELESS) != 0)
3056     {
3057     #ifdef SUPPORT_UTF8
3058     if (c < 256)
3059     #endif
3060     c = md->lcc[c];
3061 ph10 510 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3062 nigel 77 }
3063     else
3064     {
3065 ph10 510 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3066 nigel 77 }
3067     break;
3068    
3069     /* Match a negated single one-byte character repeatedly. This is almost a
3070     repeat of the code for a repeated single character, but I haven't found a
3071     nice way of commoning these up that doesn't require a test of the
3072     positive/negative option for each character match. Maybe that wouldn't add
3073     very much to the time taken, but character matching *is* what this is all
3074     about... */
3075    
3076     case OP_NOTEXACT:
3077     min = max = GET2(ecode, 1);
3078     ecode += 3;
3079     goto REPEATNOTCHAR;
3080    
3081     case OP_NOTUPTO:
3082     case OP_NOTMINUPTO:
3083     min = 0;
3084     max = GET2(ecode, 1);
3085     minimize = *ecode == OP_NOTMINUPTO;
3086     ecode += 3;
3087     goto REPEATNOTCHAR;
3088    
3089 nigel 93 case OP_NOTPOSSTAR:
3090     possessive = TRUE;
3091     min = 0;
3092     max = INT_MAX;
3093     ecode++;
3094     goto REPEATNOTCHAR;
3095    
3096     case OP_NOTPOSPLUS:
3097     possessive = TRUE;
3098     min = 1;
3099     max = INT_MAX;
3100     ecode++;
3101     goto REPEATNOTCHAR;
3102    
3103     case OP_NOTPOSQUERY:
3104     possessive = TRUE;
3105     min = 0;
3106     max = 1;
3107     ecode++;
3108     goto REPEATNOTCHAR;
3109    
3110     case OP_NOTPOSUPTO:
3111     possessive = TRUE;
3112     min = 0;
3113     max = GET2(ecode, 1);
3114     ecode += 3;
3115     goto REPEATNOTCHAR;
3116    
3117 nigel 77 case OP_NOTSTAR:
3118     case OP_NOTMINSTAR:
3119     case OP_NOTPLUS:
3120     case OP_NOTMINPLUS:
3121     case OP_NOTQUERY:
3122     case OP_NOTMINQUERY:
3123     c = *ecode++ - OP_NOTSTAR;
3124     minimize = (c & 1) != 0;
3125     min = rep_min[c]; /* Pick up values from tables; */
3126     max = rep_max[c]; /* zero for max => infinity */
3127     if (max == 0) max = INT_MAX;
3128    
3129 ph10 426 /* Common code for all repeated single-byte matches. */
3130 nigel 77
3131     REPEATNOTCHAR:
3132     fc = *ecode++;
3133    
3134     /* The code is duplicated for the caseless and caseful cases, for speed,
3135     since matching characters is likely to be quite common. First, ensure the
3136     minimum number of matches are present. If min = max, continue at the same
3137     level without recursing. Otherwise, if minimizing, keep trying the rest of
3138     the expression and advancing one matching character if failing, up to the
3139     maximum. Alternatively, if maximizing, find the maximum number of
3140     characters and work backwards. */
3141    
3142     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3143     max, eptr));
3144    
3145     if ((ims & PCRE_CASELESS) != 0)
3146     {
3147     fc = md->lcc[fc];
3148    
3149     #ifdef SUPPORT_UTF8
3150     /* UTF-8 mode */
3151     if (utf8)
3152     {
3153 nigel 93 register unsigned int d;
3154 nigel 77 for (i = 1; i <= min; i++)
3155     {
3156 ph10 426 if (eptr >= md->end_subject)
3157     {
3158     SCHECK_PARTIAL();
3159 ph10 510 MRRETURN(MATCH_NOMATCH);
3160 ph10 427 }
3161 nigel 77 GETCHARINC(d, eptr);
3162     if (d < 256) d = md->lcc[d];
3163 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3164 nigel 77 }
3165     }
3166     else
3167     #endif
3168    
3169     /* Not UTF-8 mode */
3170     {
3171     for (i = 1; i <= min; i++)
3172 ph10 426 {
3173     if (eptr >= md->end_subject)
3174     {
3175     SCHECK_PARTIAL();
3176 ph10 510 MRRETURN(MATCH_NOMATCH);
3177 ph10 427 }
3178 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3179 ph10 427 }
3180 nigel 77 }
3181    
3182     if (min == max) continue;
3183    
3184     if (minimize)
3185     {
3186     #ifdef SUPPORT_UTF8
3187     /* UTF-8 mode */
3188     if (utf8)
3189     {
3190 nigel 93 register unsigned int d;
3191 nigel 77 for (fi = min;; fi++)
3192     {
3193 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3194 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3195 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3196 ph10 427 if (eptr >= md->end_subject)
3197 ph10 426 {
3198 ph10 427 SCHECK_PARTIAL();
3199 ph10 510 MRRETURN(MATCH_NOMATCH);
3200 ph10 427 }
3201 nigel 77 GETCHARINC(d, eptr);
3202     if (d < 256) d = md->lcc[d];
3203 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3204 nigel 77 }
3205     }
3206     else
3207     #endif
3208     /* Not UTF-8 mode */
3209     {
3210     for (fi = min;; fi++)
3211     {
3212 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3213 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3214 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3215 ph10 426 if (eptr >= md->end_subject)
3216     {
3217     SCHECK_PARTIAL();
3218 ph10 510 MRRETURN(MATCH_NOMATCH);
3219 ph10 426 }
3220 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3221 nigel 77 }
3222     }
3223     /* Control never gets here */
3224     }
3225    
3226     /* Maximize case */
3227    
3228     else
3229     {
3230     pp = eptr;
3231    
3232     #ifdef SUPPORT_UTF8
3233     /* UTF-8 mode */
3234     if (utf8)
3235     {
3236 nigel 93 register unsigned int d;
3237 nigel 77 for (i = min; i < max; i++)
3238     {
3239     int len = 1;
3240 ph10 463 if (eptr >= md->end_subject)
3241 ph10 462 {
3242 ph10 463 SCHECK_PARTIAL();
3243 ph10 462 break;
3244 ph10 463 }
3245 nigel 77 GETCHARLEN(d, eptr, len);
3246     if (d < 256) d = md->lcc[d];
3247     if (fc == d) break;
3248     eptr += len;
3249     }
3250 nigel 93 if (possessive) continue;
3251     for(;;)
3252 nigel 77 {
3253 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3254 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3255     if (eptr-- == pp) break; /* Stop if tried at original pos */
3256     BACKCHAR(eptr);
3257     }
3258     }
3259     else
3260     #endif
3261     /* Not UTF-8 mode */
3262     {
3263     for (i = min; i < max; i++)
3264     {
3265 ph10 463 if (eptr >= md->end_subject)
3266 ph10 462 {
3267     SCHECK_PARTIAL();
3268     break;
3269 ph10 463 }
3270 ph10 462 if (fc == md->lcc[*eptr]) break;
3271 nigel 77 eptr++;
3272     }
3273 nigel 93 if (possessive) continue;
3274 nigel 77 while (eptr >= pp)
3275     {
3276 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3277 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3278     eptr--;
3279     }
3280     }
3281    
3282 ph10 510 MRRETURN(MATCH_NOMATCH);
3283 nigel 77 }
3284     /* Control never gets here */
3285     }
3286    
3287     /* Caseful comparisons */
3288    
3289     else
3290     {
3291     #ifdef SUPPORT_UTF8
3292     /* UTF-8 mode */
3293     if (utf8)
3294     {
3295 nigel 93 register unsigned int d;
3296 nigel 77 for (i = 1; i <= min; i++)
3297     {
3298 ph10 426 if (eptr >= md->end_subject)
3299     {
3300     SCHECK_PARTIAL();
3301 ph10 510 MRRETURN(MATCH_NOMATCH);
3302 ph10 427 }
3303 nigel 77 GETCHARINC(d, eptr);
3304 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3305 nigel 77 }
3306     }
3307     else
3308     #endif
3309     /* Not UTF-8 mode */
3310     {
3311     for (i = 1; i <= min; i++)
3312 ph10 426 {
3313     if (eptr >= md->end_subject)
3314     {
3315     SCHECK_PARTIAL();
3316 ph10 510 MRRETURN(MATCH_NOMATCH);
3317 ph10 427 }
3318 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3319 ph10 427 }
3320 nigel 77 }
3321    
3322     if (min == max) continue;
3323    
3324     if (minimize)
3325     {
3326     #ifdef SUPPORT_UTF8
3327     /* UTF-8 mode */
3328     if (utf8)
3329     {
3330 nigel 93 register unsigned int d;
3331 nigel 77 for (fi = min;; fi++)
3332     {
3333 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3334 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3335 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3336 ph10 427 if (eptr >= md->end_subject)
3337 ph10 426 {
3338 ph10 427 SCHECK_PARTIAL();
3339 ph10 510 MRRETURN(MATCH_NOMATCH);
3340 ph10 427 }
3341 nigel 77 GETCHARINC(d, eptr);
3342 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3343 nigel 77 }
3344     }
3345     else
3346     #endif
3347     /* Not UTF-8 mode */
3348     {
3349     for (fi = min;; fi++)
3350     {
3351 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3352 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3353 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3354 ph10 426 if (eptr >= md->end_subject)
3355     {
3356     SCHECK_PARTIAL();
3357 ph10 510 MRRETURN(MATCH_NOMATCH);
3358 ph10 427 }
3359 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3360 nigel 77 }
3361     }
3362     /* Control never gets here */
3363     }
3364    
3365     /* Maximize case */
3366    
3367     else
3368     {
3369     pp = eptr;
3370    
3371     #ifdef SUPPORT_UTF8
3372     /* UTF-8 mode */
3373     if (utf8)
3374     {
3375 nigel 93 register unsigned int d;
3376 nigel 77 for (i = min; i < max; i++)
3377     {
3378     int len = 1;
3379 ph10 463 if (eptr >= md->end_subject)
3380 ph10 462 {
3381 ph10 463 SCHECK_PARTIAL();
3382 ph10 462 break;
3383 ph10 463 }
3384 nigel 77 GETCHARLEN(d, eptr, len);
3385     if (fc == d) break;
3386     eptr += len;
3387     }
3388 nigel 93 if (possessive) continue;
3389 nigel 77 for(;;)
3390     {
3391 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3392 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3393     if (eptr-- == pp) break; /* Stop if tried at original pos */
3394     BACKCHAR(eptr);
3395     }
3396     }
3397     else
3398     #endif
3399     /* Not UTF-8 mode */
3400     {
3401     for (i = min; i < max; i++)
3402     {
3403 ph10 463 if (eptr >= md->end_subject)
3404 ph10 462 {
3405 ph10 463 SCHECK_PARTIAL();
3406 ph10 462 break;
3407 ph10 463 }
3408 ph10 462 if (fc == *eptr) break;
3409 nigel 77 eptr++;
3410     }
3411 nigel 93 if (possessive) continue;
3412 nigel 77 while (eptr >= pp)
3413     {
3414 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3415 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3416     eptr--;
3417     }
3418     }
3419    
3420 ph10 510 MRRETURN(MATCH_NOMATCH);
3421 nigel 77 }
3422     }
3423     /* Control never gets here */
3424    
3425     /* Match a single character type repeatedly; several different opcodes
3426     share code. This is very similar to the code for single characters, but we
3427     repeat it in the interests of efficiency. */
3428    
3429     case OP_TYPEEXACT:
3430     min = max = GET2(ecode, 1);
3431     minimize = TRUE;
3432     ecode += 3;
3433     goto REPEATTYPE;
3434    
3435     case OP_TYPEUPTO:
3436     case OP_TYPEMINUPTO:
3437     min = 0;
3438     max = GET2(ecode, 1);
3439     minimize = *ecode == OP_TYPEMINUPTO;
3440     ecode += 3;
3441     goto REPEATTYPE;
3442    
3443 nigel 93 case OP_TYPEPOSSTAR:
3444     possessive = TRUE;
3445     min = 0;
3446     max = INT_MAX;
3447     ecode++;
3448     goto REPEATTYPE;
3449    
3450     case OP_TYPEPOSPLUS:
3451     possessive = TRUE;
3452     min = 1;
3453     max = INT_MAX;
3454     ecode++;
3455     goto REPEATTYPE;
3456    
3457     case OP_TYPEPOSQUERY:
3458     possessive = TRUE;
3459     min = 0;
3460     max = 1;
3461     ecode++;
3462     goto REPEATTYPE;
3463    
3464     case OP_TYPEPOSUPTO:
3465     possessive = TRUE;
3466     min = 0;
3467     max = GET2(ecode, 1);
3468     ecode += 3;
3469     goto REPEATTYPE;
3470    
3471 nigel 77 case OP_TYPESTAR:
3472     case OP_TYPEMINSTAR:
3473     case OP_TYPEPLUS:
3474     case OP_TYPEMINPLUS:
3475     case OP_TYPEQUERY:
3476     case OP_TYPEMINQUERY:
3477     c = *ecode++ - OP_TYPESTAR;
3478     minimize = (c & 1) != 0;
3479     min = rep_min[c]; /* Pick up values from tables; */
3480     max = rep_max[c]; /* zero for max => infinity */
3481     if (max == 0) max = INT_MAX;
3482    
3483     /* Common code for all repeated single character type matches. Note that
3484     in UTF-8 mode, '.' matches a character of any length, but for the other
3485     character types, the valid characters are all one-byte long. */
3486    
3487     REPEATTYPE:
3488     ctype = *ecode++; /* Code for the character type */
3489    
3490     #ifdef SUPPORT_UCP
3491     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3492     {
3493     prop_fail_result = ctype == OP_NOTPROP;
3494     prop_type = *ecode++;
3495 nigel 87 prop_value = *ecode++;
3496 nigel 77 }
3497     else prop_type = -1;
3498     #endif
3499    
3500     /* First, ensure the minimum number of matches are present. Use inline
3501     code for maximizing the speed, and do the type test once at the start
3502 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3503 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3504     and single-bytes. */
3505    
3506     if (min > 0)
3507     {
3508     #ifdef SUPPORT_UCP
3509 nigel 87 if (prop_type >= 0)
3510 nigel 77 {
3511 nigel 87 switch(prop_type)
3512 nigel 77 {
3513 nigel 87 case PT_ANY:
3514 ph10 510 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3515 nigel 87 for (i = 1; i <= min; i++)
3516     {
3517 ph10 427 if (eptr >= md->end_subject)
3518 ph10 426 {
3519 ph10 427 SCHECK_PARTIAL();
3520 ph10 510 MRRETURN(MATCH_NOMATCH);
3521 ph10 427 }
3522 ph10 184 GETCHARINCTEST(c, eptr);
3523 nigel 87 }
3524     break;
3525    
3526     case PT_LAMP:
3527     for (i = 1; i <= min; i++)
3528     {
3529 ph10 427 if (eptr >= md->end_subject)
3530 ph10 426 {
3531 ph10 427 SCHECK_PARTIAL();
3532 ph10 510 MRRETURN(MATCH_NOMATCH);
3533 ph10 427 }
3534 ph10 184 GETCHARINCTEST(c, eptr);
3535 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3536 nigel 87 if ((prop_chartype == ucp_Lu ||
3537     prop_chartype == ucp_Ll ||
3538     prop_chartype == ucp_Lt) == prop_fail_result)
3539 ph10 510 MRRETURN(MATCH_NOMATCH);
3540 nigel 87 }
3541     break;
3542    
3543     case PT_GC:
3544     for (i = 1; i <= min; i++)
3545     {
3546 ph10 427 if (eptr >= md->end_subject)
3547 ph10 426 {
3548 ph10 427 SCHECK_PARTIAL();
3549 ph10 510 MRRETURN(MATCH_NOMATCH);
3550 ph10 427 }
3551 ph10 184 GETCHARINCTEST(c, eptr);
3552 ph10 349 prop_category = UCD_CATEGORY(c);
3553 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3554 ph10 510 MRRETURN(MATCH_NOMATCH);
3555 nigel 87 }
3556     break;
3557    
3558     case PT_PC:
3559     for (i = 1; i <= min; i++)
3560     {
3561 ph10 427 if (eptr >= md->end_subject)
3562 ph10 426 {
3563 ph10 427 SCHECK_PARTIAL();
3564 ph10 510 MRRETURN(MATCH_NOMATCH);
3565 ph10 427 }
3566 ph10 184 GETCHARINCTEST(c, eptr);
3567 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3568 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3569 ph10 510 MRRETURN(MATCH_NOMATCH);
3570 nigel 87 }
3571     break;
3572    
3573     case PT_SC:
3574     for (i = 1; i <= min; i++)
3575     {
3576 ph10 427 if (eptr >= md->end_subject)
3577 ph10 426 {
3578 ph10 427 SCHECK_PARTIAL();
3579 ph10 510 MRRETURN(MATCH_NOMATCH);
3580 ph10 427 }
3581 ph10 184 GETCHARINCTEST(c, eptr);
3582 ph10 349 prop_script = UCD_SCRIPT(c);
3583 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3584 ph10 510 MRRETURN(MATCH_NOMATCH);
3585 nigel 87 }
3586     break;
3587 ph10 527
3588 ph10 517 case PT_ALNUM:
3589     for (i = 1; i <= min; i++)
3590     {
3591     if (eptr >= md->end_subject)
3592     {
3593     SCHECK_PARTIAL();
3594     MRRETURN(MATCH_NOMATCH);
3595     }
3596     GETCHARINCTEST(c, eptr);
3597 ph10 527 prop_category = UCD_CATEGORY(c);
3598     if ((prop_category == ucp_L || prop_category == ucp_N)
3599 ph10 517 == prop_fail_result)
3600     MRRETURN(MATCH_NOMATCH);
3601     }
3602     break;
3603 ph10 527
3604 ph10 517 case PT_SPACE: /* Perl space */
3605     for (i = 1; i <= min; i++)
3606     {
3607     if (eptr >= md->end_subject)
3608     {
3609     SCHECK_PARTIAL();
3610     MRRETURN(MATCH_NOMATCH);
3611     }
3612     GETCHARINCTEST(c, eptr);
3613 ph10 527 prop_category = UCD_CATEGORY(c);
3614     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3615     c == CHAR_FF || c == CHAR_CR)
3616 ph10 517 == prop_fail_result)
3617     MRRETURN(MATCH_NOMATCH);
3618     }
3619     break;
3620 ph10 527
3621 ph10 517 case PT_PXSPACE: /* POSIX space */
3622     for (i = 1; i <= min; i++)
3623     {
3624     if (eptr >= md->end_subject)
3625     {
3626     SCHECK_PARTIAL();
3627     MRRETURN(MATCH_NOMATCH);
3628     }
3629     GETCHARINCTEST(c, eptr);
3630 ph10 527 prop_category = UCD_CATEGORY(c);
3631     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3632     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3633 ph10 517 == prop_fail_result)
3634     MRRETURN(MATCH_NOMATCH);
3635     }
3636     break;
3637 ph10 527
3638     case PT_WORD:
3639 ph10 517 for (i = 1; i <= min; i++)
3640     {
3641     if (eptr >= md->end_subject)
3642     {
3643     SCHECK_PARTIAL();
3644     MRRETURN(MATCH_NOMATCH);
3645     }
3646     GETCHARINCTEST(c, eptr);
3647 ph10 527 prop_category = UCD_CATEGORY(c);
3648 ph10 517 if ((prop_category == ucp_L || prop_category == ucp_N ||
3649 ph10 527 c == CHAR_UNDERSCORE)
3650 ph10 517 == prop_fail_result)
3651     MRRETURN(MATCH_NOMATCH);
3652     }
3653     break;
3654 ph10 527
3655 ph10 517 /* This should not occur */
3656 nigel 87
3657     default:
3658     RRETURN(PCRE_ERROR_INTERNAL);
3659 nigel 77 }
3660     }
3661    
3662     /* Match extended Unicode sequences. We will get here only if the
3663     support is in the binary; otherwise a compile-time error occurs. */
3664    
3665     else if (ctype == OP_EXTUNI)
3666     {
3667     for (i = 1; i <= min; i++)
3668     {
3669 ph10 427 if (eptr >= md->end_subject)
3670 ph10 426 {
3671 ph10 427 SCHECK_PARTIAL();
3672 ph10 510 MRRETURN(MATCH_NOMATCH);
3673 ph10 427 }
3674 nigel 77 GETCHARINCTEST(c, eptr);
3675 ph10 349 prop_category = UCD_CATEGORY(c);
3676 ph10 510 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3677 nigel 77 while (eptr < md->end_subject)
3678     {
3679     int len = 1;
3680 ph10 426 if (!utf8) c = *eptr;
3681     else { GETCHARLEN(c, eptr, len); }
3682 ph10 349 prop_category = UCD_CATEGORY(c);
3683 nigel 77 if (prop_category != ucp_M) break;
3684     eptr += len;
3685     }
3686     }
3687     }
3688    
3689     else
3690     #endif /* SUPPORT_UCP */
3691    
3692     /* Handle all other cases when the coding is UTF-8 */
3693    
3694     #ifdef SUPPORT_UTF8
3695     if (utf8) switch(ctype)
3696     {
3697     case OP_ANY:
3698     for (i = 1; i <= min; i++)
3699     {
3700 ph10 426 if (eptr >= md->end_subject)
3701     {
3702 ph10 427 SCHECK_PARTIAL();
3703 ph10 510 MRRETURN(MATCH_NOMATCH);
3704 ph10 427 }
3705 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3706 nigel 91 eptr++;
3707 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3708     }
3709     break;
3710    
3711 ph10 341 case OP_ALLANY:
3712     for (i = 1; i <= min; i++)
3713     {
3714 ph10 427 if (eptr >= md->end_subject)
3715 ph10 426 {
3716     SCHECK_PARTIAL();
3717 ph10 510 MRRETURN(MATCH_NOMATCH);
3718 ph10 427 }
3719 ph10 341 eptr++;
3720     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3721     }
3722     break;
3723    
3724 nigel 77 case OP_ANYBYTE:
3725 ph10 510 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3726 nigel 77 eptr += min;
3727     break;
3728    
3729 nigel 93 case OP_ANYNL:
3730     for (i = 1; i <= min; i++)
3731     {
3732 ph10 427 if (eptr >= md->end_subject)
3733 ph10 426 {
3734     SCHECK_PARTIAL();
3735 ph10 510 MRRETURN(MATCH_NOMATCH);
3736 ph10 427 }
3737 nigel 93 GETCHARINC(c, eptr);
3738     switch(c)
3739     {
3740 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3741 nigel 93 case 0x000d:
3742     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3743     break;
3744 ph10 231
3745 nigel 93 case 0x000a:
3746 ph10 231 break;
3747    
3748 nigel 93 case 0x000b:
3749     case 0x000c:
3750     case 0x0085:
3751     case 0x2028:
3752     case 0x2029:
3753 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3754 nigel 93 break;
3755     }
3756     }
3757     break;
3758    
3759 ph10 178 case OP_NOT_HSPACE:
3760     for (i = 1; i <= min; i++)
3761     {
3762 ph10 427 if (eptr >= md->end_subject)
3763 ph10 426 {
3764     SCHECK_PARTIAL();
3765 ph10 510 MRRETURN(MATCH_NOMATCH);
3766 ph10 427 }
3767 ph10 178 GETCHARINC(c, eptr);
3768     switch(c)
3769     {
3770     default: break;
3771     case 0x09: /* HT */
3772     case 0x20: /* SPACE */
3773     case 0xa0: /* NBSP */
3774     case 0x1680: /* OGHAM SPACE MARK */
3775     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3776     case 0x2000: /* EN QUAD */
3777     case 0x2001: /* EM QUAD */
3778     case 0x2002: /* EN SPACE */
3779     case 0x2003: /* EM SPACE */
3780     case 0x2004: /* THREE-PER-EM SPACE */
3781     case 0x2005: /* FOUR-PER-EM SPACE */
3782     case 0x2006: /* SIX-PER-EM SPACE */
3783     case 0x2007: /* FIGURE SPACE */
3784     case 0x2008: /* PUNCTUATION SPACE */
3785     case 0x2009: /* THIN SPACE */
3786     case 0x200A: /* HAIR SPACE */
3787     case 0x202f: /* NARROW NO-BREAK SPACE */
3788     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3789     case 0x3000: /* IDEOGRAPHIC SPACE */
3790 ph10 510 MRRETURN(MATCH_NOMATCH);
3791 ph10 178 }
3792     }
3793     break;
3794 ph10 182
3795 ph10 178 case OP_HSPACE:
3796     for (i = 1; i <= min; i++)
3797     {
3798 ph10 427 if (eptr >= md->end_subject)
3799 ph10 426 {
3800 ph10 427 SCHECK_PARTIAL();
3801 ph10 510 MRRETURN(MATCH_NOMATCH);
3802 ph10 427 }
3803 ph10 178 GETCHARINC(c, eptr);
3804     switch(c)
3805     {
3806 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3807 ph10 178 case 0x09: /* HT */
3808     case 0x20: /* SPACE */
3809     case 0xa0: /* NBSP */
3810     case 0x1680: /* OGHAM SPACE MARK */
3811     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3812     case 0x2000: /* EN QUAD */
3813     case 0x2001: /* EM QUAD */
3814     case 0x2002: /* EN SPACE */
3815     case 0x2003: /* EM SPACE */
3816     case 0x2004: /* THREE-PER-EM SPACE */
3817     case 0x2005: /* FOUR-PER-EM SPACE */
3818     case 0x2006: /* SIX-PER-EM SPACE */
3819     case 0x2007: /* FIGURE SPACE */
3820     case 0x2008: /* PUNCTUATION SPACE */
3821     case 0x2009: /* THIN SPACE */
3822     case 0x200A: /* HAIR SPACE */
3823     case 0x202f: /* NARROW NO-BREAK SPACE */
3824     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3825     case 0x3000: /* IDEOGRAPHIC SPACE */
3826     break;
3827     }
3828     }
3829     break;
3830 ph10 182
3831 ph10 178 case OP_NOT_VSPACE:
3832     for (i = 1; i <= min; i++)
3833     {
3834 ph10 427 if (eptr >= md->end_subject)
3835 ph10 426 {
3836 ph10 427 SCHECK_PARTIAL();
3837 ph10 510 MRRETURN(MATCH_NOMATCH);
3838 ph10 427 }
3839 ph10 178 GETCHARINC(c, eptr);
3840     switch(c)
3841     {
3842     default: break;
3843     case 0x0a: /* LF */
3844     case 0x0b: /* VT */
3845     case 0x0c: /* FF */
3846     case 0x0d: /* CR */
3847     case 0x85: /* NEL */
3848     case 0x2028: /* LINE SEPARATOR */
3849     case 0x2029: /* PARAGRAPH SEPARATOR */
3850 ph10 510 MRRETURN(MATCH_NOMATCH);
3851 ph10 178 }
3852     }
3853     break;
3854 ph10 182
3855 ph10 178 case OP_VSPACE:
3856     for (i = 1; i <= min; i++)
3857     {
3858 ph10 427 if (eptr >= md->end_subject)
3859 ph10 426 {
3860 ph10 427 SCHECK_PARTIAL();
3861 ph10 510 MRRETURN(MATCH_NOMATCH);
3862 ph10 427 }
3863 ph10 178 GETCHARINC(c, eptr);
3864     switch(c)
3865     {
3866 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3867 ph10 178 case 0x0a: /* LF */
3868     case 0x0b: /* VT */
3869     case 0x0c: /* FF */
3870     case 0x0d: /* CR */
3871     case 0x85: /* NEL */
3872     case 0x2028: /* LINE SEPARATOR */
3873     case 0x2029: /* PARAGRAPH SEPARATOR */
3874 ph10 182 break;
3875 ph10 178 }
3876     }
3877     break;
3878    
3879 nigel 77 case OP_NOT_DIGIT:
3880     for (i = 1; i <= min; i++)
3881     {
3882 ph10 427 if (eptr >= md->end_subject)
3883 ph10 426 {
3884 ph10 427 SCHECK_PARTIAL();
3885 ph10 510 MRRETURN(MATCH_NOMATCH);
3886 ph10 427 }
3887 nigel 77 GETCHARINC(c, eptr);
3888     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3889 ph10 510 MRRETURN(MATCH_NOMATCH);
3890 nigel 77 }
3891     break;
3892    
3893     case OP_DIGIT:
3894     for (i = 1; i <= min; i++)
3895     {
3896 ph10 427 if (eptr >= md->end_subject)
3897 ph10 426 {
3898 ph10 427 SCHECK_PARTIAL();
3899 ph10 510 MRRETURN(MATCH_NOMATCH);
3900 ph10 427 }
3901 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3902 ph10 510 MRRETURN(MATCH_NOMATCH);
3903 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3904     }
3905     break;
3906    
3907     case OP_NOT_WHITESPACE:
3908     for (i = 1; i <= min; i++)
3909     {
3910 ph10 427 if (eptr >= md->end_subject)
3911 ph10 426 {
3912 ph10 427 SCHECK_PARTIAL();
3913 ph10 510 MRRETURN(MATCH_NOMATCH);
3914 ph10 427 }
3915 ph10 426 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3916 ph10 510 MRRETURN(MATCH_NOMATCH);
3917 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3918 nigel 77 }