/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 569 - (hide annotations) (download)
Sun Nov 7 16:14:50 2010 UTC (3 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 186848 byte(s)
Add PCRE_ERROR_SHORTUTF8 to PCRE_PARTIAL_HARD processing.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74 ph10 511 #define MATCH_ACCEPT (-999)
75     #define MATCH_COMMIT (-998)
76     #define MATCH_PRUNE (-997)
77     #define MATCH_SKIP (-996)
78     #define MATCH_SKIP_ARG (-995)
79     #define MATCH_THEN (-994)
80 ph10 210
81 ph10 510 /* This is a convenience macro for code that occurs many times. */
82    
/* Leave match() with the given code, first publishing the current MARK
pointer (markptr) in the match data block so the caller can see it. The
actual exit is delegated to RRETURN. */

#define MRRETURN(rcode) \
  { md->mark = markptr; RRETURN(rcode); }
88    
89 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
90     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91     because the offset vector is always a multiple of 3 long. */
92    
93     #define REC_STACK_SAVE_MAX 30
94    
95     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96    
/* Minimum repeat count for each of the six common repeat forms.
NOTE(review): presumably indexed by the repeat opcode's offset from the first
one (*, *?, +, +?, ?, ??) -- confirm against the code that reads this table. */

static const char rep_min[] = {
  0, 0,
  1, 1,
  0, 0
};

/* Maximum repeat count for the same six forms; 0 stands for "no upper limit". */

static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1
};
99    
100    
101    
102 ph10 475 #ifdef PCRE_DEBUG
103 nigel 77 /*************************************************
104     * Debugging function to print chars *
105     *************************************************/
106    
107     /* Print a sequence of chars in printable format, stopping at the end of the
108     subject if requested.
109    
110     Arguments:
111     p points to characters
112     length number to print
113     is_subject TRUE if printing from within md->start_subject
114     md pointer to matching data block, if is_subject is TRUE
115    
116     Returns: nothing
117     */
118    
119     static void
120     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121     {
122 nigel 93 unsigned int c;
123 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124     while (length-- > 0)
125     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126     }
127     #endif
128    
129    
130    
131     /*************************************************
132     * Match a back-reference *
133     *************************************************/
134    
135     /* If a back reference hasn't been set, the length that is passed is greater
136     than the number of characters left in the string, so the match fails.
137    
138     Arguments:
139     offset index into the offset vector
140     eptr points into the subject
141     length length to be matched
142     md points to match data block
143     ims the ims flags
144    
145     Returns: TRUE if matched
146     */
147    
148     static BOOL
149 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
150 nigel 77 unsigned long int ims)
151     {
152 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
153 nigel 77
154 ph10 475 #ifdef PCRE_DEBUG
155 nigel 77 if (eptr >= md->end_subject)
156     printf("matching subject <null>");
157     else
158     {
159     printf("matching subject ");
160     pchars(eptr, length, TRUE, md);
161     }
162     printf(" against backref ");
163     pchars(p, length, FALSE, md);
164     printf("\n");
165     #endif
166    
167     /* Always fail if not enough characters left */
168    
169     if (length > md->end_subject - eptr) return FALSE;
170    
171 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172     properly if Unicode properties are supported. Otherwise, we can check only
173     ASCII characters. */
174 nigel 77
175     if ((ims & PCRE_CASELESS) != 0)
176     {
177 ph10 354 #ifdef SUPPORT_UTF8
178     #ifdef SUPPORT_UCP
179     if (md->utf8)
180     {
181 ph10 358 USPTR endptr = eptr + length;
182 ph10 354 while (eptr < endptr)
183     {
184 ph10 358 int c, d;
185 ph10 354 GETCHARINC(c, eptr);
186     GETCHARINC(d, p);
187     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188 ph10 358 }
189     }
190 ph10 354 else
191     #endif
192     #endif
193    
194     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195     is no UCP support. */
196 ph10 358
197 nigel 77 while (length-- > 0)
198 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199 nigel 77 }
200 ph10 358
201 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
202     are in UTF-8 mode. */
203 ph10 358
204 nigel 77 else
205     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
207     return TRUE;
208     }
209    
210    
211    
212     /***************************************************************************
213     ****************************************************************************
214     RECURSION IN THE match() FUNCTION
215    
216 nigel 87 The match() function is highly recursive, though not every recursive call
217     increases the recursive depth. Nevertheless, some regular expressions can cause
218     it to recurse to a great depth. I was writing for Unix, so I just let it call
219     itself recursively. This uses the stack for saving everything that has to be
220     saved for a recursive call. On Unix, the stack can be large, and this works
221     fine.
222 nigel 77
223 nigel 87 It turns out that on some non-Unix-like systems there are problems with
224     programs that use a lot of stack. (This despite the fact that every last chip
225     has oodles of memory these days, and techniques for extending the stack have
226     been known for decades.) So....
227 nigel 77
228     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229     calls by keeping local variables that need to be preserved in blocks of memory
230 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
231 nigel 77 achieve this so that the actual code doesn't look very different to what it
232     always used to.
233 ph10 164
234 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
235 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
236     Switzer, the use of longjmp() has been abolished, at the cost of having to
237     provide a unique number for each call to RMATCH. There is no way of generating
238     a sequence of numbers at compile time in C. I have given them names, to make
239     them stand out more clearly.
240    
241     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
244     don't have indeterminate values; this has meant that the frame size can be
245 ph10 164 reduced because the result can be "passed back" by straight setting of the
246     variable instead of being passed in the frame.
247 nigel 77 ****************************************************************************
248     ***************************************************************************/
249    
250 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251     below must be updated in sync. */
252 nigel 77
/* One distinct, consecutive number (starting at 1) per RMATCH call site. */

enum {
  RM1 = 1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,
  RM9,     RM10, RM11, RM12, RM13, RM14, RM15, RM16,
  RM17,    RM18, RM19, RM20, RM21, RM22, RM23, RM24,
  RM25,    RM26, RM27, RM28, RM29, RM30, RM31, RM32,
  RM33,    RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41,    RM42, RM43, RM44, RM45, RM46, RM47, RM48,
  RM49,    RM50, RM51, RM52, RM53, RM54, RM55, RM56,
  RM57,    RM58, RM59, RM60, RM61, RM62
};
260 ph10 164
261 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
262 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
263 ph10 501 actually used in this definition. */
264 nigel 77
265     #ifndef NO_RECURSE
266     #define REGISTER register
267 ph10 164
/* Stack-recursion forms of RMATCH and RRETURN. RMATCH calls match() with the
caller's current mstart and markptr and with rdepth+1, leaving the result in
rrc; its last argument (the call-site number) is ignored here. RRETURN is a
plain return. */

#ifndef PCRE_DEBUG

#define RMATCH(xeptr,xecode,xtop,xmd,xims,xeptrb,xflags,xwhere) \
  rrc = match(xeptr,xecode,mstart,markptr,xtop,xmd,xims,xeptrb,xflags,rdepth+1)
#define RRETURN(xrc) return xrc

#else  /* PCRE_DEBUG: same actions, with a trace of each call and return */

#define RMATCH(xeptr,xecode,xtop,xmd,xims,xeptrb,xflags,xwhere) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(xeptr,xecode,mstart,markptr,xtop,xmd,xims,xeptrb,xflags,rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }
#define RRETURN(xrc) \
  { \
  printf("match() returned %d from line %d ", xrc, __LINE__); \
  return xrc; \
  }

#endif  /* PCRE_DEBUG */
285    
286 nigel 77 #else
287    
288    
289 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
290     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291     argument of match(), which never changes. */
292 nigel 77
293     #define REGISTER
294    
/* Heap-frame form of RMATCH: emulate a recursive call to match(). A new frame
is obtained from pcre_stack_malloc (PCRE_ERROR_NOMEMORY is returned through
RRETURN if that fails), linked to the current frame, and loaded with the
"arguments" of the call; the current frame records in Xwhere which call site
to resume at. Control then goes to HEAP_RECURSE, and comes back at the L_<rw>
label generated here. The rd argument is not used. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *rm_frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
  if (rm_frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  rm_frame->Xprevframe = frame;\
  rm_frame->Xrdepth = frame->Xrdepth + 1;\
  rm_frame->Xeptr = ra;\
  rm_frame->Xecode = rb;\
  rm_frame->Xmstart = mstart;\
  rm_frame->Xmarkptr = markptr;\
  rm_frame->Xoffset_top = rc;\
  rm_frame->Xims = re;\
  rm_frame->Xeptrb = rf;\
  rm_frame->Xflags = rg;\
  frame->Xwhere = rw;\
  frame = rm_frame;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
316    
/* Heap-frame form of RRETURN: pop the current frame and release it with
pcre_stack_free. If a suspended "caller" frame remains, hand the result back
in rrc and jump to HEAP_RETURN so that it can resume; otherwise the popped
frame was the top-level one and match() really returns.

RRETURN is also used when the very first frame could not be allocated, in
which case frame is NULL. Nothing is dereferenced or freed then; the error
code is simply returned. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  if (oldframe != NULL)\
    {\
    frame = oldframe->Xprevframe;\
    (pcre_stack_free)(oldframe);\
    }\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
329    
330    
331     /* Structure for remembering the local variables in a private frame */
332    
333     typedef struct heapframe {
334     struct heapframe *Xprevframe;
335    
336     /* Function arguments that may change */
337    
338 ph10 409 USPTR Xeptr;
339 nigel 77 const uschar *Xecode;
340 ph10 409 USPTR Xmstart;
341 ph10 501 USPTR Xmarkptr;
342 nigel 77 int Xoffset_top;
343     long int Xims;
344     eptrblock *Xeptrb;
345     int Xflags;
346 nigel 91 unsigned int Xrdepth;
347 nigel 77
348     /* Function local variables */
349    
350 ph10 409 USPTR Xcallpat;
351 ph10 406 #ifdef SUPPORT_UTF8
352 ph10 409 USPTR Xcharptr;
353 ph10 406 #endif
354 ph10 409 USPTR Xdata;
355     USPTR Xnext;
356     USPTR Xpp;
357     USPTR Xprev;
358     USPTR Xsaved_eptr;
359 nigel 77
360     recursion_info Xnew_recursive;
361    
362     BOOL Xcur_is_word;
363     BOOL Xcondition;
364     BOOL Xprev_is_word;
365    
366     unsigned long int Xoriginal_ims;
367    
368     #ifdef SUPPORT_UCP
369     int Xprop_type;
370 nigel 87 int Xprop_value;
371 nigel 77 int Xprop_fail_result;
372     int Xprop_category;
373     int Xprop_chartype;
374 nigel 87 int Xprop_script;
375 ph10 123 int Xoclength;
376     uschar Xocchars[8];
377 nigel 77 #endif
378    
379 ph10 403 int Xcodelink;
380 nigel 77 int Xctype;
381 nigel 93 unsigned int Xfc;
382 nigel 77 int Xfi;
383     int Xlength;
384     int Xmax;
385     int Xmin;
386     int Xnumber;
387     int Xoffset;
388     int Xop;
389     int Xsave_capture_last;
390     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
391     int Xstacksave[REC_STACK_SAVE_MAX];
392    
393     eptrblock Xnewptrb;
394    
395 ph10 164 /* Where to jump back to */
396 nigel 77
397 ph10 164 int Xwhere;
398 ph10 165
399 nigel 77 } heapframe;
400    
401     #endif
402    
403    
404     /***************************************************************************
405     ***************************************************************************/
406    
407    
408    
409     /*************************************************
410     * Match from current position *
411     *************************************************/
412    
413 nigel 93 /* This function is called recursively in many circumstances. Whenever it
414 nigel 77 returns a negative (error) response, the outer incarnation must also return the
415 ph10 426 same response. */
416 nigel 77
417 ph10 426 /* These macros pack up tests that are used for partial matching, and which
418     appear several times in the code. We set the "hit end" flag if the pointer is
419     at the end of the subject and also past the start of the subject (i.e.
420 ph10 427 something has been matched). For hard partial matching, we then return
421     immediately. The second one is used when we already know we are past the end of
422     the subject. */
423 ph10 426
/* Action shared by the two partial-match tests below: note that the end of
the subject was reached and, when md->partial is greater than 1, leave match()
at once with PCRE_ERROR_PARTIAL. */

#define PARTIAL_HIT_END() \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
    }

/* Partial matching requested, eptr at or beyond the end of the subject, and
eptr beyond md->start_used_ptr. */

#define CHECK_PARTIAL()\
  if (md->partial != 0 && eptr >= md->end_subject && \
      eptr > md->start_used_ptr) \
    PARTIAL_HIT_END()

/* As above, but without the end-of-subject test, for callers that already
know eptr is past the end. */

#define SCHECK_PARTIAL()\
  if (md->partial != 0 && eptr > md->start_used_ptr) \
    PARTIAL_HIT_END()
438 ph10 426
439 ph10 427
440 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
441     the md structure (e.g. utf8, end_subject) into individual variables to improve
442 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
443     made performance worse.
444    
445     Arguments:
446 nigel 93 eptr pointer to current character in subject
447     ecode pointer to current position in compiled code
448 ph10 168 mstart pointer to the current match start position (can be modified
449 ph10 172 by encountering \K)
450 ph10 501 markptr pointer to the most recent MARK name, or NULL
451 nigel 77 offset_top current top pointer
452     md pointer to "static" info for the match
453     ims current /i, /m, and /s options
454     eptrb pointer to chain of blocks containing eptr at start of
455     brackets - for testing for empty matches
456     flags can contain
457     match_condassert - this is an assertion condition
458 nigel 93 match_cbegroup - this is the start of an unlimited repeat
459     group that can match an empty string
460 nigel 87 rdepth the recursion depth
461 nigel 77
462     Returns: MATCH_MATCH if matched ) these values are >= 0
463     MATCH_NOMATCH if failed to match )
464 ph10 510 a negative MATCH_xxx value for PRUNE, SKIP, etc
465 nigel 77 a negative PCRE_ERROR_xxx value if aborted by an error condition
466 nigel 87 (e.g. stopped by repeated call or recursion limit)
467 nigel 77 */
468    
469     static int
470 ph10 510 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
471     const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
472 ph10 501 eptrblock *eptrb, int flags, unsigned int rdepth)
473 nigel 77 {
474     /* These variables do not need to be preserved over recursion in this function,
475 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
476     "register" because they are used a lot in loops. */
477 nigel 77
478 nigel 91 register int rrc; /* Returns from recursive calls */
479     register int i; /* Used for loops not involving calls to RMATCH() */
480 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
481 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
482 nigel 77
483 nigel 93 BOOL minimize, possessive; /* Quantifier options */
484 ph10 403 int condcode;
485 nigel 93
486 nigel 77 /* When recursion is not being used, all "local" variables that have to be
487     preserved over calls to RMATCH() are part of a "frame" which is obtained from
488     heap storage. Set up the top-level frame here; others are obtained from the
489     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
490    
491     #ifdef NO_RECURSE
492 ph10 563 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
493 ph10 531 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
494 nigel 77 frame->Xprevframe = NULL; /* Marks the top level */
495    
496     /* Copy in the original argument variables */
497    
498     frame->Xeptr = eptr;
499     frame->Xecode = ecode;
500 ph10 168 frame->Xmstart = mstart;
501 ph10 501 frame->Xmarkptr = markptr;
502 nigel 77 frame->Xoffset_top = offset_top;
503     frame->Xims = ims;
504     frame->Xeptrb = eptrb;
505     frame->Xflags = flags;
506 nigel 87 frame->Xrdepth = rdepth;
507 nigel 77
508     /* This is where control jumps back to to effect "recursion" */
509    
510     HEAP_RECURSE:
511    
512     /* Macros make the argument variables come from the current frame */
513    
514     #define eptr frame->Xeptr
515     #define ecode frame->Xecode
516 ph10 168 #define mstart frame->Xmstart
517 ph10 501 #define markptr frame->Xmarkptr
518 nigel 77 #define offset_top frame->Xoffset_top
519     #define ims frame->Xims
520     #define eptrb frame->Xeptrb
521     #define flags frame->Xflags
522 nigel 87 #define rdepth frame->Xrdepth
523 nigel 77
524     /* Ditto for the local variables */
525    
526     #ifdef SUPPORT_UTF8
527     #define charptr frame->Xcharptr
528     #endif
529     #define callpat frame->Xcallpat
530 ph10 403 #define codelink frame->Xcodelink
531 nigel 77 #define data frame->Xdata
532     #define next frame->Xnext
533     #define pp frame->Xpp
534     #define prev frame->Xprev
535     #define saved_eptr frame->Xsaved_eptr
536    
537     #define new_recursive frame->Xnew_recursive
538    
539     #define cur_is_word frame->Xcur_is_word
540     #define condition frame->Xcondition
541     #define prev_is_word frame->Xprev_is_word
542    
543     #define original_ims frame->Xoriginal_ims
544    
545     #ifdef SUPPORT_UCP
546     #define prop_type frame->Xprop_type
547 nigel 87 #define prop_value frame->Xprop_value
548 nigel 77 #define prop_fail_result frame->Xprop_fail_result
549     #define prop_category frame->Xprop_category
550     #define prop_chartype frame->Xprop_chartype
551 nigel 87 #define prop_script frame->Xprop_script
552 ph10 115 #define oclength frame->Xoclength
553     #define occhars frame->Xocchars
554 nigel 77 #endif
555    
556     #define ctype frame->Xctype
557     #define fc frame->Xfc
558     #define fi frame->Xfi
559     #define length frame->Xlength
560     #define max frame->Xmax
561     #define min frame->Xmin
562     #define number frame->Xnumber
563     #define offset frame->Xoffset
564     #define op frame->Xop
565     #define save_capture_last frame->Xsave_capture_last
566     #define save_offset1 frame->Xsave_offset1
567     #define save_offset2 frame->Xsave_offset2
568     #define save_offset3 frame->Xsave_offset3
569     #define stacksave frame->Xstacksave
570    
571     #define newptrb frame->Xnewptrb
572    
573     /* When recursion is being used, local variables are allocated on the stack and
574     get preserved during recursion in the normal way. In this environment, fi and
575     i, and fc and c, can be the same variables. */
576    
577 nigel 93 #else /* NO_RECURSE not defined */
578 nigel 77 #define fi i
579     #define fc c
580    
581    
582 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
583     const uschar *charptr; /* in small blocks of the code. My normal */
584     #endif /* style of coding would have declared */
585     const uschar *callpat; /* them within each of those blocks. */
586     const uschar *data; /* However, in order to accommodate the */
587     const uschar *next; /* version of this code that uses an */
588     USPTR pp; /* external "stack" implemented on the */
589     const uschar *prev; /* heap, it is easier to declare them all */
590     USPTR saved_eptr; /* here, so the declarations can be cut */
591     /* out in a block. The only declarations */
592     recursion_info new_recursive; /* within blocks below are for variables */
593     /* that do not have to be preserved over */
594     BOOL cur_is_word; /* a recursive call to RMATCH(). */
595     BOOL condition;
596 nigel 77 BOOL prev_is_word;
597    
598     unsigned long int original_ims;
599    
600     #ifdef SUPPORT_UCP
601     int prop_type;
602 nigel 87 int prop_value;
603 nigel 77 int prop_fail_result;
604     int prop_category;
605     int prop_chartype;
606 nigel 87 int prop_script;
607 ph10 115 int oclength;
608     uschar occhars[8];
609 nigel 77 #endif
610    
611 ph10 399 int codelink;
612 nigel 77 int ctype;
613     int length;
614     int max;
615     int min;
616     int number;
617     int offset;
618     int op;
619     int save_capture_last;
620     int save_offset1, save_offset2, save_offset3;
621     int stacksave[REC_STACK_SAVE_MAX];
622    
623     eptrblock newptrb;
624 nigel 93 #endif /* NO_RECURSE */
625 nigel 77
626     /* These statements are here to stop the compiler complaining about uninitialized
627     variables. */
628    
629     #ifdef SUPPORT_UCP
630 nigel 87 prop_value = 0;
631 nigel 77 prop_fail_result = 0;
632     #endif
633    
634 nigel 93
635 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
636     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
637     used. Thanks to Ian Taylor for noticing this possibility and sending the
638     original patch. */
639    
640     TAIL_RECURSE:
641    
642 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
643     are specified by the macro RMATCH and RRETURN is used to return. When
644     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
645 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
646 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
647     complicated macro. It has to be used in one particular way. This shouldn't,
648     however, impact performance when true recursion is being used. */
649 nigel 77
650 ph10 164 #ifdef SUPPORT_UTF8
651     utf8 = md->utf8; /* Local copy of the flag */
652     #else
653     utf8 = FALSE;
654     #endif
655    
656 nigel 87 /* First check that we haven't called match() too many times, or that we
657     haven't exceeded the recursive call limit. */
658    
659 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
660 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
661 nigel 77
662     original_ims = ims; /* Save for resetting on ')' */
663 nigel 91
664 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
665     string, the match_cbegroup flag is set. When this is the case, add the current
666     subject pointer to the chain of such remembered pointers, to be checked when we
667     hit the closing ket, in order to break infinite loops that match no characters.
668 ph10 197 When match() is called in other circumstances, don't add to the chain. The
669     match_cbegroup flag must NOT be used with tail recursion, because the memory
670     block that is used is on the stack, so a new one may be required for each
671     match(). */
672 nigel 77
673 nigel 93 if ((flags & match_cbegroup) != 0)
674 nigel 77 {
675 ph10 197 newptrb.epb_saved_eptr = eptr;
676     newptrb.epb_prev = eptrb;
677     eptrb = &newptrb;
678 nigel 77 }
679    
680 nigel 93 /* Now start processing the opcodes. */
681 nigel 77
682     for (;;)
683     {
684 nigel 93 minimize = possessive = FALSE;
685 nigel 77 op = *ecode;
686 ph10 443
687 nigel 93 switch(op)
688     {
689 ph10 510 case OP_MARK:
690     markptr = ecode + 2;
691     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
692 ph10 512 ims, eptrb, flags, RM55);
693    
694     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
695     argument, and we must check whether that argument matches this MARK's
696     argument. It is passed back in md->start_match_ptr (an overloading of that
697     variable). If it does match, we reset that variable to the current subject
698     position and return MATCH_SKIP. Otherwise, pass back the return code
699 ph10 510 unaltered. */
700 ph10 512
701     if (rrc == MATCH_SKIP_ARG &&
702 ph10 510 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
703     {
704     md->start_match_ptr = eptr;
705     RRETURN(MATCH_SKIP);
706     }
707    
708 ph10 512 if (md->mark == NULL) md->mark = markptr;
709 ph10 510 RRETURN(rrc);
710    
711 ph10 210 case OP_FAIL:
712 ph10 510 MRRETURN(MATCH_NOMATCH);
713 ph10 211
714 ph10 551 /* COMMIT overrides PRUNE, SKIP, and THEN */
715 ph10 553
716 ph10 510 case OP_COMMIT:
717     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718     ims, eptrb, flags, RM52);
719 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
720 ph10 553 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
721     rrc != MATCH_THEN)
722 ph10 551 RRETURN(rrc);
723 ph10 510 MRRETURN(MATCH_COMMIT);
724    
725 ph10 551 /* PRUNE overrides THEN */
726 ph10 553
727 ph10 210 case OP_PRUNE:
728     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729     ims, eptrb, flags, RM51);
730 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
731 ph10 510 MRRETURN(MATCH_PRUNE);
732 ph10 211
733 ph10 510 case OP_PRUNE_ARG:
734     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
735 ph10 512 ims, eptrb, flags, RM56);
736 ph10 551 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
737 ph10 510 md->mark = ecode + 2;
738     RRETURN(MATCH_PRUNE);
739 ph10 211
740 ph10 551 /* SKIP overrides PRUNE and THEN */
741 ph10 553
742 ph10 210 case OP_SKIP:
743     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
744     ims, eptrb, flags, RM53);
745 ph10 553 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
746 ph10 551 RRETURN(rrc);
747 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
748 ph10 510 MRRETURN(MATCH_SKIP);
749 ph10 211
750 ph10 510 case OP_SKIP_ARG:
751     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752 ph10 512 ims, eptrb, flags, RM57);
753 ph10 553 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
754 ph10 551 RRETURN(rrc);
755 ph10 512
756     /* Pass back the current skip name by overloading md->start_match_ptr and
757     returning the special MATCH_SKIP_ARG return code. This will either be
758     caught by a matching MARK, or get to the top, where it is treated the same
759 ph10 510 as PRUNE. */
760 ph10 512
761 ph10 510 md->start_match_ptr = ecode + 2;
762 ph10 512 RRETURN(MATCH_SKIP_ARG);
763 ph10 553
764 ph10 550 /* For THEN (and THEN_ARG) we pass back the address of the bracket or
765 ph10 553 the alt that is at the start of the current branch. This makes it possible
766     to skip back past alternatives that precede the THEN within the current
767     branch. */
768 ph10 512
769 ph10 210 case OP_THEN:
770     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
771 ph10 212 ims, eptrb, flags, RM54);
772 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
773 ph10 550 md->start_match_ptr = ecode - GET(ecode, 1);
774 ph10 510 MRRETURN(MATCH_THEN);
775    
776     case OP_THEN_ARG:
777 ph10 553 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
778 ph10 550 offset_top, md, ims, eptrb, flags, RM58);
779 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
780 ph10 550 md->start_match_ptr = ecode - GET(ecode, 1);
781     md->mark = ecode + LINK_SIZE + 2;
782 ph10 212 RRETURN(MATCH_THEN);
783 ph10 211
784 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
785     the current subject position in the working slot at the top of the vector.
786     We mustn't change the current values of the data slot, because they may be
787     set from a previous iteration of this group, and be referred to by a
788     reference inside the group.
789 nigel 77
790 nigel 93 If the bracket fails to match, we need to restore this value and also the
791     values of the final offsets, in case they were set by a previous iteration
792     of the same bracket.
793 nigel 77
794 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
795     a non-capturing bracket. Don't worry about setting the flag for the error
796     case here; that is handled in the code for KET. */
797 nigel 77
798 nigel 93 case OP_CBRA:
799     case OP_SCBRA:
800     number = GET2(ecode, 1+LINK_SIZE);
801 nigel 77 offset = number << 1;
802    
803 ph10 475 #ifdef PCRE_DEBUG
804 nigel 93 printf("start bracket %d\n", number);
805     printf("subject=");
806 nigel 77 pchars(eptr, 16, TRUE, md);
807     printf("\n");
808     #endif
809    
810     if (offset < md->offset_max)
811     {
812     save_offset1 = md->offset_vector[offset];
813     save_offset2 = md->offset_vector[offset+1];
814     save_offset3 = md->offset_vector[md->offset_end - number];
815     save_capture_last = md->capture_last;
816    
817     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
818 ph10 531 md->offset_vector[md->offset_end - number] =
819 ph10 530 (int)(eptr - md->start_subject);
820 nigel 77
821 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
822 nigel 77 do
823     {
824 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
825     ims, eptrb, flags, RM1);
826 ph10 550 if (rrc != MATCH_NOMATCH &&
827     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
828     RRETURN(rrc);
829 nigel 77 md->capture_last = save_capture_last;
830     ecode += GET(ecode, 1);
831     }
832     while (*ecode == OP_ALT);
833    
834     DPRINTF(("bracket %d failed\n", number));
835    
836     md->offset_vector[offset] = save_offset1;
837     md->offset_vector[offset+1] = save_offset2;
838     md->offset_vector[md->offset_end - number] = save_offset3;
839    
840 ph10 510 if (rrc != MATCH_THEN) md->mark = markptr;
841 nigel 77 RRETURN(MATCH_NOMATCH);
842     }
843    
844 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
845     as a non-capturing bracket. */
846 nigel 77
847 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
848     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
849    
850 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
851 nigel 77
852 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
853     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
854    
855 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
856     final alternative within the brackets, we would return the result of a
857     recursive call to match() whatever happened. We can reduce stack usage by
858 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
859     is set.*/
860 nigel 77
861 nigel 93 case OP_BRA:
862     case OP_SBRA:
863     DPRINTF(("start non-capturing bracket\n"));
864     flags = (op >= OP_SBRA)? match_cbegroup : 0;
865 nigel 91 for (;;)
866 nigel 77 {
867 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
868 nigel 93 {
869 ph10 197 if (flags == 0) /* Not a possibly empty group */
870     {
871     ecode += _pcre_OP_lengths[*ecode];
872     DPRINTF(("bracket 0 tail recursion\n"));
873     goto TAIL_RECURSE;
874     }
875    
876     /* Possibly empty group; can't use tail recursion. */
877    
878     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
879     eptrb, flags, RM48);
880 ph10 512 if (rrc == MATCH_NOMATCH) md->mark = markptr;
881     RRETURN(rrc);
882 nigel 93 }
883 nigel 91
884     /* For non-final alternatives, continue the loop for a NOMATCH result;
885     otherwise return. */
886    
887 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
888     eptrb, flags, RM2);
889 ph10 550 if (rrc != MATCH_NOMATCH &&
890     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
891     RRETURN(rrc);
892 nigel 77 ecode += GET(ecode, 1);
893     }
894 nigel 91 /* Control never reaches here. */
895 nigel 77
896     /* Conditional group: compilation checked that there are no more than
897     two branches. If the condition is false, skipping the first branch takes us
898     past the end if there is only one branch, but that's OK because that is
899 nigel 91 exactly what going to the ket would do. As there is only one branch to be
900     obeyed, we can use tail recursion to avoid using another stack frame. */
901 nigel 77
902     case OP_COND:
903 nigel 93 case OP_SCOND:
904 ph10 399 codelink= GET(ecode, 1);
905 ph10 406
906 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
907     inserted between OP_COND and an assertion condition. */
908 ph10 392
909 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
910     {
911     if (pcre_callout != NULL)
912     {
913     pcre_callout_block cb;
914     cb.version = 1; /* Version 1 of the callout block */
915     cb.callout_number = ecode[LINK_SIZE+2];
916     cb.offset_vector = md->offset_vector;
917     cb.subject = (PCRE_SPTR)md->start_subject;
918 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
919     cb.start_match = (int)(mstart - md->start_subject);
920     cb.current_position = (int)(eptr - md->start_subject);
921 ph10 381 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
922     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
923     cb.capture_top = offset_top/2;
924     cb.capture_last = md->capture_last;
925     cb.callout_data = md->callout_data;
926 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
927 ph10 381 if (rrc < 0) RRETURN(rrc);
928     }
929     ecode += _pcre_OP_lengths[OP_CALLOUT];
930     }
931 ph10 392
932 ph10 399 condcode = ecode[LINK_SIZE+1];
933 ph10 406
934 ph10 381 /* Now see what the actual condition is */
935 ph10 392
936 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
937 nigel 77 {
938 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
939     {
940 ph10 461 condition = FALSE;
941     ecode += GET(ecode, 1);
942     }
943 ph10 459 else
944 ph10 461 {
945 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
946     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
947 ph10 461
948 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
949     false, but the test was set up by name, scan the table to see if the
950     name refers to any other numbers, and test them. The condition is true
951     if any one is set. */
952 ph10 461
953 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
954     {
955     uschar *slotA = md->name_table;
956     for (i = 0; i < md->name_count; i++)
957 ph10 461 {
958     if (GET2(slotA, 0) == recno) break;
959 ph10 459 slotA += md->name_entry_size;
960     }
961 ph10 461
962 ph10 459 /* Found a name for the number - there can be only one; duplicate
963     names for different numbers are allowed, but not vice versa. First
964     scan down for duplicates. */
965 ph10 461
966 ph10 459 if (i < md->name_count)
967 ph10 461 {
968 ph10 459 uschar *slotB = slotA;
969     while (slotB > md->name_table)
970     {
971     slotB -= md->name_entry_size;
972     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
973     {
974     condition = GET2(slotB, 0) == md->recursive->group_num;
975 ph10 461 if (condition) break;
976     }
977 ph10 459 else break;
978 ph10 461 }
979    
980 ph10 459 /* Scan up for duplicates */
981 ph10 461
982 ph10 459 if (!condition)
983 ph10 461 {
984 ph10 459 slotB = slotA;
985     for (i++; i < md->name_count; i++)
986     {
987     slotB += md->name_entry_size;
988     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
989     {
990     condition = GET2(slotB, 0) == md->recursive->group_num;
991     if (condition) break;
992 ph10 461 }
993 ph10 459 else break;
994 ph10 461 }
995     }
996 ph10 459 }
997 ph10 461 }
998    
999 ph10 459 /* Choose branch according to the condition */
1000 ph10 461
1001 ph10 459 ecode += condition? 3 : GET(ecode, 1);
1002     }
1003 ph10 461 }
1004 nigel 93
1005 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1006 nigel 93 {
1007 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1008 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1009 ph10 461
1010 ph10 459 /* If the numbered capture is unset, but the reference was by name,
1011 ph10 461 scan the table to see if the name refers to any other numbers, and test
1012     them. The condition is true if any one is set. This is tediously similar
1013     to the code above, but not close enough to try to amalgamate. */
1014    
1015 ph10 459 if (!condition && condcode == OP_NCREF)
1016     {
1017 ph10 461 int refno = offset >> 1;
1018 ph10 459 uschar *slotA = md->name_table;
1019 ph10 461
1020 ph10 459 for (i = 0; i < md->name_count; i++)
1021 ph10 461 {
1022     if (GET2(slotA, 0) == refno) break;
1023 ph10 459 slotA += md->name_entry_size;
1024     }
1025 ph10 461
1026     /* Found a name for the number - there can be only one; duplicate names
1027     for different numbers are allowed, but not vice versa. First scan down
1028 ph10 459 for duplicates. */
1029 ph10 461
1030 ph10 459 if (i < md->name_count)
1031 ph10 461 {
1032 ph10 459 uschar *slotB = slotA;
1033     while (slotB > md->name_table)
1034     {
1035     slotB -= md->name_entry_size;
1036     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1037     {
1038     offset = GET2(slotB, 0) << 1;
1039 ph10 461 condition = offset < offset_top &&
1040 ph10 459 md->offset_vector[offset] >= 0;
1041 ph10 461 if (condition) break;
1042     }
1043 ph10 459 else break;
1044 ph10 461 }
1045    
1046 ph10 459 /* Scan up for duplicates */
1047 ph10 461
1048 ph10 459 if (!condition)
1049 ph10 461 {
1050 ph10 459 slotB = slotA;
1051     for (i++; i < md->name_count; i++)
1052     {
1053     slotB += md->name_entry_size;
1054     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1055     {
1056     offset = GET2(slotB, 0) << 1;
1057 ph10 461 condition = offset < offset_top &&
1058 ph10 459 md->offset_vector[offset] >= 0;
1059 ph10 461 if (condition) break;
1060     }
1061 ph10 459 else break;
1062 ph10 461 }
1063     }
1064 ph10 459 }
1065 ph10 461 }
1066    
1067 ph10 459 /* Choose branch according to the condition */
1068    
1069 nigel 93 ecode += condition? 3 : GET(ecode, 1);
1070 nigel 77 }
1071    
1072 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
1073 nigel 93 {
1074     condition = FALSE;
1075     ecode += GET(ecode, 1);
1076     }
1077    
1078 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
1079 nigel 93 the final argument match_condassert causes it to stop at the end of an
1080     assertion. */
1081 nigel 77
1082     else
1083     {
1084 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1085     match_condassert, RM3);
1086 nigel 77 if (rrc == MATCH_MATCH)
1087     {
1088 nigel 93 condition = TRUE;
1089     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1090 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1091     }
1092 ph10 550 else if (rrc != MATCH_NOMATCH &&
1093     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1094 nigel 77 {
1095     RRETURN(rrc); /* Need braces because of following else */
1096     }
1097 nigel 93 else
1098     {
1099     condition = FALSE;
1100 ph10 399 ecode += codelink;
1101 nigel 93 }
1102     }
1103 nigel 91
1104 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1105 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1106     match_cbegroup is required for an unlimited repeat of a possibly empty
1107     group. If the second alternative doesn't exist, we can just plough on. */
1108 nigel 91
1109 nigel 93 if (condition || *ecode == OP_ALT)
1110     {
1111 nigel 91 ecode += 1 + LINK_SIZE;
1112 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1113     {
1114     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1115     RRETURN(rrc);
1116     }
1117     else /* Group must match something */
1118     {
1119     flags = 0;
1120     goto TAIL_RECURSE;
1121     }
1122 nigel 77 }
1123 ph10 395 else /* Condition false & no alternative */
1124 nigel 93 {
1125     ecode += 1 + LINK_SIZE;
1126     }
1127     break;
1128 nigel 77
1129 ph10 461
1130 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1131     to close any currently open capturing brackets. */
1132 ph10 461
1133 ph10 447 case OP_CLOSE:
1134 ph10 461 number = GET2(ecode, 1);
1135 ph10 447 offset = number << 1;
1136 ph10 461
1137 ph10 475 #ifdef PCRE_DEBUG
1138 ph10 447 printf("end bracket %d at *ACCEPT", number);
1139     printf("\n");
1140     #endif
1141 nigel 77
1142 ph10 447 md->capture_last = number;
1143     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1144     {
1145     md->offset_vector[offset] =
1146     md->offset_vector[md->offset_end - number];
1147 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1148 ph10 447 if (offset_top <= offset) offset_top = offset + 2;
1149     }
1150     ecode += 3;
1151 ph10 461 break;
1152 ph10 447
1153    
1154 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1155     recursion, we should restore the offsets appropriately and continue from
1156     after the call. */
1157 nigel 77
1158 ph10 210 case OP_ACCEPT:
1159 nigel 77 case OP_END:
1160     if (md->recursive != NULL && md->recursive->group_num == 0)
1161     {
1162     recursion_info *rec = md->recursive;
1163 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1164 nigel 77 md->recursive = rec->prevrec;
1165     memmove(md->offset_vector, rec->offset_save,
1166     rec->saved_max * sizeof(int));
1167 ph10 461 offset_top = rec->save_offset_top;
1168 nigel 77 ims = original_ims;
1169     ecode = rec->after_call;
1170     break;
1171     }
1172    
1173 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1174     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1175     the subject. In both cases, backtracking will then try other alternatives,
1176     if any. */
1177 ph10 443
1178 ph10 442 if (eptr == mstart &&
1179     (md->notempty ||
1180 ph10 443 (md->notempty_atstart &&
1181 ph10 442 mstart == md->start_subject + md->start_offset)))
1182 ph10 510 MRRETURN(MATCH_NOMATCH);
1183 ph10 443
1184 ph10 442 /* Otherwise, we have a match. */
1185 nigel 77
1186 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1187     md->end_offset_top = offset_top; /* and how many extracts were taken */
1188 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1189 nigel 77
1190 ph10 512 /* For some reason, the macros don't work properly if an expression is
1191     given as the argument to MRRETURN when the heap is in use. */
1192    
1193     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1194     MRRETURN(rrc);
1195    
1196 nigel 77 /* Change option settings */
1197    
1198     case OP_OPT:
1199     ims = ecode[1];
1200     ecode += 2;
1201     DPRINTF(("ims set to %02lx\n", ims));
1202     break;
1203    
1204     /* Assertion brackets. Check the alternative branches in turn - the
1205     matching won't pass the KET for an assertion. If any one branch matches,
1206     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1207     start of each branch to move the current point backwards, so the code at
1208     this level is identical to the lookahead case. */
1209    
1210     case OP_ASSERT:
1211     case OP_ASSERTBACK:
1212     do
1213     {
1214 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1215     RM4);
1216 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1217 ph10 500 {
1218     mstart = md->start_match_ptr; /* In case \K reset it */
1219     break;
1220 ph10 501 }
1221 ph10 550 if (rrc != MATCH_NOMATCH &&
1222     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1223     RRETURN(rrc);
1224 nigel 77 ecode += GET(ecode, 1);
1225     }
1226     while (*ecode == OP_ALT);
1227 ph10 510 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1228 nigel 77
1229     /* If checking an assertion for a condition, return MATCH_MATCH. */
1230    
1231     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1232    
1233     /* Continue from after the assertion, updating the offsets high water
1234     mark, since extracts may have been taken during the assertion. */
1235    
1236     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1237     ecode += 1 + LINK_SIZE;
1238     offset_top = md->end_offset_top;
1239     continue;
1240    
1241 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1242 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1243 ph10 473 branches. */
1244 nigel 77
1245     case OP_ASSERT_NOT:
1246     case OP_ASSERTBACK_NOT:
1247     do
1248     {
1249 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1250     RM5);
1251 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1252 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1253     {
1254     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1255 ph10 482 break;
1256     }
1257 ph10 550 if (rrc != MATCH_NOMATCH &&
1258     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1259     RRETURN(rrc);
1260 nigel 77 ecode += GET(ecode,1);
1261     }
1262     while (*ecode == OP_ALT);
1263    
1264     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1265    
1266     ecode += 1 + LINK_SIZE;
1267     continue;
1268    
1269     /* Move the subject pointer back. This occurs only at the start of
1270     each branch of a lookbehind assertion. If we are too close to the start to
1271     move back, this match function fails. When working with UTF-8 we move
1272     back a number of characters, not bytes. */
1273    
1274     case OP_REVERSE:
1275     #ifdef SUPPORT_UTF8
1276     if (utf8)
1277     {
1278 nigel 93 i = GET(ecode, 1);
1279     while (i-- > 0)
1280 nigel 77 {
1281     eptr--;
1282 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1283 ph10 207 BACKCHAR(eptr);
1284 nigel 77 }
1285     }
1286     else
1287     #endif
1288    
1289     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1290    
1291     {
1292 nigel 93 eptr -= GET(ecode, 1);
1293 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1294 nigel 77 }
1295    
1296 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1297 nigel 77
1298 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1299 nigel 77 ecode += 1 + LINK_SIZE;
1300     break;
1301    
1302     /* The callout item calls an external function, if one is provided, passing
1303     details of the match so far. This is mainly for debugging, though the
1304     function is able to force a failure. */
1305    
1306     case OP_CALLOUT:
1307     if (pcre_callout != NULL)
1308     {
1309     pcre_callout_block cb;
1310     cb.version = 1; /* Version 1 of the callout block */
1311     cb.callout_number = ecode[1];
1312     cb.offset_vector = md->offset_vector;
1313 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1314 ph10 530 cb.subject_length = (int)(md->end_subject - md->start_subject);
1315     cb.start_match = (int)(mstart - md->start_subject);
1316     cb.current_position = (int)(eptr - md->start_subject);
1317 nigel 77 cb.pattern_position = GET(ecode, 2);
1318     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1319     cb.capture_top = offset_top/2;
1320     cb.capture_last = md->capture_last;
1321     cb.callout_data = md->callout_data;
1322 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1323 nigel 77 if (rrc < 0) RRETURN(rrc);
1324     }
1325     ecode += 2 + 2*LINK_SIZE;
1326     break;
1327    
1328     /* Recursion either matches the current regex, or some subexpression. The
1329     offset data is the offset to the starting bracket from the start of the
1330     whole pattern. (This is so that it works from duplicated subpatterns.)
1331    
1332     If there are any capturing brackets started but not finished, we have to
1333     save their starting points and reinstate them after the recursion. However,
1334     we don't know how many such there are (offset_top records the completed
1335     total) so we just have to save all the potential data. There may be up to
1336     65535 such values, which is too large to put on the stack, but using malloc
1337     for small numbers seems expensive. As a compromise, the stack is used when
1338     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1339     is used. If the malloc fails, the recursion is abandoned and the error
1340     code PCRE_ERROR_NOMEMORY is returned, so no partial save of the offset
1341     values is ever attempted.
1342    
1343     There are also other values that have to be saved. We use a chained
1344     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1345     for the original version of this logic. */
1346    
1347     case OP_RECURSE:
1348     {
1349     callpat = md->start_code + GET(ecode, 1);
1350 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1351     GET2(callpat, 1 + LINK_SIZE);
1352 nigel 77
1353     /* Add to "recursing stack" */
1354    
1355     new_recursive.prevrec = md->recursive;
1356     md->recursive = &new_recursive;
1357    
1358     /* Find where to continue from afterwards */
1359    
1360     ecode += 1 + LINK_SIZE;
1361     new_recursive.after_call = ecode;
1362    
1363     /* Now save the offset data. */
1364    
1365     new_recursive.saved_max = md->offset_end;
1366     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1367     new_recursive.offset_save = stacksave;
1368     else
1369     {
1370     new_recursive.offset_save =
1371     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1372     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1373     }
1374    
1375     memcpy(new_recursive.offset_save, md->offset_vector,
1376     new_recursive.saved_max * sizeof(int));
1377 ph10 461 new_recursive.save_offset_top = offset_top;
1378 nigel 77
1379     /* OK, now we can do the recursion. For each top-level alternative we
1380     restore the offset and recursion data. */
1381    
1382     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1383 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1384 nigel 77 do
1385     {
1386 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1387     md, ims, eptrb, flags, RM6);
1388 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1389 nigel 77 {
1390 nigel 87 DPRINTF(("Recursion matched\n"));
1391 nigel 77 md->recursive = new_recursive.prevrec;
1392     if (new_recursive.offset_save != stacksave)
1393     (pcre_free)(new_recursive.offset_save);
1394 ph10 510 MRRETURN(MATCH_MATCH);
1395 nigel 77 }
1396 ph10 550 else if (rrc != MATCH_NOMATCH &&
1397     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1398 nigel 87 {
1399     DPRINTF(("Recursion gave error %d\n", rrc));
1400 ph10 400 if (new_recursive.offset_save != stacksave)
1401     (pcre_free)(new_recursive.offset_save);
1402 nigel 87 RRETURN(rrc);
1403     }
1404 nigel 77
1405     md->recursive = &new_recursive;
1406     memcpy(md->offset_vector, new_recursive.offset_save,
1407     new_recursive.saved_max * sizeof(int));
1408     callpat += GET(callpat, 1);
1409     }
1410     while (*callpat == OP_ALT);
1411    
1412     DPRINTF(("Recursion didn't match\n"));
1413     md->recursive = new_recursive.prevrec;
1414     if (new_recursive.offset_save != stacksave)
1415     (pcre_free)(new_recursive.offset_save);
1416 ph10 510 MRRETURN(MATCH_NOMATCH);
1417 nigel 77 }
1418     /* Control never reaches here */
1419    
1420     /* "Once" brackets are like assertion brackets except that after a match,
1421     the point in the subject string is not moved back. Thus there can never be
1422     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1423     Check the alternative branches in turn - the matching won't pass the KET
1424     for this kind of subpattern. If any one branch matches, we carry on as at
1425 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1426     the start-of-match value in case it was changed by \K. */
1427 nigel 77
1428     case OP_ONCE:
1429 nigel 91 prev = ecode;
1430     saved_eptr = eptr;
1431    
1432     do
1433 nigel 77 {
1434 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1435 ph10 511 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1436 ph10 500 {
1437     mstart = md->start_match_ptr;
1438     break;
1439 ph10 501 }
1440 ph10 550 if (rrc != MATCH_NOMATCH &&
1441     (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1442     RRETURN(rrc);
1443 nigel 91 ecode += GET(ecode,1);
1444     }
1445     while (*ecode == OP_ALT);
1446 nigel 77
1447 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1448 nigel 77
1449 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1450 nigel 77
1451 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1452     mark, since extracts may have been taken. */
1453 nigel 77
1454 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1455 nigel 77
1456 nigel 91 offset_top = md->end_offset_top;
1457     eptr = md->end_match_ptr;
1458 nigel 77
1459 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1460     happens for a repeating ket if no characters were matched in the group.
1461     This is the forcible breaking of infinite loops as implemented in Perl
1462     5.005. If there is an options reset, it will get obeyed in the normal
1463     course of events. */
1464 nigel 77
1465 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1466     {
1467     ecode += 1+LINK_SIZE;
1468     break;
1469     }
1470 nigel 77
1471 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1472     preceding bracket, in the appropriate order. The second "call" of match()
1473     uses tail recursion, to avoid using another stack frame. We need to reset
1474     any options that changed within the bracket before re-running it, so
1475     check the next opcode. */
1476 nigel 77
1477 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1478     {
1479     ims = (ims & ~PCRE_IMS) | ecode[4];
1480     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1481     }
1482 nigel 77
1483 nigel 91 if (*ecode == OP_KETRMIN)
1484     {
1485 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1486 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1487     ecode = prev;
1488 ph10 197 flags = 0;
1489 nigel 91 goto TAIL_RECURSE;
1490 nigel 77 }
1491 nigel 91 else /* OP_KETRMAX */
1492     {
1493 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1494 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1495     ecode += 1 + LINK_SIZE;
1496 ph10 197 flags = 0;
1497 nigel 91 goto TAIL_RECURSE;
1498     }
1499     /* Control never gets here */
1500 nigel 77
1501     /* An alternation is the end of a branch; scan along to find the end of the
1502     bracketed group and go to there. */
1503    
1504     case OP_ALT:
1505     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1506     break;
1507    
1508 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1509     indicating that it may occur zero times. It may repeat infinitely, or not
1510     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1511     with fixed upper repeat limits are compiled as a number of copies, with the
1512     optional ones preceded by BRAZERO or BRAMINZERO. */
1513 nigel 77
1514     case OP_BRAZERO:
1515     {
1516     next = ecode+1;
1517 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1518 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1519     do next += GET(next,1); while (*next == OP_ALT);
1520 nigel 93 ecode = next + 1 + LINK_SIZE;
1521 nigel 77 }
1522     break;
1523    
1524     case OP_BRAMINZERO:
1525     {
1526     next = ecode+1;
1527 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1528 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1529 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1530     ecode++;
1531     }
1532     break;
1533    
1534 ph10 335 case OP_SKIPZERO:
1535     {
1536     next = ecode+1;
1537     do next += GET(next,1); while (*next == OP_ALT);
1538     ecode = next + 1 + LINK_SIZE;
1539     }
1540     break;
1541    
1542 nigel 93 /* End of a group, repeated or non-repeating. */
1543 nigel 77
1544     case OP_KET:
1545     case OP_KETRMIN:
1546     case OP_KETRMAX:
1547 nigel 91 prev = ecode - GET(ecode, 1);
1548 nigel 77
1549 nigel 93 /* If this was a group that remembered the subject start, in order to break
1550     infinite repeats of empty string matches, retrieve the subject start from
1551     the chain. Otherwise, set it NULL. */
1552 nigel 77
1553 nigel 93 if (*prev >= OP_SBRA)
1554     {
1555     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1556     eptrb = eptrb->epb_prev; /* Backup to previous group */
1557     }
1558     else saved_eptr = NULL;
1559 nigel 77
1560 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1561     matching and return MATCH_MATCH, but record the current high water mark for
1562     use by positive assertions. We also need to record the match start in case
1563     it was changed by \K. */
1564 nigel 93
1565 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1566     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1567     *prev == OP_ONCE)
1568     {
1569     md->end_match_ptr = eptr; /* For ONCE */
1570     md->end_offset_top = offset_top;
1571 ph10 500 md->start_match_ptr = mstart;
1572 ph10 510 MRRETURN(MATCH_MATCH);
1573 nigel 91 }
1574 nigel 77
1575 nigel 93 /* For capturing groups we have to check the group number back at the start
1576     and if necessary complete handling an extraction by setting the offsets and
1577     bumping the high water mark. Note that whole-pattern recursion is coded as
1578     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1579     when the OP_END is reached. Other recursion is handled here. */
1580 nigel 77
1581 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1582 nigel 91 {
1583 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1584 nigel 91 offset = number << 1;
1585 ph10 461
1586 ph10 475 #ifdef PCRE_DEBUG
1587 nigel 91 printf("end bracket %d", number);
1588     printf("\n");
1589 nigel 77 #endif
1590    
1591 nigel 93 md->capture_last = number;
1592     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1593 nigel 91 {
1594 nigel 93 md->offset_vector[offset] =
1595     md->offset_vector[md->offset_end - number];
1596 ph10 530 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1597 nigel 93 if (offset_top <= offset) offset_top = offset + 2;
1598     }
1599 nigel 77
1600 nigel 93 /* Handle a recursively called group. Restore the offsets
1601     appropriately and continue from after the call. */
1602 nigel 77
1603 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1604     {
1605     recursion_info *rec = md->recursive;
1606     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1607     md->recursive = rec->prevrec;
1608     memcpy(md->offset_vector, rec->offset_save,
1609     rec->saved_max * sizeof(int));
1610 ph10 461 offset_top = rec->save_offset_top;
1611 nigel 93 ecode = rec->after_call;
1612     ims = original_ims;
1613     break;
1614 nigel 77 }
1615 nigel 91 }
1616 nigel 77
1617 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1618     flags, in case they got changed during the group. */
1619 nigel 77
1620 nigel 91 ims = original_ims;
1621     DPRINTF(("ims reset to %02lx\n", ims));
1622 nigel 77
1623 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1624     happens for a repeating ket if no characters were matched in the group.
1625     This is the forcible breaking of infinite loops as implemented in Perl
1626     5.005. If there is an options reset, it will get obeyed in the normal
1627     course of events. */
1628 nigel 77
1629 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1630     {
1631     ecode += 1 + LINK_SIZE;
1632     break;
1633     }
1634 nigel 77
1635 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1636     preceding bracket, in the appropriate order. In the second case, we can use
1637 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1638     unlimited repeat of a group that can match an empty string. */
1639 nigel 77
1640 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1641    
1642 nigel 91 if (*ecode == OP_KETRMIN)
1643     {
1644 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1645 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1646 ph10 197 if (flags != 0) /* Could match an empty string */
1647     {
1648     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1649     RRETURN(rrc);
1650     }
1651 nigel 91 ecode = prev;
1652     goto TAIL_RECURSE;
1653 nigel 77 }
1654 nigel 91 else /* OP_KETRMAX */
1655     {
1656 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1657 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1658     ecode += 1 + LINK_SIZE;
1659 ph10 197 flags = 0;
1660 nigel 91 goto TAIL_RECURSE;
1661     }
1662     /* Control never gets here */
1663 nigel 77
1664     /* Start of subject unless notbol, or after internal newline if multiline */
1665    
1666     case OP_CIRC:
1667 ph10 510 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1668 nigel 77 if ((ims & PCRE_MULTILINE) != 0)
1669     {
1670 nigel 91 if (eptr != md->start_subject &&
1671 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1672 ph10 510 MRRETURN(MATCH_NOMATCH);
1673 nigel 77 ecode++;
1674     break;
1675     }
1676     /* ... else fall through */
1677    
1678     /* Start of subject assertion */
1679    
1680     case OP_SOD:
1681 ph10 510 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1682 nigel 77 ecode++;
1683     break;
1684    
1685     /* Start of match assertion */
1686    
1687     case OP_SOM:
1688 ph10 510 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1689 nigel 77 ecode++;
1690     break;
1691 ph10 172
1692 ph10 168 /* Reset the start of match point */
1693 ph10 172
1694 ph10 168 case OP_SET_SOM:
1695     mstart = eptr;
1696 ph10 172 ecode++;
1697     break;
1698 nigel 77
1699     /* Assert before internal newline if multiline, or before a terminating
1700     newline unless endonly is set, else end of subject unless noteol is set. */
1701    
1702     case OP_DOLL:
1703     if ((ims & PCRE_MULTILINE) != 0)
1704     {
1705     if (eptr < md->end_subject)
1706 ph10 510 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1707 nigel 77 else
1708 ph10 553 {
1709     if (md->noteol) MRRETURN(MATCH_NOMATCH);
1710     SCHECK_PARTIAL();
1711     }
1712 nigel 77 ecode++;
1713     break;
1714     }
1715 ph10 553 else /* Not multiline */
1716 nigel 77 {
1717 ph10 510 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1718 ph10 553 if (!md->endonly) goto ASSERT_NL_OR_EOS;
1719 nigel 77 }
1720 ph10 553
1721 nigel 91 /* ... else fall through for endonly */
1722 nigel 77
1723     /* End of subject assertion (\z) */
1724    
1725     case OP_EOD:
1726 ph10 510 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1727 ph10 553 SCHECK_PARTIAL();
1728 nigel 77 ecode++;
1729     break;
1730    
1731     /* End of subject or ending \n assertion (\Z) */
1732    
1733     case OP_EODN:
1734 ph10 553 ASSERT_NL_OR_EOS:
1735     if (eptr < md->end_subject &&
1736 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1737 ph10 510 MRRETURN(MATCH_NOMATCH);
1738 ph10 553
1739     /* Either at end of string or \n before end. */
1740    
1741     SCHECK_PARTIAL();
1742 nigel 77 ecode++;
1743     break;
1744    
1745     /* Word boundary assertions */
1746    
1747     case OP_NOT_WORD_BOUNDARY:
1748     case OP_WORD_BOUNDARY:
1749     {
1750    
1751     /* Find out if the previous and current characters are "word" characters.
1752     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1753 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1754 ph10 435 partial matching. */
1755 nigel 77
1756     #ifdef SUPPORT_UTF8
1757     if (utf8)
1758     {
1759 ph10 518 /* Get status of previous character */
1760 ph10 527
1761 nigel 77 if (eptr == md->start_subject) prev_is_word = FALSE; else
1762     {
1763 ph10 409 USPTR lastptr = eptr - 1;
1764 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1765 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1766 nigel 77 GETCHAR(c, lastptr);
1767 ph10 527 #ifdef SUPPORT_UCP
1768 ph10 518 if (md->use_ucp)
1769     {
1770     if (c == '_') prev_is_word = TRUE; else
1771 ph10 527 {
1772 ph10 518 int cat = UCD_CATEGORY(c);
1773     prev_is_word = (cat == ucp_L || cat == ucp_N);
1774 ph10 527 }
1775     }
1776     else
1777     #endif
1778 nigel 77 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1779     }
1780 ph10 527
1781 ph10 518 /* Get status of next character */
1782 ph10 527
1783 ph10 443 if (eptr >= md->end_subject)
1784 nigel 77 {
1785 ph10 443 SCHECK_PARTIAL();
1786     cur_is_word = FALSE;
1787 ph10 428 }
1788     else
1789     {
1790 nigel 77 GETCHAR(c, eptr);
1791 ph10 527 #ifdef SUPPORT_UCP
1792 ph10 518 if (md->use_ucp)
1793     {
1794     if (c == '_') cur_is_word = TRUE; else
1795 ph10 527 {
1796 ph10 518 int cat = UCD_CATEGORY(c);
1797     cur_is_word = (cat == ucp_L || cat == ucp_N);
1798 ph10 527 }
1799     }
1800     else
1801     #endif
1802 nigel 77 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1803     }
1804     }
1805     else
1806     #endif
1807    
1808 ph10 527 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1809 ph10 518 consistency with the behaviour of \w we do use it in this case. */
1810 nigel 77
1811     {
1812 ph10 518 /* Get status of previous character */
1813 ph10 527
1814 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1815     {
1816 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1817 ph10 527 #ifdef SUPPORT_UCP
1818 ph10 518 if (md->use_ucp)
1819     {
1820 ph10 527 c = eptr[-1];
1821 ph10 518 if (c == '_') prev_is_word = TRUE; else
1822 ph10 527 {
1823 ph10 518 int cat = UCD_CATEGORY(c);
1824     prev_is_word = (cat == ucp_L || cat == ucp_N);
1825 ph10 527 }
1826     }
1827     else
1828     #endif
1829 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1830     }
1831 ph10 527
1832 ph10 518 /* Get status of next character */
1833 ph10 527
1834 ph10 443 if (eptr >= md->end_subject)
1835 ph10 428 {
1836 ph10 443 SCHECK_PARTIAL();
1837     cur_is_word = FALSE;
1838 ph10 428 }
1839 ph10 527 else
1840     #ifdef SUPPORT_UCP
1841 ph10 518 if (md->use_ucp)
1842     {
1843 ph10 527 c = *eptr;
1844 ph10 518 if (c == '_') cur_is_word = TRUE; else
1845 ph10 527 {
1846 ph10 518 int cat = UCD_CATEGORY(c);
1847     cur_is_word = (cat == ucp_L || cat == ucp_N);
1848 ph10 527 }
1849     }
1850     else
1851     #endif
1852 ph10 518 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1853 nigel 77 }
1854    
1855     /* Now see if the situation is what we want */
1856    
1857     if ((*ecode++ == OP_WORD_BOUNDARY)?
1858     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1859 ph10 510 MRRETURN(MATCH_NOMATCH);
1860 nigel 77 }
1861     break;
1862    
1863     /* Match a single character type; inline for speed */
1864    
1865     case OP_ANY:
1866 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1867 ph10 345 /* Fall through */
1868    
1869 ph10 341 case OP_ALLANY:
1870 ph10 443 if (eptr++ >= md->end_subject)
1871 ph10 428 {
1872 ph10 443 SCHECK_PARTIAL();
1873 ph10 510 MRRETURN(MATCH_NOMATCH);
1874 ph10 443 }
1875 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1876 nigel 77 ecode++;
1877     break;
1878    
1879     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1880     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1881    
1882     case OP_ANYBYTE:
1883 ph10 443 if (eptr++ >= md->end_subject)
1884 ph10 428 {
1885 ph10 443 SCHECK_PARTIAL();
1886 ph10 510 MRRETURN(MATCH_NOMATCH);
1887 ph10 443 }
1888 nigel 77 ecode++;
1889     break;
1890    
1891     case OP_NOT_DIGIT:
1892 ph10 443 if (eptr >= md->end_subject)
1893 ph10 428 {
1894 ph10 443 SCHECK_PARTIAL();
1895 ph10 510 MRRETURN(MATCH_NOMATCH);
1896 ph10 443 }
1897 nigel 77 GETCHARINCTEST(c, eptr);
1898     if (
1899     #ifdef SUPPORT_UTF8
1900     c < 256 &&
1901     #endif
1902     (md->ctypes[c] & ctype_digit) != 0
1903     )
1904 ph10 510 MRRETURN(MATCH_NOMATCH);
1905 nigel 77 ecode++;
1906     break;
1907    
1908     case OP_DIGIT:
1909 ph10 443 if (eptr >= md->end_subject)
1910 ph10 428 {
1911 ph10 443 SCHECK_PARTIAL();
1912 ph10 510 MRRETURN(MATCH_NOMATCH);
1913 ph10 443 }
1914 nigel 77 GETCHARINCTEST(c, eptr);
1915     if (
1916     #ifdef SUPPORT_UTF8
1917     c >= 256 ||
1918     #endif
1919     (md->ctypes[c] & ctype_digit) == 0
1920     )
1921 ph10 510 MRRETURN(MATCH_NOMATCH);
1922 nigel 77 ecode++;
1923     break;
1924    
1925     case OP_NOT_WHITESPACE:
1926 ph10 443 if (eptr >= md->end_subject)
1927 ph10 428 {
1928 ph10 443 SCHECK_PARTIAL();
1929 ph10 510 MRRETURN(MATCH_NOMATCH);
1930 ph10 443 }
1931 nigel 77 GETCHARINCTEST(c, eptr);
1932     if (
1933     #ifdef SUPPORT_UTF8
1934     c < 256 &&
1935     #endif
1936     (md->ctypes[c] & ctype_space) != 0
1937     )
1938 ph10 510 MRRETURN(MATCH_NOMATCH);
1939 nigel 77 ecode++;
1940     break;
1941    
1942     case OP_WHITESPACE:
1943 ph10 443 if (eptr >= md->end_subject)
1944 ph10 428 {
1945 ph10 443 SCHECK_PARTIAL();
1946 ph10 510 MRRETURN(MATCH_NOMATCH);
1947 ph10 443 }
1948 nigel 77 GETCHARINCTEST(c, eptr);
1949     if (
1950     #ifdef SUPPORT_UTF8
1951     c >= 256 ||
1952     #endif
1953     (md->ctypes[c] & ctype_space) == 0
1954     )
1955 ph10 510 MRRETURN(MATCH_NOMATCH);
1956 nigel 77 ecode++;
1957     break;
1958    
1959     case OP_NOT_WORDCHAR:
1960 ph10 443 if (eptr >= md->end_subject)
1961 ph10 428 {
1962 ph10 443 SCHECK_PARTIAL();
1963 ph10 510 MRRETURN(MATCH_NOMATCH);
1964 ph10 443 }
1965 nigel 77 GETCHARINCTEST(c, eptr);
1966     if (
1967     #ifdef SUPPORT_UTF8
1968     c < 256 &&
1969     #endif
1970     (md->ctypes[c] & ctype_word) != 0
1971     )
1972 ph10 510 MRRETURN(MATCH_NOMATCH);
1973 nigel 77 ecode++;
1974     break;
1975    
1976     case OP_WORDCHAR:
1977 ph10 443 if (eptr >= md->end_subject)
1978 ph10 428 {
1979 ph10 443 SCHECK_PARTIAL();
1980 ph10 510 MRRETURN(MATCH_NOMATCH);
1981 ph10 443 }
1982 nigel 77 GETCHARINCTEST(c, eptr);
1983     if (
1984     #ifdef SUPPORT_UTF8
1985     c >= 256 ||
1986     #endif
1987     (md->ctypes[c] & ctype_word) == 0
1988     )
1989 ph10 510 MRRETURN(MATCH_NOMATCH);
1990 nigel 77 ecode++;
1991     break;
1992    
1993 nigel 93 case OP_ANYNL:
1994 ph10 443 if (eptr >= md->end_subject)
1995 ph10 428 {
1996 ph10 443 SCHECK_PARTIAL();
1997 ph10 510 MRRETURN(MATCH_NOMATCH);
1998 ph10 443 }
1999 nigel 93 GETCHARINCTEST(c, eptr);
2000     switch(c)
2001     {
2002 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2003 nigel 93 case 0x000d:
2004     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2005     break;
2006 ph10 231
2007 nigel 93 case 0x000a:
2008 ph10 231 break;
2009    
2010 nigel 93 case 0x000b:
2011     case 0x000c:
2012     case 0x0085:
2013     case 0x2028:
2014     case 0x2029:
2015 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2016 nigel 93 break;
2017     }
2018     ecode++;
2019     break;
2020    
2021 ph10 178 case OP_NOT_HSPACE:
2022 ph10 443 if (eptr >= md->end_subject)
2023 ph10 428 {
2024 ph10 443 SCHECK_PARTIAL();
2025 ph10 510 MRRETURN(MATCH_NOMATCH);
2026 ph10 443 }
2027 ph10 178 GETCHARINCTEST(c, eptr);
2028     switch(c)
2029     {
2030     default: break;
2031     case 0x09: /* HT */
2032     case 0x20: /* SPACE */
2033     case 0xa0: /* NBSP */
2034     case 0x1680: /* OGHAM SPACE MARK */
2035     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2036     case 0x2000: /* EN QUAD */
2037     case 0x2001: /* EM QUAD */
2038     case 0x2002: /* EN SPACE */
2039     case 0x2003: /* EM SPACE */
2040     case 0x2004: /* THREE-PER-EM SPACE */
2041     case 0x2005: /* FOUR-PER-EM SPACE */
2042     case 0x2006: /* SIX-PER-EM SPACE */
2043     case 0x2007: /* FIGURE SPACE */
2044     case 0x2008: /* PUNCTUATION SPACE */
2045     case 0x2009: /* THIN SPACE */
2046     case 0x200A: /* HAIR SPACE */
2047     case 0x202f: /* NARROW NO-BREAK SPACE */
2048     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2049     case 0x3000: /* IDEOGRAPHIC SPACE */
2050 ph10 510 MRRETURN(MATCH_NOMATCH);
2051 ph10 178 }
2052     ecode++;
2053     break;
2054    
2055     case OP_HSPACE:
2056 ph10 443 if (eptr >= md->end_subject)
2057 ph10 428 {
2058 ph10 443 SCHECK_PARTIAL();
2059 ph10 510 MRRETURN(MATCH_NOMATCH);
2060 ph10 443 }
2061 ph10 178 GETCHARINCTEST(c, eptr);
2062     switch(c)
2063     {
2064 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2065 ph10 178 case 0x09: /* HT */
2066     case 0x20: /* SPACE */
2067     case 0xa0: /* NBSP */
2068     case 0x1680: /* OGHAM SPACE MARK */
2069     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2070     case 0x2000: /* EN QUAD */
2071     case 0x2001: /* EM QUAD */
2072     case 0x2002: /* EN SPACE */
2073     case 0x2003: /* EM SPACE */
2074     case 0x2004: /* THREE-PER-EM SPACE */
2075     case 0x2005: /* FOUR-PER-EM SPACE */
2076     case 0x2006: /* SIX-PER-EM SPACE */
2077     case 0x2007: /* FIGURE SPACE */
2078     case 0x2008: /* PUNCTUATION SPACE */
2079     case 0x2009: /* THIN SPACE */
2080     case 0x200A: /* HAIR SPACE */
2081     case 0x202f: /* NARROW NO-BREAK SPACE */
2082     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2083     case 0x3000: /* IDEOGRAPHIC SPACE */
2084     break;
2085     }
2086     ecode++;
2087     break;
2088    
2089     case OP_NOT_VSPACE:
2090 ph10 443 if (eptr >= md->end_subject)
2091 ph10 428 {
2092 ph10 443 SCHECK_PARTIAL();
2093 ph10 510 MRRETURN(MATCH_NOMATCH);
2094 ph10 443 }
2095 ph10 178 GETCHARINCTEST(c, eptr);
2096     switch(c)
2097     {
2098     default: break;
2099     case 0x0a: /* LF */
2100     case 0x0b: /* VT */
2101     case 0x0c: /* FF */
2102     case 0x0d: /* CR */
2103     case 0x85: /* NEL */
2104     case 0x2028: /* LINE SEPARATOR */
2105     case 0x2029: /* PARAGRAPH SEPARATOR */
2106 ph10 510 MRRETURN(MATCH_NOMATCH);
2107 ph10 178 }
2108     ecode++;
2109     break;
2110    
2111     case OP_VSPACE:
2112 ph10 443 if (eptr >= md->end_subject)
2113 ph10 428 {
2114 ph10 443 SCHECK_PARTIAL();
2115 ph10 510 MRRETURN(MATCH_NOMATCH);
2116 ph10 443 }
2117 ph10 178 GETCHARINCTEST(c, eptr);
2118     switch(c)
2119     {
2120 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2121 ph10 178 case 0x0a: /* LF */
2122     case 0x0b: /* VT */
2123     case 0x0c: /* FF */
2124     case 0x0d: /* CR */
2125     case 0x85: /* NEL */
2126     case 0x2028: /* LINE SEPARATOR */
2127     case 0x2029: /* PARAGRAPH SEPARATOR */
2128     break;
2129     }
2130     ecode++;
2131     break;
2132    
2133 nigel 77 #ifdef SUPPORT_UCP
2134     /* Check the next character by Unicode property. We will get here only
2135     if the support is in the binary; otherwise a compile-time error occurs. */
2136    
2137     case OP_PROP:
2138     case OP_NOTPROP:
2139 ph10 443 if (eptr >= md->end_subject)
2140 ph10 428 {
2141 ph10 443 SCHECK_PARTIAL();
2142 ph10 510 MRRETURN(MATCH_NOMATCH);
2143 ph10 443 }
2144 nigel 77 GETCHARINCTEST(c, eptr);
2145     {
2146 ph10 384 const ucd_record *prop = GET_UCD(c);
2147 nigel 77
2148 nigel 87 switch(ecode[1])
2149     {
2150     case PT_ANY:
2151 ph10 510 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2152 nigel 87 break;
2153 nigel 77
2154 nigel 87 case PT_LAMP:
2155 ph10 349 if ((prop->chartype == ucp_Lu ||
2156     prop->chartype == ucp_Ll ||
2157     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2158 ph10 510 MRRETURN(MATCH_NOMATCH);
2159 ph10 517 break;
2160 nigel 87
2161     case PT_GC:
2162 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2163 ph10 510 MRRETURN(MATCH_NOMATCH);
2164 nigel 87 break;
2165    
2166     case PT_PC:
2167 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2168 ph10 510 MRRETURN(MATCH_NOMATCH);
2169 nigel 87 break;
2170    
2171     case PT_SC:
2172 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2173 ph10 510 MRRETURN(MATCH_NOMATCH);
2174 nigel 87 break;
2175 ph10 527
2176 ph10 517 /* These are specials */
2177 ph10 527
2178 ph10 517 case PT_ALNUM:
2179     if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2180     _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2181     MRRETURN(MATCH_NOMATCH);
2182 ph10 527 break;
2183    
2184 ph10 517 case PT_SPACE: /* Perl space */
2185     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2186     c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2187     == (op == OP_NOTPROP))
2188     MRRETURN(MATCH_NOMATCH);
2189 ph10 527 break;
2190    
2191 ph10 517 case PT_PXSPACE: /* POSIX space */
2192     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2193 ph10 527 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2194 ph10 517 c == CHAR_FF || c == CHAR_CR)
2195     == (op == OP_NOTPROP))
2196     MRRETURN(MATCH_NOMATCH);
2197 ph10 527 break;
2198 nigel 87
2199 ph10 527 case PT_WORD:
2200 ph10 517 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2201 ph10 527 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2202 ph10 517 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2203     MRRETURN(MATCH_NOMATCH);
2204 ph10 527 break;
2205    
2206 ph10 517 /* This should never occur */
2207    
2208 nigel 87 default:
2209     RRETURN(PCRE_ERROR_INTERNAL);
2210 nigel 77 }
2211 nigel 87
2212     ecode += 3;
2213 nigel 77 }
2214     break;
2215    
2216     /* Match an extended Unicode sequence. We will get here only if the support
2217     is in the binary; otherwise a compile-time error occurs. */
2218    
2219     case OP_EXTUNI:
2220 ph10 443 if (eptr >= md->end_subject)
2221 ph10 428 {
2222 ph10 443 SCHECK_PARTIAL();
2223 ph10 510 MRRETURN(MATCH_NOMATCH);
2224 ph10 443 }
2225 nigel 77 GETCHARINCTEST(c, eptr);
2226     {
2227 ph10 349 int category = UCD_CATEGORY(c);
2228 ph10 510 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2229 nigel 77 while (eptr < md->end_subject)
2230     {
2231     int len = 1;
2232     if (!utf8) c = *eptr; else
2233     {
2234     GETCHARLEN(c, eptr, len);
2235     }
2236 ph10 349 category = UCD_CATEGORY(c);
2237 nigel 77 if (category != ucp_M) break;
2238     eptr += len;
2239     }
2240     }
2241     ecode++;
2242     break;
2243     #endif
2244    
2245    
2246     /* Match a back reference, possibly repeatedly. Look past the end of the
2247     item to see if there is repeat information following. The code is similar
2248     to that for character classes, but repeated for efficiency. Then obey
2249     similar code to character type repeats - written out again for speed.
2250     However, if the referenced string is the empty string, always treat
2251     it as matched, any number of times (otherwise there could be infinite
2252     loops). */
2253    
2254     case OP_REF:
2255     {
2256     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2257 ph10 345 ecode += 3;
2258    
2259 ph10 336 /* If the reference is unset, there are two possibilities:
2260 ph10 345
2261 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
2262     than the amount of subject left; this ensures that every attempt at a
2263     match fails. We can't just fail here, because of the possibility of
2264     quantifiers with zero minima.
2265 ph10 345
2266     (b) If the JavaScript compatibility flag is set, set the length to zero
2267     so that the back reference matches an empty string.
2268    
2269     Otherwise, set the length to the length of what was matched by the
2270 ph10 336 referenced subpattern. */
2271 ph10 345
2272 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
2273 ph10 530 length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2274 ph10 336 else
2275     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2276 nigel 77
2277     /* Set up for repetition, or handle the non-repeated case */
2278    
2279     switch (*ecode)
2280     {
2281     case OP_CRSTAR:
2282     case OP_CRMINSTAR:
2283     case OP_CRPLUS:
2284     case OP_CRMINPLUS:
2285     case OP_CRQUERY:
2286     case OP_CRMINQUERY:
2287     c = *ecode++ - OP_CRSTAR;
2288     minimize = (c & 1) != 0;
2289     min = rep_min[c]; /* Pick up values from tables; */
2290     max = rep_max[c]; /* zero for max => infinity */
2291     if (max == 0) max = INT_MAX;
2292     break;
2293    
2294     case OP_CRRANGE:
2295     case OP_CRMINRANGE:
2296     minimize = (*ecode == OP_CRMINRANGE);
2297     min = GET2(ecode, 1);
2298     max = GET2(ecode, 3);
2299     if (max == 0) max = INT_MAX;
2300     ecode += 5;
2301     break;
2302    
2303     default: /* No repeat follows */
2304 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
2305 ph10 428 {
2306 ph10 443 CHECK_PARTIAL();
2307 ph10 510 MRRETURN(MATCH_NOMATCH);
2308 ph10 443 }
2309 nigel 77 eptr += length;
2310     continue; /* With the main loop */
2311     }
2312    
2313     /* If the length of the reference is zero, just continue with the
2314     main loop. */
2315 ph10 443
2316 nigel 77 if (length == 0) continue;
2317    
2318     /* First, ensure the minimum number of matches are present. We get back
2319     the length of the reference string explicitly rather than passing the
2320     address of eptr, so that eptr can be a register variable. */
2321    
2322     for (i = 1; i <= min; i++)
2323     {
2324 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
2325 ph10 426 {
2326 ph10 427 CHECK_PARTIAL();
2327 ph10 510 MRRETURN(MATCH_NOMATCH);
2328 ph10 427 }
2329 nigel 77 eptr += length;
2330     }
2331    
2332     /* If min = max, continue at the same level without recursion.
2333     They are not both allowed to be zero. */
2334    
2335     if (min == max) continue;
2336    
2337     /* If minimizing, keep trying and advancing the pointer */
2338    
2339     if (minimize)
2340     {
2341     for (fi = min;; fi++)
2342     {
2343 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2344 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2345 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2346 ph10 428 if (!match_ref(offset, eptr, length, md, ims))
2347 ph10 426 {
2348 ph10 427 CHECK_PARTIAL();
2349 ph10 510 MRRETURN(MATCH_NOMATCH);
2350 ph10 427 }
2351 nigel 77 eptr += length;
2352     }
2353     /* Control never gets here */
2354     }
2355    
2356     /* If maximizing, find the longest string and work backwards */
2357    
2358     else
2359     {
2360     pp = eptr;
2361     for (i = min; i < max; i++)
2362     {
2363 ph10 463 if (!match_ref(offset, eptr, length, md, ims))
2364 ph10 462 {
2365 ph10 463 CHECK_PARTIAL();
2366 ph10 462 break;
2367 ph10 463 }
2368 nigel 77 eptr += length;
2369     }
2370     while (eptr >= pp)
2371     {
2372 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2373 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2374     eptr -= length;
2375     }
2376 ph10 510 MRRETURN(MATCH_NOMATCH);
2377 nigel 77 }
2378     }
2379     /* Control never gets here */
2380    
2381     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2382     used when all the characters in the class have values in the range 0-255,
2383     and either the matching is caseful, or the characters are in the range
2384     0-127 when UTF-8 processing is enabled. The only difference between
2385     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2386     encountered.
2387    
2388     First, look past the end of the item to see if there is repeat information
2389     following. Then obey similar code to character type repeats - written out
2390     again for speed. */
2391    
2392     case OP_NCLASS:
2393     case OP_CLASS:
2394     {
2395     data = ecode + 1; /* Save for matching */
2396     ecode += 33; /* Advance past the item */
2397    
2398     switch (*ecode)
2399     {
2400     case OP_CRSTAR:
2401     case OP_CRMINSTAR:
2402     case OP_CRPLUS:
2403     case OP_CRMINPLUS:
2404     case OP_CRQUERY:
2405     case OP_CRMINQUERY:
2406     c = *ecode++ - OP_CRSTAR;
2407     minimize = (c & 1) != 0;
2408     min = rep_min[c]; /* Pick up values from tables; */
2409     max = rep_max[c]; /* zero for max => infinity */
2410     if (max == 0) max = INT_MAX;
2411     break;
2412    
2413     case OP_CRRANGE:
2414     case OP_CRMINRANGE:
2415     minimize = (*ecode == OP_CRMINRANGE);
2416     min = GET2(ecode, 1);
2417     max = GET2(ecode, 3);
2418     if (max == 0) max = INT_MAX;
2419     ecode += 5;
2420     break;
2421    
2422     default: /* No repeat follows */
2423     min = max = 1;
2424     break;
2425     }
2426    
2427     /* First, ensure the minimum number of matches are present. */
2428    
2429     #ifdef SUPPORT_UTF8
2430     /* UTF-8 mode */
2431     if (utf8)
2432     {
2433     for (i = 1; i <= min; i++)
2434     {
2435 ph10 427 if (eptr >= md->end_subject)
2436 ph10 426 {
2437 ph10 428 SCHECK_PARTIAL();
2438 ph10 510 MRRETURN(MATCH_NOMATCH);
2439 ph10 427 }
2440 nigel 77 GETCHARINC(c, eptr);
2441     if (c > 255)
2442     {
2443 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2444 nigel 77 }
2445     else
2446     {
2447 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2448 nigel 77 }
2449     }
2450     }
2451     else
2452     #endif
2453     /* Not UTF-8 mode */
2454     {
2455     for (i = 1; i <= min; i++)
2456     {
2457 ph10 427 if (eptr >= md->end_subject)
2458 ph10 426 {
2459 ph10 428 SCHECK_PARTIAL();
2460 ph10 510 MRRETURN(MATCH_NOMATCH);
2461 ph10 427 }
2462 nigel 77 c = *eptr++;
2463 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2464 nigel 77 }
2465     }
2466    
2467     /* If max == min we can continue with the main loop without the
2468     need to recurse. */
2469    
2470     if (min == max) continue;
2471    
2472     /* If minimizing, keep testing the rest of the expression and advancing
2473     the pointer while it matches the class. */
2474    
2475     if (minimize)
2476     {
2477     #ifdef SUPPORT_UTF8
2478     /* UTF-8 mode */
2479     if (utf8)
2480     {
2481     for (fi = min;; fi++)
2482     {
2483 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2484 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2485 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2486 ph10 427 if (eptr >= md->end_subject)
2487 ph10 426 {
2488 ph10 427 SCHECK_PARTIAL();
2489 ph10 510 MRRETURN(MATCH_NOMATCH);
2490 ph10 427 }
2491 nigel 77 GETCHARINC(c, eptr);
2492     if (c > 255)
2493     {
2494 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2495 nigel 77 }
2496     else
2497     {
2498 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2499 nigel 77 }
2500     }
2501     }
2502     else
2503     #endif
2504     /* Not UTF-8 mode */
2505     {
2506     for (fi = min;; fi++)
2507     {
2508 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2509 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2510 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2511 ph10 427 if (eptr >= md->end_subject)
2512 ph10 426 {
2513 ph10 427 SCHECK_PARTIAL();
2514 ph10 510 MRRETURN(MATCH_NOMATCH);
2515 ph10 427 }
2516 nigel 77 c = *eptr++;
2517 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2518 nigel 77 }
2519     }
2520     /* Control never gets here */
2521     }
2522    
2523     /* If maximizing, find the longest possible run, then work backwards. */
2524    
2525     else
2526     {
2527     pp = eptr;
2528    
2529     #ifdef SUPPORT_UTF8
2530     /* UTF-8 mode */
2531     if (utf8)
2532     {
2533     for (i = min; i < max; i++)
2534     {
2535     int len = 1;
2536 ph10 463 if (eptr >= md->end_subject)
2537 ph10 462 {
2538 ph10 463 SCHECK_PARTIAL();
2539 ph10 462 break;
2540 ph10 463 }
2541 nigel 77 GETCHARLEN(c, eptr, len);
2542     if (c > 255)
2543     {
2544     if (op == OP_CLASS) break;
2545     }
2546     else
2547     {
2548     if ((data[c/8] & (1 << (c&7))) == 0) break;
2549     }
2550     eptr += len;
2551     }
2552     for (;;)
2553     {
2554 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2555 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2556     if (eptr-- == pp) break; /* Stop if tried at original pos */
2557     BACKCHAR(eptr);
2558     }
2559     }
2560     else
2561     #endif
2562     /* Not UTF-8 mode */
2563     {
2564     for (i = min; i < max; i++)
2565     {
2566 ph10 463 if (eptr >= md->end_subject)
2567 ph10 462 {
2568 ph10 463 SCHECK_PARTIAL();
2569 ph10 462 break;
2570 ph10 463 }
2571 nigel 77 c = *eptr;
2572     if ((data[c/8] & (1 << (c&7))) == 0) break;
2573     eptr++;
2574     }
2575     while (eptr >= pp)
2576     {
2577 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2578 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2579 nigel 77 eptr--;
2580     }
2581     }
2582    
2583 ph10 510 MRRETURN(MATCH_NOMATCH);
2584 nigel 77 }
2585     }
2586     /* Control never gets here */
2587    
2588    
2589     /* Match an extended character class. This opcode is encountered only
2590 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2591     mode, because Unicode properties are supported in non-UTF-8 mode. */
2592 nigel 77
2593     #ifdef SUPPORT_UTF8
2594     case OP_XCLASS:
2595     {
2596     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2597     ecode += GET(ecode, 1); /* Advance past the item */
2598    
2599     switch (*ecode)
2600     {
2601     case OP_CRSTAR:
2602     case OP_CRMINSTAR:
2603     case OP_CRPLUS:
2604     case OP_CRMINPLUS:
2605     case OP_CRQUERY:
2606     case OP_CRMINQUERY:
2607     c = *ecode++ - OP_CRSTAR;
2608     minimize = (c & 1) != 0;
2609     min = rep_min[c]; /* Pick up values from tables; */
2610     max = rep_max[c]; /* zero for max => infinity */
2611     if (max == 0) max = INT_MAX;
2612     break;
2613    
2614     case OP_CRRANGE:
2615     case OP_CRMINRANGE:
2616     minimize = (*ecode == OP_CRMINRANGE);
2617     min = GET2(ecode, 1);
2618     max = GET2(ecode, 3);
2619     if (max == 0) max = INT_MAX;
2620     ecode += 5;
2621     break;
2622    
2623     default: /* No repeat follows */
2624     min = max = 1;
2625     break;
2626     }
2627    
2628     /* First, ensure the minimum number of matches are present. */
2629    
2630     for (i = 1; i <= min; i++)
2631     {
2632 ph10 427 if (eptr >= md->end_subject)
2633 ph10 426 {
2634     SCHECK_PARTIAL();
2635 ph10 510 MRRETURN(MATCH_NOMATCH);
2636 ph10 427 }
2637 ph10 384 GETCHARINCTEST(c, eptr);
2638 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2639 nigel 77 }
2640    
2641     /* If max == min we can continue with the main loop without the
2642     need to recurse. */
2643    
2644     if (min == max) continue;
2645    
2646     /* If minimizing, keep testing the rest of the expression and advancing
2647     the pointer while it matches the class. */
2648    
2649     if (minimize)
2650     {
2651     for (fi = min;; fi++)
2652     {
2653 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2654 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2655 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2656 ph10 427 if (eptr >= md->end_subject)
2657 ph10 426 {
2658 ph10 427 SCHECK_PARTIAL();
2659 ph10 510 MRRETURN(MATCH_NOMATCH);
2660 ph10 427 }
2661 ph10 384 GETCHARINCTEST(c, eptr);
2662 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2663 nigel 77 }
2664     /* Control never gets here */
2665     }
2666    
2667     /* If maximizing, find the longest possible run, then work backwards. */
2668    
2669     else
2670     {
2671     pp = eptr;
2672     for (i = min; i < max; i++)
2673     {
2674     int len = 1;
2675 ph10 463 if (eptr >= md->end_subject)
2676 ph10 462 {
2677 ph10 463 SCHECK_PARTIAL();
2678 ph10 462 break;
2679 ph10 463 }
2680 ph10 384 GETCHARLENTEST(c, eptr, len);
2681 nigel 77 if (!_pcre_xclass(c, data)) break;
2682     eptr += len;
2683     }
2684     for(;;)
2685     {
2686 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2687 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2688     if (eptr-- == pp) break; /* Stop if tried at original pos */
2689 ph10 214 if (utf8) BACKCHAR(eptr);
2690 nigel 77 }
2691 ph10 510 MRRETURN(MATCH_NOMATCH);
2692 nigel 77 }
2693    
2694     /* Control never gets here */
2695     }
2696     #endif /* End of XCLASS */
2697    
2698     /* Match a single character, casefully */
2699    
2700     case OP_CHAR:
2701     #ifdef SUPPORT_UTF8
2702     if (utf8)
2703     {
2704     length = 1;
2705     ecode++;
2706     GETCHARLEN(fc, ecode, length);
2707 ph10 443 if (length > md->end_subject - eptr)
2708 ph10 428 {
2709     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2710 ph10 510 MRRETURN(MATCH_NOMATCH);
2711 ph10 443 }
2712 ph10 510 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2713 nigel 77 }
2714     else
2715     #endif
2716    
2717     /* Non-UTF-8 mode */
2718     {
2719 ph10 443 if (md->end_subject - eptr < 1)
2720 ph10 428 {
2721     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2722 ph10 510 MRRETURN(MATCH_NOMATCH);
2723 ph10 443 }
2724 ph10 510 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2725 nigel 77 ecode += 2;
2726     }
2727     break;
2728    
2729     /* Match a single character, caselessly */
2730    
2731     case OP_CHARNC:
2732     #ifdef SUPPORT_UTF8
2733     if (utf8)
2734     {
2735     length = 1;
2736     ecode++;
2737     GETCHARLEN(fc, ecode, length);
2738    
2739 ph10 443 if (length > md->end_subject - eptr)
2740 ph10 428 {
2741     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2742 ph10 510 MRRETURN(MATCH_NOMATCH);
2743 ph10 443 }
2744 nigel 77
2745     /* If the pattern character's value is < 128, we have only one byte, and
2746     can use the fast lookup table. */
2747    
2748     if (fc < 128)
2749     {
2750 ph10 510 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2751 nigel 77 }
2752    
2753     /* Otherwise we must pick up the subject character */
2754    
2755     else
2756     {
2757 nigel 93 unsigned int dc;
2758 nigel 77 GETCHARINC(dc, eptr);
2759     ecode += length;
2760    
2761     /* If we have Unicode property support, we can use it to test the other
2762 nigel 87 case of the character, if there is one. */
2763 nigel 77
2764     if (fc != dc)
2765     {
2766     #ifdef SUPPORT_UCP
2767 ph10 349 if (dc != UCD_OTHERCASE(fc))
2768 nigel 77 #endif
2769 ph10 510 MRRETURN(MATCH_NOMATCH);
2770 nigel 77 }
2771     }
2772     }
2773     else
2774     #endif /* SUPPORT_UTF8 */
2775    
2776     /* Non-UTF-8 mode */
2777     {
2778 ph10 443 if (md->end_subject - eptr < 1)
2779 ph10 428 {
2780 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2781 ph10 510 MRRETURN(MATCH_NOMATCH);
2782 ph10 443 }
2783 ph10 510 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2784 nigel 77 ecode += 2;
2785     }
2786     break;
2787    
2788 nigel 93 /* Match a single character repeatedly. */
2789 nigel 77
2790     case OP_EXACT:
2791     min = max = GET2(ecode, 1);
2792     ecode += 3;
2793     goto REPEATCHAR;
2794    
2795 nigel 93 case OP_POSUPTO:
2796     possessive = TRUE;
2797     /* Fall through */
2798    
2799 nigel 77 case OP_UPTO:
2800     case OP_MINUPTO:
2801     min = 0;
2802     max = GET2(ecode, 1);
2803     minimize = *ecode == OP_MINUPTO;
2804     ecode += 3;
2805     goto REPEATCHAR;
2806    
2807 nigel 93 case OP_POSSTAR:
2808     possessive = TRUE;
2809     min = 0;
2810     max = INT_MAX;
2811     ecode++;
2812     goto REPEATCHAR;
2813    
2814     case OP_POSPLUS:
2815     possessive = TRUE;
2816     min = 1;
2817     max = INT_MAX;
2818     ecode++;
2819     goto REPEATCHAR;
2820    
2821     case OP_POSQUERY:
2822     possessive = TRUE;
2823     min = 0;
2824     max = 1;
2825     ecode++;
2826     goto REPEATCHAR;
2827    
2828 nigel 77 case OP_STAR:
2829     case OP_MINSTAR:
2830     case OP_PLUS:
2831     case OP_MINPLUS:
2832     case OP_QUERY:
2833     case OP_MINQUERY:
2834     c = *ecode++ - OP_STAR;
2835     minimize = (c & 1) != 0;
2836 ph10 443
2837 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2838     max = rep_max[c]; /* zero for max => infinity */
2839     if (max == 0) max = INT_MAX;
2840    
2841 ph10 426 /* Common code for all repeated single-character matches. */
2842 nigel 77
2843     REPEATCHAR:
2844     #ifdef SUPPORT_UTF8
2845     if (utf8)
2846     {
2847     length = 1;
2848     charptr = ecode;
2849     GETCHARLEN(fc, ecode, length);
2850     ecode += length;
2851    
2852     /* Handle multibyte character matching specially here. There is
2853     support for caseless matching if UCP support is present. */
2854    
2855     if (length > 1)
2856     {
2857     #ifdef SUPPORT_UCP
2858 nigel 93 unsigned int othercase;
2859 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2860 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2861 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2862 ph10 115 else oclength = 0;
2863 nigel 77 #endif /* SUPPORT_UCP */
2864    
2865     for (i = 1; i <= min; i++)
2866     {
2867 ph10 426 if (eptr <= md->end_subject - length &&
2868     memcmp(eptr, charptr, length) == 0) eptr += length;
2869 ph10 123 #ifdef SUPPORT_UCP
2870 ph10 426 else if (oclength > 0 &&
2871     eptr <= md->end_subject - oclength &&
2872     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2873     #endif /* SUPPORT_UCP */
2874 nigel 77 else
2875     {
2876 ph10 426 CHECK_PARTIAL();
2877 ph10 510 MRRETURN(MATCH_NOMATCH);
2878 nigel 77 }
2879     }
2880    
2881     if (min == max) continue;
2882    
2883     if (minimize)
2884     {
2885     for (fi = min;; fi++)
2886     {
2887 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2888 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2889 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2890 ph10 426 if (eptr <= md->end_subject - length &&
2891     memcmp(eptr, charptr, length) == 0) eptr += length;
2892 ph10 123 #ifdef SUPPORT_UCP
2893 ph10 426 else if (oclength > 0 &&
2894     eptr <= md->end_subject - oclength &&
2895     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2896     #endif /* SUPPORT_UCP */
2897 nigel 77 else
2898     {
2899 ph10 426 CHECK_PARTIAL();
2900 ph10 510 MRRETURN(MATCH_NOMATCH);
2901 nigel 77 }
2902     }
2903     /* Control never gets here */
2904     }
2905 nigel 93
2906     else /* Maximize */
2907 nigel 77 {
2908     pp = eptr;
2909     for (i = min; i < max; i++)
2910     {
2911 ph10 426 if (eptr <= md->end_subject - length &&
2912     memcmp(eptr, charptr, length) == 0) eptr += length;
2913 ph10 123 #ifdef SUPPORT_UCP
2914 ph10 426 else if (oclength > 0 &&
2915     eptr <= md->end_subject - oclength &&
2916     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2917     #endif /* SUPPORT_UCP */
2918 ph10 463 else
2919 ph10 462 {
2920 ph10 463 CHECK_PARTIAL();
2921 ph10 462 break;
2922 ph10 463 }
2923 nigel 77 }
2924 nigel 93
2925     if (possessive) continue;
2926 ph10 427
2927 ph10 120 for(;;)
2928 ph10 426 {
2929     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2930     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2931 ph10 510 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2932 ph10 115 #ifdef SUPPORT_UCP
2933 ph10 426 eptr--;
2934     BACKCHAR(eptr);
2935 ph10 123 #else /* without SUPPORT_UCP */
2936 ph10 426 eptr -= length;
2937 ph10 123 #endif /* SUPPORT_UCP */
2938 ph10 426 }
2939 nigel 77 }
2940     /* Control never gets here */
2941     }
2942    
2943     /* If the length of a UTF-8 character is 1, we fall through here, and
2944     obey the code as for non-UTF-8 characters below, though in this case the
2945     value of fc will always be < 128. */
2946     }
2947     else
2948     #endif /* SUPPORT_UTF8 */
2949    
2950     /* When not in UTF-8 mode, load a single-byte character. */
2951    
2952 ph10 426 fc = *ecode++;
2953 ph10 443
2954 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2955     may not be in UTF-8 mode. The code is duplicated for the caseless and
2956     caseful cases, for speed, since matching characters is likely to be quite
2957     common. First, ensure the minimum number of matches are present. If min =
2958     max, continue at the same level without recursing. Otherwise, if
2959     minimizing, keep trying the rest of the expression and advancing one
2960     matching character if failing, up to the maximum. Alternatively, if
2961     maximizing, find the maximum number of characters and work backwards. */
2962    
2963     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2964     max, eptr));
2965    
2966     if ((ims & PCRE_CASELESS) != 0)
2967     {
2968     fc = md->lcc[fc];
2969     for (i = 1; i <= min; i++)
2970 ph10 426 {
2971     if (eptr >= md->end_subject)
2972     {
2973     SCHECK_PARTIAL();
2974 ph10 510 MRRETURN(MATCH_NOMATCH);
2975 ph10 426 }
2976 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2977 ph10 426 }
2978 nigel 77 if (min == max) continue;
2979     if (minimize)
2980     {
2981     for (fi = min;; fi++)
2982     {
2983 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2984 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2985 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2986 ph10 426 if (eptr >= md->end_subject)
2987     {
2988 ph10 427 SCHECK_PARTIAL();
2989 ph10 510 MRRETURN(MATCH_NOMATCH);
2990 ph10 426 }
2991 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2992 nigel 77 }
2993     /* Control never gets here */
2994     }
2995 nigel 93 else /* Maximize */
2996 nigel 77 {
2997     pp = eptr;
2998     for (i = min; i < max; i++)
2999     {
3000 ph10 463 if (eptr >= md->end_subject)
3001 ph10 462 {
3002     SCHECK_PARTIAL();
3003     break;
3004 ph10 463 }
3005 ph10 462 if (fc != md->lcc[*eptr]) break;
3006 nigel 77 eptr++;
3007     }
3008 ph10 427
3009 nigel 93 if (possessive) continue;
3010 ph10 427
3011 nigel 77 while (eptr >= pp)
3012     {
3013 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3014 nigel 77 eptr--;
3015     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3016     }
3017 ph10 510 MRRETURN(MATCH_NOMATCH);
3018 nigel 77 }
3019     /* Control never gets here */
3020     }
3021    
3022     /* Caseful comparisons (includes all multi-byte characters) */
3023    
3024     else
3025     {
3026 ph10 427 for (i = 1; i <= min; i++)
3027 ph10 426 {
3028     if (eptr >= md->end_subject)
3029     {
3030     SCHECK_PARTIAL();
3031 ph10 510 MRRETURN(MATCH_NOMATCH);
3032 ph10 426 }
3033 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3034 ph10 427 }
3035 ph10 443
3036 nigel 77 if (min == max) continue;
3037 ph10 443
3038 nigel 77 if (minimize)
3039     {
3040     for (fi = min;; fi++)
3041     {
3042 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3043 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3044 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3045 ph10 426 if (eptr >= md->end_subject)
3046 ph10 427 {
3047 ph10 426 SCHECK_PARTIAL();
3048 ph10 510 MRRETURN(MATCH_NOMATCH);
3049 ph10 427 }
3050 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3051 nigel 77 }
3052     /* Control never gets here */
3053     }
3054 nigel 93 else /* Maximize */
3055 nigel 77 {
3056     pp = eptr;
3057     for (i = min; i < max; i++)
3058     {
3059 ph10 463 if (eptr >= md->end_subject)
3060 ph10 462 {
3061 ph10 463 SCHECK_PARTIAL();
3062 ph10 462 break;
3063 ph10 463 }
3064 ph10 462 if (fc != *eptr) break;
3065 nigel 77 eptr++;
3066     }
3067 nigel 93 if (possessive) continue;
3068 ph10 443
3069 nigel 77 while (eptr >= pp)
3070     {
3071 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3072 nigel 77 eptr--;
3073     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3074     }
3075 ph10 510 MRRETURN(MATCH_NOMATCH);
3076 nigel 77 }
3077     }
3078     /* Control never gets here */
3079    
3080     /* Match a negated single one-byte character. The character we are
3081     checking can be multibyte. */
3082    
3083     case OP_NOT:
3084 ph10 443 if (eptr >= md->end_subject)
3085 ph10 428 {
3086 ph10 443 SCHECK_PARTIAL();
3087 ph10 510 MRRETURN(MATCH_NOMATCH);
3088 ph10 443 }
3089 nigel 77 ecode++;
3090     GETCHARINCTEST(c, eptr);
3091     if ((ims & PCRE_CASELESS) != 0)
3092     {
3093     #ifdef SUPPORT_UTF8
3094     if (c < 256)
3095     #endif
3096     c = md->lcc[c];
3097 ph10 510 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3098 nigel 77 }
3099     else
3100     {
3101 ph10 510 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3102 nigel 77 }
3103     break;
3104    
3105     /* Match a negated single one-byte character repeatedly. This is almost a
3106     repeat of the code for a repeated single character, but I haven't found a
3107     nice way of commoning these up that doesn't require a test of the
3108     positive/negative option for each character match. Maybe that wouldn't add
3109     very much to the time taken, but character matching *is* what this is all
3110     about... */
3111    
3112     case OP_NOTEXACT:
3113     min = max = GET2(ecode, 1);
3114     ecode += 3;
3115     goto REPEATNOTCHAR;
3116    
3117     case OP_NOTUPTO:
3118     case OP_NOTMINUPTO:
3119     min = 0;
3120     max = GET2(ecode, 1);
3121     minimize = *ecode == OP_NOTMINUPTO;
3122     ecode += 3;
3123     goto REPEATNOTCHAR;
3124    
3125 nigel 93 case OP_NOTPOSSTAR:
3126     possessive = TRUE;
3127     min = 0;
3128     max = INT_MAX;
3129     ecode++;
3130     goto REPEATNOTCHAR;
3131    
3132     case OP_NOTPOSPLUS:
3133     possessive = TRUE;
3134     min = 1;
3135     max = INT_MAX;
3136     ecode++;
3137     goto REPEATNOTCHAR;
3138    
3139     case OP_NOTPOSQUERY:
3140     possessive = TRUE;
3141     min = 0;
3142     max = 1;
3143     ecode++;
3144     goto REPEATNOTCHAR;
3145    
3146     case OP_NOTPOSUPTO:
3147     possessive = TRUE;
3148     min = 0;
3149     max = GET2(ecode, 1);
3150     ecode += 3;
3151     goto REPEATNOTCHAR;
3152    
3153 nigel 77 case OP_NOTSTAR:
3154     case OP_NOTMINSTAR:
3155     case OP_NOTPLUS:
3156     case OP_NOTMINPLUS:
3157     case OP_NOTQUERY:
3158     case OP_NOTMINQUERY:
3159     c = *ecode++ - OP_NOTSTAR;
3160     minimize = (c & 1) != 0;
3161     min = rep_min[c]; /* Pick up values from tables; */
3162     max = rep_max[c]; /* zero for max => infinity */
3163     if (max == 0) max = INT_MAX;
3164    
3165 ph10 426 /* Common code for all repeated single-byte matches. */
3166 nigel 77
3167     REPEATNOTCHAR:
3168     fc = *ecode++;
3169    
3170     /* The code is duplicated for the caseless and caseful cases, for speed,
3171     since matching characters is likely to be quite common. First, ensure the
3172     minimum number of matches are present. If min = max, continue at the same
3173     level without recursing. Otherwise, if minimizing, keep trying the rest of
3174     the expression and advancing one matching character if failing, up to the
3175     maximum. Alternatively, if maximizing, find the maximum number of
3176     characters and work backwards. */
3177    
3178     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3179     max, eptr));
3180    
3181     if ((ims & PCRE_CASELESS) != 0)
3182     {
3183     fc = md->lcc[fc];
3184    
3185     #ifdef SUPPORT_UTF8
3186     /* UTF-8 mode */
3187     if (utf8)
3188     {
3189 nigel 93 register unsigned int d;
3190 nigel 77 for (i = 1; i <= min; i++)
3191     {
3192 ph10 426 if (eptr >= md->end_subject)
3193     {
3194     SCHECK_PARTIAL();
3195 ph10 510 MRRETURN(MATCH_NOMATCH);
3196 ph10 427 }
3197 nigel 77 GETCHARINC(d, eptr);
3198     if (d < 256) d = md->lcc[d];
3199 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3200 nigel 77 }
3201     }
3202     else
3203     #endif
3204    
3205     /* Not UTF-8 mode */
3206     {
3207     for (i = 1; i <= min; i++)
3208 ph10 426 {
3209     if (eptr >= md->end_subject)
3210     {
3211     SCHECK_PARTIAL();
3212 ph10 510 MRRETURN(MATCH_NOMATCH);
3213 ph10 427 }
3214 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3215 ph10 427 }
3216 nigel 77 }
3217    
3218     if (min == max) continue;
3219    
3220     if (minimize)
3221     {
3222     #ifdef SUPPORT_UTF8
3223     /* UTF-8 mode */
3224     if (utf8)
3225     {
3226 nigel 93 register unsigned int d;
3227 nigel 77 for (fi = min;; fi++)
3228     {
3229 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3230 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3231 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3232 ph10 427 if (eptr >= md->end_subject)
3233 ph10 426 {
3234 ph10 427 SCHECK_PARTIAL();
3235 ph10 510 MRRETURN(MATCH_NOMATCH);
3236 ph10 427 }
3237 nigel 77 GETCHARINC(d, eptr);
3238     if (d < 256) d = md->lcc[d];
3239 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3240 nigel 77 }
3241     }
3242     else
3243     #endif
3244     /* Not UTF-8 mode */
3245     {
3246     for (fi = min;; fi++)
3247     {
3248 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3249 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3250 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3251 ph10 426 if (eptr >= md->end_subject)
3252     {
3253     SCHECK_PARTIAL();
3254 ph10 510 MRRETURN(MATCH_NOMATCH);
3255 ph10 426 }
3256 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3257 nigel 77 }
3258     }
3259     /* Control never gets here */
3260     }
3261    
3262     /* Maximize case */
3263    
3264     else
3265     {
3266     pp = eptr;
3267    
3268     #ifdef SUPPORT_UTF8
3269     /* UTF-8 mode */
3270     if (utf8)
3271     {
3272 nigel 93 register unsigned int d;
3273 nigel 77 for (i = min; i < max; i++)
3274     {
3275     int len = 1;
3276 ph10 463 if (eptr >= md->end_subject)
3277 ph10 462 {
3278 ph10 463 SCHECK_PARTIAL();
3279 ph10 462 break;
3280 ph10 463 }
3281 nigel 77 GETCHARLEN(d, eptr, len);
3282     if (d < 256) d = md->lcc[d];
3283     if (fc == d) break;
3284     eptr += len;
3285     }
3286 nigel 93 if (possessive) continue;
3287     for(;;)
3288 nigel 77 {
3289 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3290 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3291     if (eptr-- == pp) break; /* Stop if tried at original pos */
3292     BACKCHAR(eptr);
3293     }
3294     }
3295     else
3296     #endif
3297     /* Not UTF-8 mode */
3298     {
3299     for (i = min; i < max; i++)
3300     {
3301 ph10 463 if (eptr >= md->end_subject)
3302 ph10 462 {
3303     SCHECK_PARTIAL();
3304     break;
3305 ph10 463 }
3306 ph10 462 if (fc == md->lcc[*eptr]) break;
3307 nigel 77 eptr++;
3308     }
3309 nigel 93 if (possessive) continue;
3310 nigel 77 while (eptr >= pp)
3311     {
3312 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3313 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3314     eptr--;
3315     }
3316     }
3317    
3318 ph10 510 MRRETURN(MATCH_NOMATCH);
3319 nigel 77 }
3320     /* Control never gets here */
3321     }
3322    
3323     /* Caseful comparisons */
3324    
3325     else
3326     {
3327     #ifdef SUPPORT_UTF8
3328     /* UTF-8 mode */
3329     if (utf8)
3330     {
3331 nigel 93 register unsigned int d;
3332 nigel 77 for (i = 1; i <= min; i++)
3333     {
3334 ph10 426 if (eptr >= md->end_subject)
3335     {
3336     SCHECK_PARTIAL();
3337 ph10 510 MRRETURN(MATCH_NOMATCH);
3338 ph10 427 }
3339 nigel 77 GETCHARINC(d, eptr);
3340 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3341 nigel 77 }
3342     }
3343     else
3344     #endif
3345     /* Not UTF-8 mode */
3346     {
3347     for (i = 1; i <= min; i++)
3348 ph10 426 {
3349     if (eptr >= md->end_subject)
3350     {
3351     SCHECK_PARTIAL();
3352 ph10 510 MRRETURN(MATCH_NOMATCH);
3353 ph10 427 }
3354 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3355 ph10 427 }
3356 nigel 77 }
3357    
3358     if (min == max) continue;
3359    
3360     if (minimize)
3361     {
3362     #ifdef SUPPORT_UTF8
3363     /* UTF-8 mode */
3364     if (utf8)
3365     {
3366 nigel 93 register unsigned int d;
3367 nigel 77 for (fi = min;; fi++)
3368     {
3369 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3370 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3371 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3372 ph10 427 if (eptr >= md->end_subject)
3373 ph10 426 {
3374 ph10 427 SCHECK_PARTIAL();
3375 ph10 510 MRRETURN(MATCH_NOMATCH);
3376 ph10 427 }
3377 nigel 77 GETCHARINC(d, eptr);
3378 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3379 nigel 77 }
3380     }
3381     else
3382     #endif
3383     /* Not UTF-8 mode */
3384     {
3385     for (fi = min;; fi++)
3386     {
3387 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3388 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3390 ph10 426 if (eptr >= md->end_subject)
3391     {
3392     SCHECK_PARTIAL();
3393 ph10 510 MRRETURN(MATCH_NOMATCH);
3394 ph10 427 }
3395 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3396 nigel 77 }
3397     }
3398     /* Control never gets here */
3399     }
3400    
3401     /* Maximize case */
3402    
3403     else
3404     {
3405     pp = eptr;
3406    
3407     #ifdef SUPPORT_UTF8
3408     /* UTF-8 mode */
3409     if (utf8)
3410     {
3411 nigel 93 register unsigned int d;
3412 nigel 77 for (i = min; i < max; i++)
3413     {
3414     int len = 1;
3415 ph10 463 if (eptr >= md->end_subject)
3416 ph10 462 {
3417 ph10 463 SCHECK_PARTIAL();
3418 ph10 462 break;
3419 ph10 463 }
3420 nigel 77 GETCHARLEN(d, eptr, len);
3421     if (fc == d) break;
3422     eptr += len;
3423     }
3424 nigel 93 if (possessive) continue;
3425 nigel 77 for(;;)
3426     {
3427 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3428 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3429     if (eptr-- == pp) break; /* Stop if tried at original pos */
3430     BACKCHAR(eptr);
3431     }
3432     }
3433     else
3434     #endif
3435     /* Not UTF-8 mode */
3436     {
3437     for (i = min; i < max; i++)
3438     {
3439 ph10 463 if (eptr >= md->end_subject)
3440 ph10 462 {
3441 ph10 463 SCHECK_PARTIAL();
3442 ph10 462 break;
3443 ph10 463 }
3444 ph10 462 if (fc == *eptr) break;
3445 nigel 77 eptr++;
3446     }
3447 nigel 93 if (possessive) continue;
3448 nigel 77 while (eptr >= pp)
3449     {
3450 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3451 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3452     eptr--;
3453     }
3454     }
3455    
3456 ph10 510 MRRETURN(MATCH_NOMATCH);
3457 nigel 77 }
3458     }
3459     /* Control never gets here */
3460    
3461     /* Match a single character type repeatedly; several different opcodes
3462     share code. This is very similar to the code for single characters, but we
3463     repeat it in the interests of efficiency. */
3464    
3465     case OP_TYPEEXACT:
3466     min = max = GET2(ecode, 1);
3467     minimize = TRUE;
3468     ecode += 3;
3469     goto REPEATTYPE;
3470    
3471     case OP_TYPEUPTO:
3472     case OP_TYPEMINUPTO:
3473     min = 0;
3474     max = GET2(ecode, 1);
3475     minimize = *ecode == OP_TYPEMINUPTO;
3476     ecode += 3;
3477     goto REPEATTYPE;
3478    
3479 nigel 93 case OP_TYPEPOSSTAR:
3480     possessive = TRUE;
3481     min = 0;
3482     max = INT_MAX;
3483     ecode++;
3484     goto REPEATTYPE;
3485    
3486     case OP_TYPEPOSPLUS:
3487     possessive = TRUE;
3488     min = 1;
3489     max = INT_MAX;
3490     ecode++;
3491     goto REPEATTYPE;
3492    
3493     case OP_TYPEPOSQUERY:
3494     possessive = TRUE;
3495     min = 0;
3496     max = 1;
3497     ecode++;
3498     goto REPEATTYPE;
3499    
3500     case OP_TYPEPOSUPTO:
3501     possessive = TRUE;
3502     min = 0;
3503     max = GET2(ecode, 1);
3504     ecode += 3;
3505     goto REPEATTYPE;
3506    
3507 nigel 77 case OP_TYPESTAR:
3508     case OP_TYPEMINSTAR:
3509     case OP_TYPEPLUS:
3510     case OP_TYPEMINPLUS:
3511     case OP_TYPEQUERY:
3512     case OP_TYPEMINQUERY:
3513     c = *ecode++ - OP_TYPESTAR;
3514     minimize = (c & 1) != 0;
3515     min = rep_min[c]; /* Pick up values from tables; */
3516     max = rep_max[c]; /* zero for max => infinity */
3517     if (max == 0) max = INT_MAX;
3518    
3519     /* Common code for all repeated single character type matches. Note that
3520     in UTF-8 mode, '.' matches a character of any length, but for the other
3521     character types, the valid characters are all one-byte long. */
3522    
3523     REPEATTYPE:
3524     ctype = *ecode++; /* Code for the character type */
3525    
3526     #ifdef SUPPORT_UCP
3527     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3528     {
3529     prop_fail_result = ctype == OP_NOTPROP;
3530     prop_type = *ecode++;
3531 nigel 87 prop_value = *ecode++;
3532 nigel 77 }
3533     else prop_type = -1;
3534     #endif
3535    
3536     /* First, ensure the minimum number of matches are present. Use inline
3537     code for maximizing the speed, and do the type test once at the start
3538 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3539 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3540     and single-bytes. */
3541    
3542     if (min > 0)
3543     {
3544     #ifdef SUPPORT_UCP
3545 nigel 87 if (prop_type >= 0)
3546 nigel 77 {
3547 nigel 87 switch(prop_type)
3548 nigel 77 {
3549 nigel 87 case PT_ANY:
3550 ph10 510 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3551 nigel 87 for (i = 1; i <= min; i++)
3552     {
3553 ph10 427 if (eptr >= md->end_subject)
3554 ph10 426 {
3555 ph10 427 SCHECK_PARTIAL();
3556 ph10 510 MRRETURN(MATCH_NOMATCH);
3557 ph10 427 }
3558 ph10 184 GETCHARINCTEST(c, eptr);
3559 nigel 87 }
3560     break;
3561    
3562     case PT_LAMP:
3563     for (i = 1; i <= min; i++)
3564     {
3565 ph10 427 if (eptr >= md->end_subject)
3566 ph10 426 {
3567 ph10 427 SCHECK_PARTIAL();
3568 ph10 510 MRRETURN(MATCH_NOMATCH);
3569 ph10 427 }
3570 ph10 184 GETCHARINCTEST(c, eptr);
3571 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3572 nigel 87 if ((prop_chartype == ucp_Lu ||
3573     prop_chartype == ucp_Ll ||
3574     prop_chartype == ucp_Lt) == prop_fail_result)
3575 ph10 510 MRRETURN(MATCH_NOMATCH);
3576 nigel 87 }
3577     break;
3578    
3579     case PT_GC:
3580     for (i = 1; i <= min; i++)
3581     {
3582 ph10 427 if (eptr >= md->end_subject)
3583 ph10 426 {
3584 ph10 427 SCHECK_PARTIAL();
3585 ph10 510 MRRETURN(MATCH_NOMATCH);
3586 ph10 427 }
3587 ph10 184 GETCHARINCTEST(c, eptr);
3588 ph10 349 prop_category = UCD_CATEGORY(c);
3589 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3590 ph10 510 MRRETURN(MATCH_NOMATCH);
3591 nigel 87 }
3592     break;
3593    
3594     case PT_PC:
3595     for (i = 1; i <= min; i++)
3596     {
3597 ph10 427 if (eptr >= md->end_subject)
3598 ph10 426 {
3599 ph10 427 SCHECK_PARTIAL();
3600 ph10 510 MRRETURN(MATCH_NOMATCH);
3601 ph10 427 }
3602 ph10 184 GETCHARINCTEST(c, eptr);
3603 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3604 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3605 ph10 510 MRRETURN(MATCH_NOMATCH);
3606 nigel 87 }
3607     break;
3608    
3609     case PT_SC:
3610     for (i = 1; i <= min; i++)
3611     {
3612 ph10 427 if (eptr >= md->end_subject)
3613 ph10 426 {
3614 ph10 427 SCHECK_PARTIAL();
3615 ph10 510 MRRETURN(MATCH_NOMATCH);
3616 ph10 427 }
3617 ph10 184 GETCHARINCTEST(c, eptr);
3618 ph10 349 prop_script = UCD_SCRIPT(c);
3619 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3620 ph10 510 MRRETURN(MATCH_NOMATCH);
3621 nigel 87 }
3622     break;
3623 ph10 527
3624 ph10 517 case PT_ALNUM:
3625     for (i = 1; i <= min; i++)
3626     {
3627     if (eptr >= md->end_subject)
3628     {
3629     SCHECK_PARTIAL();
3630     MRRETURN(MATCH_NOMATCH);
3631     }
3632     GETCHARINCTEST(c, eptr);
3633 ph10 527 prop_category = UCD_CATEGORY(c);
3634     if ((prop_category == ucp_L || prop_category == ucp_N)
3635 ph10 517 == prop_fail_result)
3636     MRRETURN(MATCH_NOMATCH);
3637     }
3638     break;
3639 ph10 527
3640 ph10 517 case PT_SPACE: /* Perl space */
3641     for (i = 1; i <= min; i++)
3642     {
3643     if (eptr >= md->end_subject)
3644     {
3645     SCHECK_PARTIAL();
3646     MRRETURN(MATCH_NOMATCH);
3647     }
3648     GETCHARINCTEST(c, eptr);
3649 ph10 527 prop_category = UCD_CATEGORY(c);
3650     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3651     c == CHAR_FF || c == CHAR_CR)
3652 ph10 517 == prop_fail_result)
3653     MRRETURN(MATCH_NOMATCH);
3654     }
3655     break;
3656 ph10 527
3657 ph10 517 case PT_PXSPACE: /* POSIX space */
3658     for (i = 1; i <= min; i++)
3659     {
3660     if (eptr >= md->end_subject)
3661     {
3662     SCHECK_PARTIAL();
3663     MRRETURN(MATCH_NOMATCH);
3664     }
3665     GETCHARINCTEST(c, eptr);
3666 ph10 527 prop_category = UCD_CATEGORY(c);
3667     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3668     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3669 ph10 517 == prop_fail_result)
3670     MRRETURN(MATCH_NOMATCH);
3671     }
3672     break;
3673 ph10 527
3674     case PT_WORD:
3675 ph10 517 for (i = 1; i <= min; i++)
3676     {
3677     if (eptr >= md->end_subject)
3678     {
3679     SCHECK_PARTIAL();
3680     MRRETURN(MATCH_NOMATCH);
3681     }
3682     GETCHARINCTEST(c, eptr);
3683 ph10 527 prop_category = UCD_CATEGORY(c);
3684 ph10 517 if ((prop_category == ucp_L || prop_category == ucp_N ||
3685 ph10 527 c == CHAR_UNDERSCORE)
3686 ph10 517 == prop_fail_result)
3687     MRRETURN(MATCH_NOMATCH);
3688     }
3689     break;
3690 ph10 527
3691 ph10 517 /* This should not occur */
3692 nigel 87
3693     default:
3694     RRETURN(PCRE_ERROR_INTERNAL);
3695 nigel 77 }
3696     }
3697    
3698     /* Match extended Unicode sequences. We will get here only if the
3699     support is in the binary; otherwise a compile-time error occurs. */
3700    
3701     else if (ctype == OP_EXTUNI) /* min repetitions of: one character that is not category M, then any run of category-M characters */
3702     {
3703     for (i = 1; i <= min; i++)
3704     {
3705 ph10 427 if (eptr >= md->end_subject) /* subject ran out before min sequences were matched */
3706 ph10 426 {
3707 ph10 427 SCHECK_PARTIAL();
3708 ph10 510 MRRETURN(MATCH_NOMATCH);
3709 ph10 427 }
3710 nigel 77 GETCHARINCTEST(c, eptr); /* macro defined elsewhere; presumably loads the next character into c and advances eptr - confirm */
3711 ph10 349 prop_category = UCD_CATEGORY(c);
3712 ph10 510 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH); /* a sequence may not start with a category-M character */
3713 nigel 77 while (eptr < md->end_subject) /* absorb the category-M characters that follow; stopping at end of subject is not a failure here */
3714     {
3715     int len = 1; /* bytes to step over; stays 1 when not in UTF-8 mode */
3716 ph10 426 if (!utf8) c = *eptr;
3717     else { GETCHARLEN(c, eptr, len); } /* macro defined elsewhere; presumably reads the character at eptr without advancing and sets len to its byte length - confirm */
3718 ph10 349 prop_category = UCD_CATEGORY(c);
3719 nigel 77 if (prop_category != ucp_M) break; /* this character is left unconsumed for whatever is matched next */
3720     eptr += len;
3721     }
3722     }
3723     }
3724    
3725     else
3726     #endif /* SUPPORT_UCP */
3727    
3728     /* Handle all other cases when the coding is UTF-8 */
3729    
3730     #ifdef SUPPORT_UTF8
3731     if (utf8) switch(ctype)
3732     {
3733     case OP_ANY: /* UTF-8 mode: min characters, none of which may be at a newline */
3734     for (i = 1; i <= min; i++)
3735     {
3736 ph10 426 if (eptr >= md->end_subject) /* subject ran out before min characters were matched */
3737     {
3738 ph10 427 SCHECK_PARTIAL();
3739 ph10 510 MRRETURN(MATCH_NOMATCH);
3740 ph10 427 }
3741 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); /* IS_NEWLINE is defined elsewhere */
3742 nigel 91 eptr++; /* step over the first byte of the character */
3743 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; /* then over following bytes of the form 10xxxxxx, i.e. UTF-8 continuation bytes */
3744     }
3745     break;
3746    
3747 ph10 341 case OP_ALLANY: /* UTF-8 mode: min characters of any kind; same stepping as OP_ANY but with no newline test */
3748     for (i = 1; i <= min; i++)
3749     {
3750 ph10 427 if (eptr >= md->end_subject) /* subject ran out before min characters were matched */
3751 ph10 426 {
3752     SCHECK_PARTIAL();
3753 ph10 510 MRRETURN(MATCH_NOMATCH);
3754 ph10 427 }
3755 ph10 341 eptr++; /* step over the first byte of the character */
3756     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; /* then over following bytes of the form 10xxxxxx, i.e. UTF-8 continuation bytes */
3757     }
3758     break;
3759    
3760 nigel 77 case OP_ANYBYTE: /* min single bytes, counted as bytes even though this switch handles UTF-8 mode */
3761 ph10 510 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH); /* fewer than min bytes remain. NOTE(review): unlike the neighbouring cases there is no SCHECK_PARTIAL() before this failure - confirm that is intended */
3762 nigel 77 eptr += min; /* all min bytes are consumed in one step; no loop is needed */
3763     break;
3764    
3765 nigel 93 case OP_ANYNL: /* UTF-8 mode: min newline sequences */
3766     for (i = 1; i <= min; i++)
3767     {
3768 ph10 427 if (eptr >= md->end_subject) /* subject ran out before min newlines were matched */
3769 ph10 426 {
3770     SCHECK_PARTIAL();
3771 ph10 510 MRRETURN(MATCH_NOMATCH);
3772 ph10 427 }
3773 nigel 93 GETCHARINC(c, eptr); /* macro defined elsewhere; presumably loads the next character into c and advances eptr - confirm */
3774     switch(c)
3775     {
3776 ph10 510 default: MRRETURN(MATCH_NOMATCH); /* anything not listed below is not a newline */
3777 nigel 93 case 0x000d: /* CR */
3778     if (eptr < md->end_subject && *eptr == 0x0a) eptr++; /* an immediately following LF is consumed too, so CR LF counts as one newline */
3779     break;
3780 ph10 231
3781 nigel 93 case 0x000a: /* LF */
3782 ph10 231 break;
3783    
3784 nigel 93 case 0x000b: /* VT */
3785     case 0x000c: /* FF */
3786     case 0x0085: /* NEL */
3787     case 0x2028: /* LINE SEPARATOR */
3788     case 0x2029: /* PARAGRAPH SEPARATOR */
3789 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH); /* these five are rejected when md->bsr_anycrlf is set, leaving only CR, LF and CR LF */
3790 nigel 93 break;
3791     }
3792     }
3793     break;
3794    
3795 ph10 178 case OP_NOT_HSPACE: /* UTF-8 mode: min characters, none of which is one of the horizontal-space code points listed below */
3796     for (i = 1; i <= min; i++)
3797     {
3798 ph10 427 if (eptr >= md->end_subject) /* subject ran out before min characters were matched */
3799 ph10 426 {
3800     SCHECK_PARTIAL();
3801 ph10 510 MRRETURN(MATCH_NOMATCH);
3802 ph10 427 }
3803 ph10 178 GETCHARINC(c, eptr); /* macro defined elsewhere; presumably loads the next character into c and advances eptr - confirm */
3804     switch(c)
3805     {
3806     default: break; /* not in the list: accepted, continue with the next repetition */
3807     case 0x09: /* HT */
3808     case 0x20: /* SPACE */
3809     case 0xa0: /* NBSP */
3810     case 0x1680: /* OGHAM SPACE MARK */
3811     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3812     case 0x2000: /* EN QUAD */
3813     case 0x2001: /* EM QUAD */
3814     case 0x2002: /* EN SPACE */
3815     case 0x2003: /* EM SPACE */
3816     case 0x2004: /* THREE-PER-EM SPACE */
3817     case 0x2005: /* FOUR-PER-EM SPACE */
3818     case 0x2006: /* SIX-PER-EM SPACE */
3819     case 0x2007: /* FIGURE SPACE */
3820     case 0x2008: /* PUNCTUATION SPACE */
3821     case 0x2009: /* THIN SPACE */
3822     case 0x200A: /* HAIR SPACE */
3823     case 0x202f: /* NARROW NO-BREAK SPACE */
3824     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3825     case 0x3000: /* IDEOGRAPHIC SPACE */
3826 ph10 510 MRRETURN(MATCH_NOMATCH); /* every listed code point falls through to this single failure return */
3827 ph10 178 }
3828     }
3829     break;
3830 ph10 182
3831 ph10 178 case OP_HSPACE:
3832     for (i = 1; i <= min; i++)
3833     {
3834 ph10 427 if (eptr >= md->end_subject)
3835 ph10 426 {
3836 ph10 427 SCHECK_PARTIAL();
3837 ph10 510 MRRETURN(MATCH_NOMATCH);
3838 ph10 427 }
3839 ph10 178 GETCHARINC(c, eptr);
3840     switch(c)
3841     {
3842 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3843 ph10 178 case 0x09: /* HT */
3844     case 0x20: /* SPACE */
3845     case 0xa0: /* NBSP */
3846     case 0x1680: /* OGHAM SPACE MARK */
3847     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */