/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 455 - (hide annotations) (download)
Sat Sep 26 19:12:32 2009 UTC (5 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 163053 byte(s)
Added lower bound length-finding to pcre_study() and use it when matching; make 
the value available via pcre_fullinfo(); also fixed bugs connected with
pcre_study() in pcre_dfa_exec(). 

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 381 Copyright (c) 1997-2009 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74     #define MATCH_COMMIT (-999)
75     #define MATCH_PRUNE (-998)
76     #define MATCH_SKIP (-997)
77     #define MATCH_THEN (-996)
78    
79 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
80     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81     because the offset vector is always a multiple of 3 long. */
82    
83     #define REC_STACK_SAVE_MAX 30
84    
85     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
86    
/* Lower and upper repeat bounds, one entry per common repeat form. An upper
bound of 0 stands for "no limit". (Presumably indexed by the repeat opcode's
offset from the first one -- confirm at the use sites.) */

static const char rep_min[6] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[6] = { 0, 0, 0, 0, 1, 1 };
89    
90    
91    
92     #ifdef DEBUG
93     /*************************************************
94     * Debugging function to print chars *
95     *************************************************/
96    
97     /* Print a sequence of chars in printable format, stopping at the end of the
98     subject if requested.
99    
100     Arguments:
101     p points to characters
102     length number to print
103     is_subject TRUE if printing from within md->start_subject
104     md pointer to matching data block, if is_subject is TRUE
105    
106     Returns: nothing
107     */
108    
109     static void
110     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111     {
112 nigel 93 unsigned int c;
113 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114     while (length-- > 0)
115     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
116     }
117     #endif
118    
119    
120    
121     /*************************************************
122     * Match a back-reference *
123     *************************************************/
124    
125     /* If a back reference hasn't been set, the length that is passed is greater
126     than the number of characters left in the string, so the match fails.
127    
128     Arguments:
129     offset index into the offset vector
130     eptr points into the subject
131     length length to be matched
132     md points to match data block
133     ims the ims flags
134    
135     Returns: TRUE if matched
136     */
137    
138     static BOOL
139 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
140 nigel 77 unsigned long int ims)
141     {
142 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
143 nigel 77
144     #ifdef DEBUG
145     if (eptr >= md->end_subject)
146     printf("matching subject <null>");
147     else
148     {
149     printf("matching subject ");
150     pchars(eptr, length, TRUE, md);
151     }
152     printf(" against backref ");
153     pchars(p, length, FALSE, md);
154     printf("\n");
155     #endif
156    
157     /* Always fail if not enough characters left */
158    
159     if (length > md->end_subject - eptr) return FALSE;
160    
161 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162     properly if Unicode properties are supported. Otherwise, we can check only
163     ASCII characters. */
164 nigel 77
165     if ((ims & PCRE_CASELESS) != 0)
166     {
167 ph10 354 #ifdef SUPPORT_UTF8
168     #ifdef SUPPORT_UCP
169     if (md->utf8)
170     {
171 ph10 358 USPTR endptr = eptr + length;
172 ph10 354 while (eptr < endptr)
173     {
174 ph10 358 int c, d;
175 ph10 354 GETCHARINC(c, eptr);
176     GETCHARINC(d, p);
177     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178 ph10 358 }
179     }
180 ph10 354 else
181     #endif
182     #endif
183    
184     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185     is no UCP support. */
186 ph10 358
187 nigel 77 while (length-- > 0)
188 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189 nigel 77 }
190 ph10 358
191 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
192     are in UTF-8 mode. */
193 ph10 358
194 nigel 77 else
195     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
197     return TRUE;
198     }
199    
200    
201    
202     /***************************************************************************
203     ****************************************************************************
204     RECURSION IN THE match() FUNCTION
205    
206 nigel 87 The match() function is highly recursive, though not every recursive call
207     increases the recursive depth. Nevertheless, some regular expressions can cause
208     it to recurse to a great depth. I was writing for Unix, so I just let it call
209     itself recursively. This uses the stack for saving everything that has to be
210     saved for a recursive call. On Unix, the stack can be large, and this works
211     fine.
212 nigel 77
213 nigel 87 It turns out that on some non-Unix-like systems there are problems with
214     programs that use a lot of stack. (This despite the fact that every last chip
215     has oodles of memory these days, and techniques for extending the stack have
216     been known for decades.) So....
217 nigel 77
218     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219     calls by keeping local variables that need to be preserved in blocks of memory
220 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
221 nigel 77 achieve this so that the actual code doesn't look very different to what it
222     always used to.
223 ph10 164
224 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
225 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
226     Switzer, the use of longjmp() has been abolished, at the cost of having to
227     provide a unique number for each call to RMATCH. There is no way of generating
228     a sequence of numbers at compile time in C. I have given them names, to make
229     them stand out more clearly.
230    
231     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
234     don't have indeterminate values; this has meant that the frame size can be
235 ph10 164 reduced because the result can be "passed back" by straight setting of the
236     variable instead of being passed in the frame.
237 nigel 77 ****************************************************************************
238     ***************************************************************************/
239    
240 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241     below must be updated in sync. */
242 nigel 77
/* One identifier per RMATCH call site, numbered consecutively from 1. */

enum {
  RM1 = 1,
         RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
  RM51, RM52, RM53, RM54
};
249 ph10 164
250 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
251 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
252 ph10 164 actually used in this definition. */
253 nigel 77
254     #ifndef NO_RECURSE
255     #define REGISTER register
256 ph10 164
/* Stack-recursion build. RMATCH is an ordinary recursive call to match() whose
result is left in rrc; mstart and rdepth are taken from the enclosing scope
rather than passed as macro arguments, and the depth passed down is rdepth+1.
RRETURN is a plain return. The DEBUG variants wrap the same call and return in
printf() tracing; note that the DEBUG RRETURN expands its argument twice. */
257 nigel 87 #ifdef DEBUG
258 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259 nigel 87 { \
260     printf("match() called in line %d\n", __LINE__); \
261 ph10 168 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
262 nigel 87 printf("to line %d\n", __LINE__); \
263     }
264     #define RRETURN(ra) \
265     { \
266     printf("match() returned %d from line %d ", ra, __LINE__); \
267     return ra; \
268     }
269     #else
/* Production variants: no tracing and no enclosing braces. */
270 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271 ph10 168 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
272 nigel 77 #define RRETURN(ra) return ra
273 nigel 87 #endif
274    
275 nigel 77 #else
276    
277    
278 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
279     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280     argument of match(), which never changes. */
281 nigel 77
282     #define REGISTER
283    
/* Heap-frame version of RMATCH: perform a "recursive call" without using the
C stack.

Arguments:
  ra   subject pointer for the new frame (stored in Xeptr)
  rb   code pointer for the new frame (stored in Xecode)
  rc   offset_top for the new frame
  rd   not used in this version
  re   ims option bits for the new frame
  rf   eptrb chain pointer for the new frame
  rg   flags for the new frame
  rw   one of the RMn identifiers; it is recorded in the calling frame's
       Xwhere and pasted into the L_<rw> label that follows the jump

A new frame is obtained from pcre_stack_malloc, filled in, linked back to the
calling frame through Xprevframe, and made the current frame before jumping to
HEAP_RECURSE. mstart is copied from the calling frame and the depth is one more
than the calling frame's.

If the allocation fails, nothing is written through the NULL pointer: the
calling frame unwinds at once via RRETURN(PCRE_ERROR_NOMEMORY). */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
303    
/* Heap-frame version of RRETURN: pop the current frame and release it with
pcre_stack_free. If a previous frame exists, the result is stored in rrc and
control jumps to HEAP_RETURN; otherwise the popped frame was the top-level one
and the value is returned from the function.

Note that "ra" is expanded only after "frame" has been switched back to the
previous frame, so an argument that names a frame-backed variable would be read
from that previous frame. NOTE(review): confirm that call sites pass only
constants or true locals such as rrc. */
304     #define RRETURN(ra)\
305     {\
306     heapframe *newframe = frame;\
307     frame = newframe->Xprevframe;\
308     (pcre_stack_free)(newframe);\
309     if (frame != NULL)\
310     {\
311 ph10 164 rrc = ra;\
312     goto HEAP_RETURN;\
313 nigel 77 }\
314     return ra;\
315     }
316    
317    
318     /* Structure for remembering the local variables in a private frame */
319    
320     typedef struct heapframe {
321     struct heapframe *Xprevframe;
322    
323     /* Function arguments that may change */
324    
325 ph10 409 USPTR Xeptr;
326 nigel 77 const uschar *Xecode;
327 ph10 409 USPTR Xmstart;
328 nigel 77 int Xoffset_top;
329     long int Xims;
330     eptrblock *Xeptrb;
331     int Xflags;
332 nigel 91 unsigned int Xrdepth;
333 nigel 77
334     /* Function local variables */
335    
336 ph10 409 USPTR Xcallpat;
337 ph10 406 #ifdef SUPPORT_UTF8
338 ph10 409 USPTR Xcharptr;
339 ph10 406 #endif
340 ph10 409 USPTR Xdata;
341     USPTR Xnext;
342     USPTR Xpp;
343     USPTR Xprev;
344     USPTR Xsaved_eptr;
345 nigel 77
346     recursion_info Xnew_recursive;
347    
348     BOOL Xcur_is_word;
349     BOOL Xcondition;
350     BOOL Xprev_is_word;
351    
352     unsigned long int Xoriginal_ims;
353    
354     #ifdef SUPPORT_UCP
355     int Xprop_type;
356 nigel 87 int Xprop_value;
357 nigel 77 int Xprop_fail_result;
358     int Xprop_category;
359     int Xprop_chartype;
360 nigel 87 int Xprop_script;
361 ph10 123 int Xoclength;
362     uschar Xocchars[8];
363 nigel 77 #endif
364    
365 ph10 403 int Xcodelink;
366 nigel 77 int Xctype;
367 nigel 93 unsigned int Xfc;
368 nigel 77 int Xfi;
369     int Xlength;
370     int Xmax;
371     int Xmin;
372     int Xnumber;
373     int Xoffset;
374     int Xop;
375     int Xsave_capture_last;
376     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
377     int Xstacksave[REC_STACK_SAVE_MAX];
378    
379     eptrblock Xnewptrb;
380    
381 ph10 164 /* Where to jump back to */
382 nigel 77
383 ph10 164 int Xwhere;
384 ph10 165
385 nigel 77 } heapframe;
386    
387     #endif
388    
389    
390     /***************************************************************************
391     ***************************************************************************/
392    
393    
394    
395     /*************************************************
396     * Match from current position *
397     *************************************************/
398    
399 nigel 93 /* This function is called recursively in many circumstances. Whenever it
400 nigel 77 returns a negative (error) response, the outer incarnation must also return the
401 ph10 426 same response. */
402 nigel 77
403 ph10 426 /* These macros pack up tests that are used for partial matching, and which
404     appear several times in the code. The first sets the "hit end" flag if the
405     pointer is at the end of the subject and also past the match start, mstart
406 ph10 427 (i.e. something has been matched). For hard partial matching (md->partial > 1),
407     it then returns PCRE_ERROR_PARTIAL immediately. The second omits the
408     end-of-subject test, for callers that already know the end has been reached. */
409 ph10 426
/* Both macros expand to a bare "if" statement and return through RRETURN. */
410     #define CHECK_PARTIAL()\
411 ph10 435 if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
412 ph10 427 {\
413     md->hitend = TRUE;\
414     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
415     }
416 ph10 426
417     #define SCHECK_PARTIAL()\
418 ph10 427 if (md->partial && eptr > mstart)\
419     {\
420     md->hitend = TRUE;\
421     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
422     }
423 ph10 426
424 ph10 427
425 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
426     the md structure (e.g. utf8, end_subject) into individual variables to improve
427 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
428     made performance worse.
429    
430     Arguments:
431 nigel 93 eptr pointer to current character in subject
432     ecode pointer to current position in compiled code
433 ph10 168 mstart pointer to the current match start position (can be modified
434 ph10 172 by encountering \K)
435 nigel 77 offset_top current top pointer
436     md pointer to "static" info for the match
437     ims current /i, /m, and /s options
438     eptrb pointer to chain of blocks containing eptr at start of
439     brackets - for testing for empty matches
440     flags can contain
441     match_condassert - this is an assertion condition
442 nigel 93 match_cbegroup - this is the start of an unlimited repeat
443     group that can match an empty string
444 nigel 87 rdepth the recursion depth
445 nigel 77
446     Returns: MATCH_MATCH if matched ) these values are >= 0
447     MATCH_NOMATCH if failed to match )
448     a negative PCRE_ERROR_xxx value if aborted by an error condition
449 nigel 87 (e.g. stopped by repeated call or recursion limit)
450 nigel 77 */
451    
452     static int
453 ph10 409 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
454 nigel 77 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
455 nigel 91 int flags, unsigned int rdepth)
456 nigel 77 {
457     /* These variables do not need to be preserved over recursion in this function,
458 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
459     "register" because they are used a lot in loops. */
460 nigel 77
461 nigel 91 register int rrc; /* Returns from recursive calls */
462     register int i; /* Used for loops not involving calls to RMATCH() */
463 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
464 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
465 nigel 77
466 nigel 93 BOOL minimize, possessive; /* Quantifier options */
467 ph10 403 int condcode;
468 nigel 93
469 nigel 77 /* When recursion is not being used, all "local" variables that have to be
470     preserved over calls to RMATCH() are part of a "frame" which is obtained from
471     heap storage. Set up the top-level frame here; others are obtained from the
472     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
473    
474     #ifdef NO_RECURSE
475     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
476     frame->Xprevframe = NULL; /* Marks the top level */
477    
478     /* Copy in the original argument variables */
479    
480     frame->Xeptr = eptr;
481     frame->Xecode = ecode;
482 ph10 168 frame->Xmstart = mstart;
483 nigel 77 frame->Xoffset_top = offset_top;
484     frame->Xims = ims;
485     frame->Xeptrb = eptrb;
486     frame->Xflags = flags;
487 nigel 87 frame->Xrdepth = rdepth;
488 nigel 77
489     /* This is where control jumps back to to effect "recursion" */
490    
491     HEAP_RECURSE:
492    
493     /* Macros make the argument variables come from the current frame */
494    
495     #define eptr frame->Xeptr
496     #define ecode frame->Xecode
497 ph10 168 #define mstart frame->Xmstart
498 nigel 77 #define offset_top frame->Xoffset_top
499     #define ims frame->Xims
500     #define eptrb frame->Xeptrb
501     #define flags frame->Xflags
502 nigel 87 #define rdepth frame->Xrdepth
503 nigel 77
504     /* Ditto for the local variables */
505    
506     #ifdef SUPPORT_UTF8
507     #define charptr frame->Xcharptr
508     #endif
509     #define callpat frame->Xcallpat
510 ph10 403 #define codelink frame->Xcodelink
511 nigel 77 #define data frame->Xdata
512     #define next frame->Xnext
513     #define pp frame->Xpp
514     #define prev frame->Xprev
515     #define saved_eptr frame->Xsaved_eptr
516    
517     #define new_recursive frame->Xnew_recursive
518    
519     #define cur_is_word frame->Xcur_is_word
520     #define condition frame->Xcondition
521     #define prev_is_word frame->Xprev_is_word
522    
523     #define original_ims frame->Xoriginal_ims
524    
525     #ifdef SUPPORT_UCP
526     #define prop_type frame->Xprop_type
527 nigel 87 #define prop_value frame->Xprop_value
528 nigel 77 #define prop_fail_result frame->Xprop_fail_result
529     #define prop_category frame->Xprop_category
530     #define prop_chartype frame->Xprop_chartype
531 nigel 87 #define prop_script frame->Xprop_script
532 ph10 115 #define oclength frame->Xoclength
533     #define occhars frame->Xocchars
534 nigel 77 #endif
535    
536     #define ctype frame->Xctype
537     #define fc frame->Xfc
538     #define fi frame->Xfi
539     #define length frame->Xlength
540     #define max frame->Xmax
541     #define min frame->Xmin
542     #define number frame->Xnumber
543     #define offset frame->Xoffset
544     #define op frame->Xop
545     #define save_capture_last frame->Xsave_capture_last
546     #define save_offset1 frame->Xsave_offset1
547     #define save_offset2 frame->Xsave_offset2
548     #define save_offset3 frame->Xsave_offset3
549     #define stacksave frame->Xstacksave
550    
551     #define newptrb frame->Xnewptrb
552    
553     /* When recursion is being used, local variables are allocated on the stack and
554     get preserved during recursion in the normal way. In this environment, fi and
555     i, and fc and c, can be the same variables. */
556    
557 nigel 93 #else /* NO_RECURSE not defined */
558 nigel 77 #define fi i
559     #define fc c
560    
561    
562 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
563     const uschar *charptr; /* in small blocks of the code. My normal */
564     #endif /* style of coding would have declared */
565     const uschar *callpat; /* them within each of those blocks. */
566     const uschar *data; /* However, in order to accommodate the */
567     const uschar *next; /* version of this code that uses an */
568     USPTR pp; /* external "stack" implemented on the */
569     const uschar *prev; /* heap, it is easier to declare them all */
570     USPTR saved_eptr; /* here, so the declarations can be cut */
571     /* out in a block. The only declarations */
572     recursion_info new_recursive; /* within blocks below are for variables */
573     /* that do not have to be preserved over */
574     BOOL cur_is_word; /* a recursive call to RMATCH(). */
575     BOOL condition;
576 nigel 77 BOOL prev_is_word;
577    
578     unsigned long int original_ims;
579    
580     #ifdef SUPPORT_UCP
581     int prop_type;
582 nigel 87 int prop_value;
583 nigel 77 int prop_fail_result;
584     int prop_category;
585     int prop_chartype;
586 nigel 87 int prop_script;
587 ph10 115 int oclength;
588     uschar occhars[8];
589 nigel 77 #endif
590    
591 ph10 399 int codelink;
592 nigel 77 int ctype;
593     int length;
594     int max;
595     int min;
596     int number;
597     int offset;
598     int op;
599     int save_capture_last;
600     int save_offset1, save_offset2, save_offset3;
601     int stacksave[REC_STACK_SAVE_MAX];
602    
603     eptrblock newptrb;
604 nigel 93 #endif /* NO_RECURSE */
605 nigel 77
606     /* These statements are here to stop the compiler complaining about uninitialized
607     variables. */
608    
609     #ifdef SUPPORT_UCP
610 nigel 87 prop_value = 0;
611 nigel 77 prop_fail_result = 0;
612     #endif
613    
614 nigel 93
615 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
616     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
617     used. Thanks to Ian Taylor for noticing this possibility and sending the
618     original patch. */
619    
620     TAIL_RECURSE:
621    
622 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
623     are specified by the macro RMATCH and RRETURN is used to return. When
624     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
625     and a "return", respectively (possibly with some debugging if DEBUG is
626     defined). However, RMATCH isn't like a function call because it's quite a
627     complicated macro. It has to be used in one particular way. This shouldn't,
628     however, impact performance when true recursion is being used. */
629 nigel 77
630 ph10 164 #ifdef SUPPORT_UTF8
631     utf8 = md->utf8; /* Local copy of the flag */
632     #else
633     utf8 = FALSE;
634     #endif
635    
636 nigel 87 /* First check that we haven't called match() too many times, or that we
637     haven't exceeded the recursive call limit. */
638    
639 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
640 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
641 nigel 77
642     original_ims = ims; /* Save for resetting on ')' */
643 nigel 91
644 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
645     string, the match_cbegroup flag is set. When this is the case, add the current
646     subject pointer to the chain of such remembered pointers, to be checked when we
647     hit the closing ket, in order to break infinite loops that match no characters.
648 ph10 197 When match() is called in other circumstances, don't add to the chain. The
649     match_cbegroup flag must NOT be used with tail recursion, because the memory
650     block that is used is on the stack, so a new one may be required for each
651     match(). */
652 nigel 77
653 nigel 93 if ((flags & match_cbegroup) != 0)
654 nigel 77 {
655 ph10 197 newptrb.epb_saved_eptr = eptr;
656     newptrb.epb_prev = eptrb;
657     eptrb = &newptrb;
658 nigel 77 }
659    
660 nigel 93 /* Now start processing the opcodes. */
661 nigel 77
662     for (;;)
663     {
664 nigel 93 minimize = possessive = FALSE;
665 nigel 77 op = *ecode;
666 ph10 443
667 nigel 93 switch(op)
668     {
669 ph10 210 case OP_FAIL:
670 ph10 212 RRETURN(MATCH_NOMATCH);
671 ph10 211
672 ph10 210 case OP_PRUNE:
673     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
674     ims, eptrb, flags, RM51);
675     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
676 ph10 212 RRETURN(MATCH_PRUNE);
677 ph10 211
678 ph10 210 case OP_COMMIT:
679     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
680     ims, eptrb, flags, RM52);
681     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
682 ph10 212 RRETURN(MATCH_COMMIT);
683 ph10 211
684 ph10 210 case OP_SKIP:
685     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
686     ims, eptrb, flags, RM53);
687     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
688 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
689 ph10 212 RRETURN(MATCH_SKIP);
690 ph10 211
691 ph10 210 case OP_THEN:
692     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
693 ph10 212 ims, eptrb, flags, RM54);
694 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
695 ph10 212 RRETURN(MATCH_THEN);
696 ph10 211
697 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
698     the current subject position in the working slot at the top of the vector.
699     We mustn't change the current values of the data slot, because they may be
700     set from a previous iteration of this group, and be referred to by a
701     reference inside the group.
702 nigel 77
703 nigel 93 If the bracket fails to match, we need to restore this value and also the
704     values of the final offsets, in case they were set by a previous iteration
705     of the same bracket.
706 nigel 77
707 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
708     a non-capturing bracket. Don't worry about setting the flag for the error
709     case here; that is handled in the code for KET. */
710 nigel 77
711 nigel 93 case OP_CBRA:
712     case OP_SCBRA:
713     number = GET2(ecode, 1+LINK_SIZE);
714 nigel 77 offset = number << 1;
715    
716     #ifdef DEBUG
717 nigel 93 printf("start bracket %d\n", number);
718     printf("subject=");
719 nigel 77 pchars(eptr, 16, TRUE, md);
720     printf("\n");
721     #endif
722    
723     if (offset < md->offset_max)
724     {
725     save_offset1 = md->offset_vector[offset];
726     save_offset2 = md->offset_vector[offset+1];
727     save_offset3 = md->offset_vector[md->offset_end - number];
728     save_capture_last = md->capture_last;
729    
730     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
731     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
732    
733 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
734 nigel 77 do
735     {
736 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
737     ims, eptrb, flags, RM1);
738 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
739 nigel 77 md->capture_last = save_capture_last;
740     ecode += GET(ecode, 1);
741     }
742     while (*ecode == OP_ALT);
743    
744     DPRINTF(("bracket %d failed\n", number));
745    
746     md->offset_vector[offset] = save_offset1;
747     md->offset_vector[offset+1] = save_offset2;
748     md->offset_vector[md->offset_end - number] = save_offset3;
749    
750     RRETURN(MATCH_NOMATCH);
751     }
752    
753 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
754     as a non-capturing bracket. */
755 nigel 77
756 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
757     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
758    
759 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
760 nigel 77
761 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
762     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
763    
764 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
765     final alternative within the brackets, we would return the result of a
766     recursive call to match() whatever happened. We can reduce stack usage by
767 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
768     is set.*/
769 nigel 77
770 nigel 93 case OP_BRA:
771     case OP_SBRA:
772     DPRINTF(("start non-capturing bracket\n"));
773     flags = (op >= OP_SBRA)? match_cbegroup : 0;
774 nigel 91 for (;;)
775 nigel 77 {
776 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
777 nigel 93 {
778 ph10 197 if (flags == 0) /* Not a possibly empty group */
779     {
780     ecode += _pcre_OP_lengths[*ecode];
781     DPRINTF(("bracket 0 tail recursion\n"));
782     goto TAIL_RECURSE;
783     }
784    
785     /* Possibly empty group; can't use tail recursion. */
786    
787     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
788     eptrb, flags, RM48);
789     RRETURN(rrc);
790 nigel 93 }
791 nigel 91
792     /* For non-final alternatives, continue the loop for a NOMATCH result;
793     otherwise return. */
794    
795 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
796     eptrb, flags, RM2);
797 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
798 nigel 77 ecode += GET(ecode, 1);
799     }
800 nigel 91 /* Control never reaches here. */
801 nigel 77
802     /* Conditional group: compilation checked that there are no more than
803     two branches. If the condition is false, skipping the first branch takes us
804     past the end if there is only one branch, but that's OK because that is
805 nigel 91 exactly what going to the ket would do. As there is only one branch to be
806     obeyed, we can use tail recursion to avoid using another stack frame. */
807 nigel 77
808     case OP_COND:
809 nigel 93 case OP_SCOND:
810 ph10 399 codelink= GET(ecode, 1);
811 ph10 406
812 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
813     inserted between OP_COND and an assertion condition. */
814 ph10 392
815 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
816     {
817     if (pcre_callout != NULL)
818     {
819     pcre_callout_block cb;
820     cb.version = 1; /* Version 1 of the callout block */
821     cb.callout_number = ecode[LINK_SIZE+2];
822     cb.offset_vector = md->offset_vector;
823     cb.subject = (PCRE_SPTR)md->start_subject;
824     cb.subject_length = md->end_subject - md->start_subject;
825     cb.start_match = mstart - md->start_subject;
826     cb.current_position = eptr - md->start_subject;
827     cb.pattern_position = GET(ecode, LINK_SIZE + 3);
828     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
829     cb.capture_top = offset_top/2;
830     cb.capture_last = md->capture_last;
831     cb.callout_data = md->callout_data;
832     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
833     if (rrc < 0) RRETURN(rrc);
834     }
835     ecode += _pcre_OP_lengths[OP_CALLOUT];
836     }
837 ph10 392
838 ph10 399 condcode = ecode[LINK_SIZE+1];
839 ph10 406
840 ph10 381 /* Now see what the actual condition is */
841 ph10 392
842 ph10 399 if (condcode == OP_RREF) /* Recursion test */
843 nigel 77 {
844 nigel 93 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
845     condition = md->recursive != NULL &&
846     (offset == RREF_ANY || offset == md->recursive->group_num);
847     ecode += condition? 3 : GET(ecode, 1);
848     }
849    
850 ph10 399 else if (condcode == OP_CREF) /* Group used test */
851 nigel 93 {
852 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
853 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
854     ecode += condition? 3 : GET(ecode, 1);
855 nigel 77 }
856    
857 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
858 nigel 93 {
859     condition = FALSE;
860     ecode += GET(ecode, 1);
861     }
862    
863 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
864 nigel 93 the final argument match_condassert causes it to stop at the end of an
865     assertion. */
866 nigel 77
867     else
868     {
869 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
870     match_condassert, RM3);
871 nigel 77 if (rrc == MATCH_MATCH)
872     {
873 nigel 93 condition = TRUE;
874     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
875 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
876     }
877 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
878 nigel 77 {
879     RRETURN(rrc); /* Need braces because of following else */
880     }
881 nigel 93 else
882     {
883     condition = FALSE;
884 ph10 399 ecode += codelink;
885 nigel 93 }
886     }
887 nigel 91
888 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
889 ph10 197 we can use tail recursion to avoid using another stack frame, except when
890     match_cbegroup is required for an unlimited repeat of a possibly empty
891     group. If the second alternative doesn't exist, we can just plough on. */
892 nigel 91
893 nigel 93 if (condition || *ecode == OP_ALT)
894     {
895 nigel 91 ecode += 1 + LINK_SIZE;
896 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
897     {
898     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
899     RRETURN(rrc);
900     }
901     else /* Group must match something */
902     {
903     flags = 0;
904     goto TAIL_RECURSE;
905     }
906 nigel 77 }
907 ph10 395 else /* Condition false & no alternative */
908 nigel 93 {
909     ecode += 1 + LINK_SIZE;
910     }
911     break;
912 ph10 447
913 nigel 77
914 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
915     to close any currently open capturing brackets. */
916    
917     case OP_CLOSE:
918     number = GET2(ecode, 1);
919     offset = number << 1;
920    
921     #ifdef DEBUG
922     printf("end bracket %d at *ACCEPT", number);
923     printf("\n");
924     #endif
925 nigel 77
926 ph10 447 md->capture_last = number;
927     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
928     {
929     md->offset_vector[offset] =
930     md->offset_vector[md->offset_end - number];
931     md->offset_vector[offset+1] = eptr - md->start_subject;
932     if (offset_top <= offset) offset_top = offset + 2;
933     }
934     ecode += 3;
935     break;
936    
937    
938 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
939     recursion, we should restore the offsets appropriately and continue from
940     after the call. */
941 nigel 77
942 ph10 210 case OP_ACCEPT:
943 nigel 77 case OP_END:
944     if (md->recursive != NULL && md->recursive->group_num == 0)
945     {
946     recursion_info *rec = md->recursive;
947 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
948 nigel 77 md->recursive = rec->prevrec;
949     memmove(md->offset_vector, rec->offset_save,
950     rec->saved_max * sizeof(int));
951 ph10 446 offset_top = rec->offset_top;
952 ph10 168 mstart = rec->save_start;
953 nigel 77 ims = original_ims;
954     ecode = rec->after_call;
955     break;
956     }
957    
958 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
959     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
960     the subject. In both cases, backtracking will then try other alternatives,
961     if any. */
962 ph10 443
963 ph10 442 if (eptr == mstart &&
964     (md->notempty ||
965 ph10 443 (md->notempty_atstart &&
966 ph10 442 mstart == md->start_subject + md->start_offset)))
967 ph10 443 RRETURN(MATCH_NOMATCH);
968    
969 ph10 442 /* Otherwise, we have a match. */
970 nigel 77
971 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
972     md->end_offset_top = offset_top; /* and how many extracts were taken */
973 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
974 nigel 77 RRETURN(MATCH_MATCH);
975    
976     /* Change option settings */
977    
978     case OP_OPT:
979     ims = ecode[1];
980     ecode += 2;
981     DPRINTF(("ims set to %02lx\n", ims));
982     break;
983    
984     /* Assertion brackets. Check the alternative branches in turn - the
985     matching won't pass the KET for an assertion. If any one branch matches,
986     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
987     start of each branch to move the current point backwards, so the code at
988     this level is identical to the lookahead case. */
989    
990     case OP_ASSERT:
991     case OP_ASSERTBACK:
992     do
993     {
994 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
995     RM4);
996 nigel 77 if (rrc == MATCH_MATCH) break;
997 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
998 nigel 77 ecode += GET(ecode, 1);
999     }
1000     while (*ecode == OP_ALT);
1001     if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1002    
1003     /* If checking an assertion for a condition, return MATCH_MATCH. */
1004    
1005     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1006    
1007     /* Continue from after the assertion, updating the offsets high water
1008     mark, since extracts may have been taken during the assertion. */
1009    
1010     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1011     ecode += 1 + LINK_SIZE;
1012     offset_top = md->end_offset_top;
1013     continue;
1014    
1015     /* Negative assertion: all branches must fail to match */
1016    
1017     case OP_ASSERT_NOT:
1018     case OP_ASSERTBACK_NOT:
1019     do
1020     {
1021 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1022     RM5);
1023 nigel 77 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
1024 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1025 nigel 77 ecode += GET(ecode,1);
1026     }
1027     while (*ecode == OP_ALT);
1028    
1029     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1030    
1031     ecode += 1 + LINK_SIZE;
1032     continue;
1033    
1034     /* Move the subject pointer back. This occurs only at the start of
1035     each branch of a lookbehind assertion. If we are too close to the start to
1036     move back, this match function fails. When working with UTF-8 we move
1037     back a number of characters, not bytes. */
1038    
1039     case OP_REVERSE:
1040     #ifdef SUPPORT_UTF8
1041     if (utf8)
1042     {
1043 nigel 93 i = GET(ecode, 1);
1044     while (i-- > 0)
1045 nigel 77 {
1046     eptr--;
1047     if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1048 ph10 207 BACKCHAR(eptr);
1049 nigel 77 }
1050     }
1051     else
1052     #endif
1053    
1054     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1055    
1056     {
1057 nigel 93 eptr -= GET(ecode, 1);
1058 nigel 77 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1059     }
1060    
1061 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1062 nigel 77
1063 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1064 nigel 77 ecode += 1 + LINK_SIZE;
1065     break;
1066    
1067     /* The callout item calls an external function, if one is provided, passing
1068     details of the match so far. This is mainly for debugging, though the
1069     function is able to force a failure. */
1070    
1071     case OP_CALLOUT:
1072     if (pcre_callout != NULL)
1073     {
1074     pcre_callout_block cb;
1075     cb.version = 1; /* Version 1 of the callout block */
1076     cb.callout_number = ecode[1];
1077     cb.offset_vector = md->offset_vector;
1078 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1079 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
1080 ph10 168 cb.start_match = mstart - md->start_subject;
1081 nigel 77 cb.current_position = eptr - md->start_subject;
1082     cb.pattern_position = GET(ecode, 2);
1083     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1084     cb.capture_top = offset_top/2;
1085     cb.capture_last = md->capture_last;
1086     cb.callout_data = md->callout_data;
1087     if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1088     if (rrc < 0) RRETURN(rrc);
1089     }
1090     ecode += 2 + 2*LINK_SIZE;
1091     break;
1092    
1093     /* Recursion either matches the current regex, or some subexpression. The
1094     offset data is the offset to the starting bracket from the start of the
1095     whole pattern. (This is so that it works from duplicated subpatterns.)
1096    
1097     If there are any capturing brackets started but not finished, we have to
1098     save their starting points and reinstate them after the recursion. However,
1099     we don't know how many such there are (offset_top records the completed
1100     total) so we just have to save all the potential data. There may be up to
1101     65535 such values, which is too large to put on the stack, but using malloc
1102     for small numbers seems expensive. As a compromise, the stack is used when
1103     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1104     is used. If the malloc fails, the saved data cannot be stored, so the
1105     recursion is not attempted: this call of match() gives up and returns the
1106     error PCRE_ERROR_NOMEMORY.
1107    
1108     There are also other values that have to be saved. We use a chained
1109     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1110     for the original version of this logic. */
1111    
1112     case OP_RECURSE:
1113     {
1114     callpat = md->start_code + GET(ecode, 1);
1115 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1116     GET2(callpat, 1 + LINK_SIZE);
1117 nigel 77
1118     /* Add to "recursing stack" */
1119    
1120     new_recursive.prevrec = md->recursive;
1121     md->recursive = &new_recursive;
1122    
1123     /* Find where to continue from afterwards */
1124    
1125     ecode += 1 + LINK_SIZE;
1126     new_recursive.after_call = ecode;
1127    
1128     /* Now save the offset data. */
1129    
1130     new_recursive.saved_max = md->offset_end;
1131     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1132     new_recursive.offset_save = stacksave;
1133     else
1134     {
1135     new_recursive.offset_save =
1136     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1137     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1138     }
1139    
1140     memcpy(new_recursive.offset_save, md->offset_vector,
1141     new_recursive.saved_max * sizeof(int));
1142 ph10 168 new_recursive.save_start = mstart;
1143 ph10 446 new_recursive.offset_top = offset_top;
1144 ph10 168 mstart = eptr;
1145 nigel 77
1146     /* OK, now we can do the recursion. For each top-level alternative we
1147     restore the offset and recursion data. */
1148    
1149     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1150 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1151 nigel 77 do
1152     {
1153 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1154     md, ims, eptrb, flags, RM6);
1155 nigel 77 if (rrc == MATCH_MATCH)
1156     {
1157 nigel 87 DPRINTF(("Recursion matched\n"));
1158 nigel 77 md->recursive = new_recursive.prevrec;
1159     if (new_recursive.offset_save != stacksave)
1160     (pcre_free)(new_recursive.offset_save);
1161     RRETURN(MATCH_MATCH);
1162     }
1163 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1164 nigel 87 {
1165     DPRINTF(("Recursion gave error %d\n", rrc));
1166 ph10 400 if (new_recursive.offset_save != stacksave)
1167     (pcre_free)(new_recursive.offset_save);
1168 nigel 87 RRETURN(rrc);
1169     }
1170 nigel 77
1171     md->recursive = &new_recursive;
1172     memcpy(md->offset_vector, new_recursive.offset_save,
1173     new_recursive.saved_max * sizeof(int));
1174     callpat += GET(callpat, 1);
1175     }
1176     while (*callpat == OP_ALT);
1177    
1178     DPRINTF(("Recursion didn't match\n"));
1179     md->recursive = new_recursive.prevrec;
1180     if (new_recursive.offset_save != stacksave)
1181     (pcre_free)(new_recursive.offset_save);
1182     RRETURN(MATCH_NOMATCH);
1183     }
1184     /* Control never reaches here */
1185    
1186     /* "Once" brackets are like assertion brackets except that after a match,
1187     the point in the subject string is not moved back. Thus there can never be
1188     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1189     Check the alternative branches in turn - the matching won't pass the KET
1190     for this kind of subpattern. If any one branch matches, we carry on as at
1191     the end of a normal bracket, leaving the subject pointer. */
1192    
1193     case OP_ONCE:
1194 nigel 91 prev = ecode;
1195     saved_eptr = eptr;
1196    
1197     do
1198 nigel 77 {
1199 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1200 nigel 91 if (rrc == MATCH_MATCH) break;
1201 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1202 nigel 91 ecode += GET(ecode,1);
1203     }
1204     while (*ecode == OP_ALT);
1205 nigel 77
1206 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1207 nigel 77
1208 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1209 nigel 77
1210 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1211     mark, since extracts may have been taken. */
1212 nigel 77
1213 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1214 nigel 77
1215 nigel 91 offset_top = md->end_offset_top;
1216     eptr = md->end_match_ptr;
1217 nigel 77
1218 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1219     happens for a repeating ket if no characters were matched in the group.
1220     This is the forcible breaking of infinite loops as implemented in Perl
1221     5.005. If there is an options reset, it will get obeyed in the normal
1222     course of events. */
1223 nigel 77
1224 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1225     {
1226     ecode += 1+LINK_SIZE;
1227     break;
1228     }
1229 nigel 77
1230 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1231     preceding bracket, in the appropriate order. The second "call" of match()
1232     uses tail recursion, to avoid using another stack frame. We need to reset
1233     any options that changed within the bracket before re-running it, so
1234     check the next opcode. */
1235 nigel 77
1236 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1237     {
1238     ims = (ims & ~PCRE_IMS) | ecode[4];
1239     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1240     }
1241 nigel 77
1242 nigel 91 if (*ecode == OP_KETRMIN)
1243     {
1244 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1245 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1246     ecode = prev;
1247 ph10 197 flags = 0;
1248 nigel 91 goto TAIL_RECURSE;
1249 nigel 77 }
1250 nigel 91 else /* OP_KETRMAX */
1251     {
1252 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1253 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1254     ecode += 1 + LINK_SIZE;
1255 ph10 197 flags = 0;
1256 nigel 91 goto TAIL_RECURSE;
1257     }
1258     /* Control never gets here */
1259 nigel 77
1260     /* An alternation is the end of a branch; scan along to find the end of the
1261     bracketed group and go to there. */
1262    
1263     case OP_ALT:
1264     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1265     break;
1266    
1267 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1268     indicating that it may occur zero times. It may repeat infinitely, or not
1269     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1270     with fixed upper repeat limits are compiled as a number of copies, with the
1271     optional ones preceded by BRAZERO or BRAMINZERO. */
1272 nigel 77
1273     case OP_BRAZERO:
1274     {
1275     next = ecode+1;
1276 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1277 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1278     do next += GET(next,1); while (*next == OP_ALT);
1279 nigel 93 ecode = next + 1 + LINK_SIZE;
1280 nigel 77 }
1281     break;
1282    
1283     case OP_BRAMINZERO:
1284     {
1285     next = ecode+1;
1286 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1287 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1288 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1289     ecode++;
1290     }
1291     break;
1292    
1293 ph10 335 case OP_SKIPZERO:
1294     {
1295     next = ecode+1;
1296     do next += GET(next,1); while (*next == OP_ALT);
1297     ecode = next + 1 + LINK_SIZE;
1298     }
1299     break;
1300    
1301 nigel 93 /* End of a group, repeated or non-repeating. */
1302 nigel 77
1303     case OP_KET:
1304     case OP_KETRMIN:
1305     case OP_KETRMAX:
1306 nigel 91 prev = ecode - GET(ecode, 1);
1307 nigel 77
1308 nigel 93 /* If this was a group that remembered the subject start, in order to break
1309     infinite repeats of empty string matches, retrieve the subject start from
1310     the chain. Otherwise, set it NULL. */
1311 nigel 77
1312 nigel 93 if (*prev >= OP_SBRA)
1313     {
1314     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1315     eptrb = eptrb->epb_prev; /* Backup to previous group */
1316     }
1317     else saved_eptr = NULL;
1318 nigel 77
1319 nigel 93 /* If we are at the end of an assertion group, stop matching and return
1320     MATCH_MATCH, but record the current high water mark for use by positive
1321     assertions. Do this also for the "once" (atomic) groups. */
1322    
1323 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1324     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1325     *prev == OP_ONCE)
1326     {
1327     md->end_match_ptr = eptr; /* For ONCE */
1328     md->end_offset_top = offset_top;
1329     RRETURN(MATCH_MATCH);
1330     }
1331 nigel 77
1332 nigel 93 /* For capturing groups we have to check the group number back at the start
1333     and if necessary complete handling an extraction by setting the offsets and
1334     bumping the high water mark. Note that whole-pattern recursion is coded as
1335     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1336     when the OP_END is reached. Other recursion is handled here. */
1337 nigel 77
1338 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1339 nigel 91 {
1340 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1341 nigel 91 offset = number << 1;
1342 ph10 446
1343 nigel 77 #ifdef DEBUG
1344 nigel 91 printf("end bracket %d", number);
1345     printf("\n");
1346 nigel 77 #endif
1347    
1348 nigel 93 md->capture_last = number;
1349     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1350 nigel 91 {
1351 nigel 93 md->offset_vector[offset] =
1352     md->offset_vector[md->offset_end - number];
1353     md->offset_vector[offset+1] = eptr - md->start_subject;
1354     if (offset_top <= offset) offset_top = offset + 2;
1355     }
1356 nigel 77
1357 nigel 93 /* Handle a recursively called group. Restore the offsets
1358     appropriately and continue from after the call. */
1359 nigel 77
1360 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1361     {
1362     recursion_info *rec = md->recursive;
1363     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1364     md->recursive = rec->prevrec;
1365 ph10 168 mstart = rec->save_start;
1366 nigel 93 memcpy(md->offset_vector, rec->offset_save,
1367     rec->saved_max * sizeof(int));
1368 ph10 446 offset_top = rec->offset_top;
1369 nigel 93 ecode = rec->after_call;
1370     ims = original_ims;
1371     break;
1372 nigel 77 }
1373 nigel 91 }
1374 nigel 77
1375 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1376     flags, in case they got changed during the group. */
1377 nigel 77
1378 nigel 91 ims = original_ims;
1379     DPRINTF(("ims reset to %02lx\n", ims));
1380 nigel 77
1381 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1382     happens for a repeating ket if no characters were matched in the group.
1383     This is the forcible breaking of infinite loops as implemented in Perl
1384     5.005. If there is an options reset, it will get obeyed in the normal
1385     course of events. */
1386 nigel 77
1387 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1388     {
1389     ecode += 1 + LINK_SIZE;
1390     break;
1391     }
1392 nigel 77
1393 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1394     preceding bracket, in the appropriate order. In the second case, we can use
1395 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1396     unlimited repeat of a group that can match an empty string. */
1397 nigel 77
1398 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1399    
1400 nigel 91 if (*ecode == OP_KETRMIN)
1401     {
1402 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1403 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1404 ph10 197 if (flags != 0) /* Could match an empty string */
1405     {
1406     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1407     RRETURN(rrc);
1408     }
1409 nigel 91 ecode = prev;
1410     goto TAIL_RECURSE;
1411 nigel 77 }
1412 nigel 91 else /* OP_KETRMAX */
1413     {
1414 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1415 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1416     ecode += 1 + LINK_SIZE;
1417 ph10 197 flags = 0;
1418 nigel 91 goto TAIL_RECURSE;
1419     }
1420     /* Control never gets here */
1421 nigel 77
1422     /* Start of subject unless notbol, or after internal newline if multiline */
1423    
1424     case OP_CIRC:
1425     if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1426     if ((ims & PCRE_MULTILINE) != 0)
1427     {
1428 nigel 91 if (eptr != md->start_subject &&
1429 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1430 nigel 77 RRETURN(MATCH_NOMATCH);
1431     ecode++;
1432     break;
1433     }
1434     /* ... else fall through */
1435    
1436     /* Start of subject assertion */
1437    
1438     case OP_SOD:
1439     if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1440     ecode++;
1441     break;
1442    
1443     /* Start of match assertion */
1444    
1445     case OP_SOM:
1446     if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1447     ecode++;
1448     break;
1449 ph10 172
1450 ph10 168 /* Reset the start of match point */
1451 ph10 172
1452 ph10 168 case OP_SET_SOM:
1453     mstart = eptr;
1454 ph10 172 ecode++;
1455     break;
1456 nigel 77
1457     /* Assert before internal newline if multiline, or before a terminating
1458     newline unless endonly is set, else end of subject unless noteol is set. */
1459    
1460     case OP_DOLL:
1461     if ((ims & PCRE_MULTILINE) != 0)
1462     {
1463     if (eptr < md->end_subject)
1464 nigel 91 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1465 nigel 77 else
1466     { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1467     ecode++;
1468     break;
1469     }
1470     else
1471     {
1472     if (md->noteol) RRETURN(MATCH_NOMATCH);
1473     if (!md->endonly)
1474     {
1475 nigel 91 if (eptr != md->end_subject &&
1476 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1477 nigel 77 RRETURN(MATCH_NOMATCH);
1478     ecode++;
1479     break;
1480     }
1481     }
1482 nigel 91 /* ... else fall through for endonly */
1483 nigel 77
1484     /* End of subject assertion (\z) */
1485    
1486     case OP_EOD:
1487     if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1488     ecode++;
1489     break;
1490    
1491     /* End of subject or ending \n assertion (\Z) */
1492    
1493     case OP_EODN:
1494 nigel 91 if (eptr != md->end_subject &&
1495 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1496 nigel 91 RRETURN(MATCH_NOMATCH);
1497 nigel 77 ecode++;
1498     break;
1499    
1500     /* Word boundary assertions */
1501    
1502     case OP_NOT_WORD_BOUNDARY:
1503     case OP_WORD_BOUNDARY:
1504     {
1505    
1506     /* Find out if the previous and current characters are "word" characters.
1507     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1508 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1509 ph10 435 partial matching. */
1510 nigel 77
1511     #ifdef SUPPORT_UTF8
1512     if (utf8)
1513     {
1514     if (eptr == md->start_subject) prev_is_word = FALSE; else
1515     {
1516 ph10 409 USPTR lastptr = eptr - 1;
1517 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1518 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1519 nigel 77 GETCHAR(c, lastptr);
1520     prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1521     }
1522 ph10 443 if (eptr >= md->end_subject)
1523 nigel 77 {
1524 ph10 443 SCHECK_PARTIAL();
1525     cur_is_word = FALSE;
1526 ph10 428 }
1527     else
1528     {
1529 nigel 77 GETCHAR(c, eptr);
1530     cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1531     }
1532     }
1533     else
1534     #endif
1535    
1536 ph10 428 /* Not in UTF-8 mode */
1537 nigel 77
1538     {
1539 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1540     {
1541 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1542 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1543     }
1544 ph10 443 if (eptr >= md->end_subject)
1545 ph10 428 {
1546 ph10 443 SCHECK_PARTIAL();
1547     cur_is_word = FALSE;
1548 ph10 428 }
1549     else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1550 nigel 77 }
1551    
1552     /* Now see if the situation is what we want */
1553    
1554     if ((*ecode++ == OP_WORD_BOUNDARY)?
1555     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1556     RRETURN(MATCH_NOMATCH);
1557     }
1558     break;
1559    
1560     /* Match a single character type; inline for speed */
1561    
1562     case OP_ANY:
1563 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1564 ph10 345 /* Fall through */
1565    
1566 ph10 341 case OP_ALLANY:
1567 ph10 443 if (eptr++ >= md->end_subject)
1568 ph10 428 {
1569 ph10 443 SCHECK_PARTIAL();
1570 ph10 428 RRETURN(MATCH_NOMATCH);
1571 ph10 443 }
1572 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1573 nigel 77 ecode++;
1574     break;
1575    
1576     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1577     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1578    
1579     case OP_ANYBYTE:
1580 ph10 443 if (eptr++ >= md->end_subject)
1581 ph10 428 {
1582 ph10 443 SCHECK_PARTIAL();
1583 ph10 428 RRETURN(MATCH_NOMATCH);
1584 ph10 443 }
1585 nigel 77 ecode++;
1586     break;
1587    
1588     case OP_NOT_DIGIT:
1589 ph10 443 if (eptr >= md->end_subject)
1590 ph10 428 {
1591 ph10 443 SCHECK_PARTIAL();
1592 ph10 428 RRETURN(MATCH_NOMATCH);
1593 ph10 443 }
1594 nigel 77 GETCHARINCTEST(c, eptr);
1595     if (
1596     #ifdef SUPPORT_UTF8
1597     c < 256 &&
1598     #endif
1599     (md->ctypes[c] & ctype_digit) != 0
1600     )
1601     RRETURN(MATCH_NOMATCH);
1602     ecode++;
1603     break;
1604    
1605     case OP_DIGIT:
1606 ph10 443 if (eptr >= md->end_subject)
1607 ph10 428 {
1608 ph10 443 SCHECK_PARTIAL();
1609 ph10 428 RRETURN(MATCH_NOMATCH);
1610 ph10 443 }
1611 nigel 77 GETCHARINCTEST(c, eptr);
1612     if (
1613     #ifdef SUPPORT_UTF8
1614     c >= 256 ||
1615     #endif
1616     (md->ctypes[c] & ctype_digit) == 0
1617     )
1618     RRETURN(MATCH_NOMATCH);
1619     ecode++;
1620     break;
1621    
1622     case OP_NOT_WHITESPACE:
1623 ph10 443 if (eptr >= md->end_subject)
1624 ph10 428 {
1625 ph10 443 SCHECK_PARTIAL();
1626 ph10 428 RRETURN(MATCH_NOMATCH);
1627 ph10 443 }
1628 nigel 77 GETCHARINCTEST(c, eptr);
1629     if (
1630     #ifdef SUPPORT_UTF8
1631     c < 256 &&
1632     #endif
1633     (md->ctypes[c] & ctype_space) != 0
1634     )
1635     RRETURN(MATCH_NOMATCH);
1636     ecode++;
1637     break;
1638    
1639     case OP_WHITESPACE:
1640 ph10 443 if (eptr >= md->end_subject)
1641 ph10 428 {
1642 ph10 443 SCHECK_PARTIAL();
1643 ph10 428 RRETURN(MATCH_NOMATCH);
1644 ph10 443 }
1645 nigel 77 GETCHARINCTEST(c, eptr);
1646     if (
1647     #ifdef SUPPORT_UTF8
1648     c >= 256 ||
1649     #endif
1650     (md->ctypes[c] & ctype_space) == 0
1651     )
1652     RRETURN(MATCH_NOMATCH);
1653     ecode++;
1654     break;
1655    
1656     case OP_NOT_WORDCHAR:
1657 ph10 443 if (eptr >= md->end_subject)
1658 ph10 428 {
1659 ph10 443 SCHECK_PARTIAL();
1660 ph10 428 RRETURN(MATCH_NOMATCH);
1661 ph10 443 }
1662 nigel 77 GETCHARINCTEST(c, eptr);
1663     if (
1664     #ifdef SUPPORT_UTF8
1665     c < 256 &&
1666     #endif
1667     (md->ctypes[c] & ctype_word) != 0
1668     )
1669     RRETURN(MATCH_NOMATCH);
1670     ecode++;
1671     break;
1672    
1673     case OP_WORDCHAR:
1674 ph10 443 if (eptr >= md->end_subject)
1675 ph10 428 {
1676 ph10 443 SCHECK_PARTIAL();
1677 ph10 428 RRETURN(MATCH_NOMATCH);
1678 ph10 443 }
1679 nigel 77 GETCHARINCTEST(c, eptr);
1680     if (
1681     #ifdef SUPPORT_UTF8
1682     c >= 256 ||
1683     #endif
1684     (md->ctypes[c] & ctype_word) == 0
1685     )
1686     RRETURN(MATCH_NOMATCH);
1687     ecode++;
1688     break;
1689    
1690 nigel 93 case OP_ANYNL:
1691 ph10 443 if (eptr >= md->end_subject)
1692 ph10 428 {
1693 ph10 443 SCHECK_PARTIAL();
1694 ph10 428 RRETURN(MATCH_NOMATCH);
1695 ph10 443 }
1696 nigel 93 GETCHARINCTEST(c, eptr);
1697     switch(c)
1698     {
1699     default: RRETURN(MATCH_NOMATCH);
1700     case 0x000d:
1701     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1702     break;
1703 ph10 231
1704 nigel 93 case 0x000a:
1705 ph10 231 break;
1706    
1707 nigel 93 case 0x000b:
1708     case 0x000c:
1709     case 0x0085:
1710     case 0x2028:
1711     case 0x2029:
1712 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1713 nigel 93 break;
1714     }
1715     ecode++;
1716     break;
1717    
1718 ph10 178 case OP_NOT_HSPACE:
1719 ph10 443 if (eptr >= md->end_subject)
1720 ph10 428 {
1721 ph10 443 SCHECK_PARTIAL();
1722 ph10 428 RRETURN(MATCH_NOMATCH);
1723 ph10 443 }
1724 ph10 178 GETCHARINCTEST(c, eptr);
1725     switch(c)
1726     {
1727     default: break;
1728     case 0x09: /* HT */
1729     case 0x20: /* SPACE */
1730     case 0xa0: /* NBSP */
1731     case 0x1680: /* OGHAM SPACE MARK */
1732     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1733     case 0x2000: /* EN QUAD */
1734     case 0x2001: /* EM QUAD */
1735     case 0x2002: /* EN SPACE */
1736     case 0x2003: /* EM SPACE */
1737     case 0x2004: /* THREE-PER-EM SPACE */
1738     case 0x2005: /* FOUR-PER-EM SPACE */
1739     case 0x2006: /* SIX-PER-EM SPACE */
1740     case 0x2007: /* FIGURE SPACE */
1741     case 0x2008: /* PUNCTUATION SPACE */
1742     case 0x2009: /* THIN SPACE */
1743     case 0x200A: /* HAIR SPACE */
1744     case 0x202f: /* NARROW NO-BREAK SPACE */
1745     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1746     case 0x3000: /* IDEOGRAPHIC SPACE */
1747     RRETURN(MATCH_NOMATCH);
1748     }
1749     ecode++;
1750     break;
1751    
1752     case OP_HSPACE:
1753 ph10 443 if (eptr >= md->end_subject)
1754 ph10 428 {
1755 ph10 443 SCHECK_PARTIAL();
1756 ph10 428 RRETURN(MATCH_NOMATCH);
1757 ph10 443 }
1758 ph10 178 GETCHARINCTEST(c, eptr);
1759     switch(c)
1760     {
1761     default: RRETURN(MATCH_NOMATCH);
1762     case 0x09: /* HT */
1763     case 0x20: /* SPACE */
1764     case 0xa0: /* NBSP */
1765     case 0x1680: /* OGHAM SPACE MARK */
1766     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1767     case 0x2000: /* EN QUAD */
1768     case 0x2001: /* EM QUAD */
1769     case 0x2002: /* EN SPACE */
1770     case 0x2003: /* EM SPACE */
1771     case 0x2004: /* THREE-PER-EM SPACE */
1772     case 0x2005: /* FOUR-PER-EM SPACE */
1773     case 0x2006: /* SIX-PER-EM SPACE */
1774     case 0x2007: /* FIGURE SPACE */
1775     case 0x2008: /* PUNCTUATION SPACE */
1776     case 0x2009: /* THIN SPACE */
1777     case 0x200A: /* HAIR SPACE */
1778     case 0x202f: /* NARROW NO-BREAK SPACE */
1779     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1780     case 0x3000: /* IDEOGRAPHIC SPACE */
1781     break;
1782     }
1783     ecode++;
1784     break;
1785    
1786     case OP_NOT_VSPACE:
1787 ph10 443 if (eptr >= md->end_subject)
1788 ph10 428 {
1789 ph10 443 SCHECK_PARTIAL();
1790 ph10 428 RRETURN(MATCH_NOMATCH);
1791 ph10 443 }
1792 ph10 178 GETCHARINCTEST(c, eptr);
1793     switch(c)
1794     {
1795     default: break;
1796     case 0x0a: /* LF */
1797     case 0x0b: /* VT */
1798     case 0x0c: /* FF */
1799     case 0x0d: /* CR */
1800     case 0x85: /* NEL */
1801     case 0x2028: /* LINE SEPARATOR */
1802     case 0x2029: /* PARAGRAPH SEPARATOR */
1803     RRETURN(MATCH_NOMATCH);
1804     }
1805     ecode++;
1806     break;
1807    
1808     case OP_VSPACE:
1809 ph10 443 if (eptr >= md->end_subject)
1810 ph10 428 {
1811 ph10 443 SCHECK_PARTIAL();
1812 ph10 428 RRETURN(MATCH_NOMATCH);
1813 ph10 443 }
1814 ph10 178 GETCHARINCTEST(c, eptr);
1815     switch(c)
1816     {
1817     default: RRETURN(MATCH_NOMATCH);
1818     case 0x0a: /* LF */
1819     case 0x0b: /* VT */
1820     case 0x0c: /* FF */
1821     case 0x0d: /* CR */
1822     case 0x85: /* NEL */
1823     case 0x2028: /* LINE SEPARATOR */
1824     case 0x2029: /* PARAGRAPH SEPARATOR */
1825     break;
1826     }
1827     ecode++;
1828     break;
1829    
1830 nigel 77 #ifdef SUPPORT_UCP
1831     /* Check the next character by Unicode property. We will get here only
1832     if the support is in the binary; otherwise a compile-time error occurs. */
1833    
1834     case OP_PROP:
1835     case OP_NOTPROP:
1836 ph10 443 if (eptr >= md->end_subject)
1837 ph10 428 {
1838 ph10 443 SCHECK_PARTIAL();
1839 ph10 428 RRETURN(MATCH_NOMATCH);
1840 ph10 443 }
1841 nigel 77 GETCHARINCTEST(c, eptr);
1842     {
1843 ph10 384 const ucd_record *prop = GET_UCD(c);
1844 nigel 77
1845 nigel 87 switch(ecode[1])
1846     {
1847     case PT_ANY:
1848     if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1849     break;
1850 nigel 77
1851 nigel 87 case PT_LAMP:
1852 ph10 349 if ((prop->chartype == ucp_Lu ||
1853     prop->chartype == ucp_Ll ||
1854     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1855 nigel 77 RRETURN(MATCH_NOMATCH);
1856 nigel 87 break;
1857    
1858     case PT_GC:
1859 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1860 nigel 77 RRETURN(MATCH_NOMATCH);
1861 nigel 87 break;
1862    
1863     case PT_PC:
1864 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1865 nigel 87 RRETURN(MATCH_NOMATCH);
1866     break;
1867    
1868     case PT_SC:
1869 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
1870 nigel 87 RRETURN(MATCH_NOMATCH);
1871     break;
1872    
1873     default:
1874     RRETURN(PCRE_ERROR_INTERNAL);
1875 nigel 77 }
1876 nigel 87
1877     ecode += 3;
1878 nigel 77 }
1879     break;
1880    
1881     /* Match an extended Unicode sequence. We will get here only if the support
1882     is in the binary; otherwise a compile-time error occurs. */
1883    
1884     case OP_EXTUNI:
1885 ph10 443 if (eptr >= md->end_subject)
1886 ph10 428 {
1887 ph10 443 SCHECK_PARTIAL();
1888 ph10 428 RRETURN(MATCH_NOMATCH);
1889 ph10 443 }
1890 nigel 77 GETCHARINCTEST(c, eptr);
1891     {
1892 ph10 349 int category = UCD_CATEGORY(c);
1893 nigel 77 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1894     while (eptr < md->end_subject)
1895     {
1896     int len = 1;
1897     if (!utf8) c = *eptr; else
1898     {
1899     GETCHARLEN(c, eptr, len);
1900     }
1901 ph10 349 category = UCD_CATEGORY(c);
1902 nigel 77 if (category != ucp_M) break;
1903     eptr += len;
1904     }
1905     }
1906     ecode++;
1907     break;
1908     #endif
1909    
1910    
1911     /* Match a back reference, possibly repeatedly. Look past the end of the
1912     item to see if there is repeat information following. The code is similar
1913     to that for character classes, but repeated for efficiency. Then obey
1914     similar code to character type repeats - written out again for speed.
1915     However, if the referenced string is the empty string, always treat
1916     it as matched, any number of times (otherwise there could be infinite
1917     loops). */
1918    
1919     case OP_REF:
1920     {
1921     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1922 ph10 345 ecode += 3;
1923    
1924 ph10 336 /* If the reference is unset, there are two possibilities:
1925 ph10 345
1926 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
1927     than the amount of subject left; this ensures that every attempt at a
1928     match fails. We can't just fail here, because of the possibility of
1929     quantifiers with zero minima.
1930 ph10 345
1931     (b) If the JavaScript compatibility flag is set, set the length to zero
1932     so that the back reference matches an empty string.
1933    
1934     Otherwise, set the length to the length of what was matched by the
1935 ph10 336 referenced subpattern. */
1936 ph10 345
1937 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
1938 ph10 345 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1939 ph10 336 else
1940     length = md->offset_vector[offset+1] - md->offset_vector[offset];
1941 nigel 77
1942     /* Set up for repetition, or handle the non-repeated case */
1943    
1944     switch (*ecode)
1945     {
1946     case OP_CRSTAR:
1947     case OP_CRMINSTAR:
1948     case OP_CRPLUS:
1949     case OP_CRMINPLUS:
1950     case OP_CRQUERY:
1951     case OP_CRMINQUERY:
1952     c = *ecode++ - OP_CRSTAR;
1953     minimize = (c & 1) != 0;
1954     min = rep_min[c]; /* Pick up values from tables; */
1955     max = rep_max[c]; /* zero for max => infinity */
1956     if (max == 0) max = INT_MAX;
1957     break;
1958    
1959     case OP_CRRANGE:
1960     case OP_CRMINRANGE:
1961     minimize = (*ecode == OP_CRMINRANGE);
1962     min = GET2(ecode, 1);
1963     max = GET2(ecode, 3);
1964     if (max == 0) max = INT_MAX;
1965     ecode += 5;
1966     break;
1967    
1968     default: /* No repeat follows */
1969 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
1970 ph10 428 {
1971 ph10 443 CHECK_PARTIAL();
1972 ph10 428 RRETURN(MATCH_NOMATCH);
1973 ph10 443 }
1974 nigel 77 eptr += length;
1975     continue; /* With the main loop */
1976     }
1977    
1978     /* If the length of the reference is zero, just continue with the
1979     main loop. */
1980 ph10 443
1981 nigel 77 if (length == 0) continue;
1982    
1983     /* First, ensure the minimum number of matches are present. We get back
1984     the length of the reference string explicitly rather than passing the
1985     address of eptr, so that eptr can be a register variable. */
1986    
1987     for (i = 1; i <= min; i++)
1988     {
1989 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
1990 ph10 426 {
1991 ph10 427 CHECK_PARTIAL();
1992 ph10 426 RRETURN(MATCH_NOMATCH);
1993 ph10 427 }
1994 nigel 77 eptr += length;
1995     }
1996    
1997     /* If min = max, continue at the same level without recursion.
1998     They are not both allowed to be zero. */
1999    
2000     if (min == max) continue;
2001    
2002     /* If minimizing, keep trying and advancing the pointer */
2003    
2004     if (minimize)
2005     {
2006     for (fi = min;; fi++)
2007     {
2008 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2009 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2010 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2011     if (!match_ref(offset, eptr, length, md, ims))
2012 ph10 426 {
2013 ph10 427 CHECK_PARTIAL();
2014 nigel 77 RRETURN(MATCH_NOMATCH);
2015 ph10 427 }
2016 nigel 77 eptr += length;
2017     }
2018     /* Control never gets here */
2019     }
2020    
2021     /* If maximizing, find the longest string and work backwards */
2022    
2023     else
2024     {
2025     pp = eptr;
2026     for (i = min; i < max; i++)
2027     {
2028     if (!match_ref(offset, eptr, length, md, ims)) break;
2029     eptr += length;
2030     }
2031     while (eptr >= pp)
2032     {
2033 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2034 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2035     eptr -= length;
2036     }
2037     RRETURN(MATCH_NOMATCH);
2038     }
2039     }
2040     /* Control never gets here */
2041    
2042     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2043     used when all the characters in the class have values in the range 0-255,
2044     and either the matching is caseful, or the characters are in the range
2045     0-127 when UTF-8 processing is enabled. The only difference between
2046     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2047     encountered.
2048    
2049     First, look past the end of the item to see if there is repeat information
2050     following. Then obey similar code to character type repeats - written out
2051     again for speed. */
2052    
2053     case OP_NCLASS:
2054     case OP_CLASS:
2055     {
2056     data = ecode + 1; /* Save for matching */
2057     ecode += 33; /* Advance past the item */
2058    
2059     switch (*ecode)
2060     {
2061     case OP_CRSTAR:
2062     case OP_CRMINSTAR:
2063     case OP_CRPLUS:
2064     case OP_CRMINPLUS:
2065     case OP_CRQUERY:
2066     case OP_CRMINQUERY:
2067     c = *ecode++ - OP_CRSTAR;
2068     minimize = (c & 1) != 0;
2069     min = rep_min[c]; /* Pick up values from tables; */
2070     max = rep_max[c]; /* zero for max => infinity */
2071     if (max == 0) max = INT_MAX;
2072     break;
2073    
2074     case OP_CRRANGE:
2075     case OP_CRMINRANGE:
2076     minimize = (*ecode == OP_CRMINRANGE);
2077     min = GET2(ecode, 1);
2078     max = GET2(ecode, 3);
2079     if (max == 0) max = INT_MAX;
2080     ecode += 5;
2081     break;
2082    
2083     default: /* No repeat follows */
2084     min = max = 1;
2085     break;
2086     }
2087    
2088     /* First, ensure the minimum number of matches are present. */
2089    
2090     #ifdef SUPPORT_UTF8
2091     /* UTF-8 mode */
2092     if (utf8)
2093     {
2094     for (i = 1; i <= min; i++)
2095     {
2096 ph10 427 if (eptr >= md->end_subject)
2097 ph10 426 {
2098 ph10 428 SCHECK_PARTIAL();
2099 ph10 426 RRETURN(MATCH_NOMATCH);
2100 ph10 427 }
2101 nigel 77 GETCHARINC(c, eptr);
2102     if (c > 255)
2103     {
2104     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2105     }
2106     else
2107     {
2108     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2109     }
2110     }
2111     }
2112     else
2113     #endif
2114     /* Not UTF-8 mode */
2115     {
2116     for (i = 1; i <= min; i++)
2117     {
2118 ph10 427 if (eptr >= md->end_subject)
2119 ph10 426 {
2120 ph10 428 SCHECK_PARTIAL();
2121 ph10 426 RRETURN(MATCH_NOMATCH);
2122 ph10 427 }
2123 nigel 77 c = *eptr++;
2124     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2125     }
2126     }
2127    
2128     /* If max == min we can continue with the main loop without the
2129     need to recurse. */
2130    
2131     if (min == max) continue;
2132    
2133     /* If minimizing, keep testing the rest of the expression and advancing
2134     the pointer while it matches the class. */
2135    
2136     if (minimize)
2137     {
2138     #ifdef SUPPORT_UTF8
2139     /* UTF-8 mode */
2140     if (utf8)
2141     {
2142     for (fi = min;; fi++)
2143     {
2144 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2145 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2146 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2147 ph10 427 if (eptr >= md->end_subject)
2148 ph10 426 {
2149 ph10 427 SCHECK_PARTIAL();
2150 ph10 426 RRETURN(MATCH_NOMATCH);
2151 ph10 427 }
2152 nigel 77 GETCHARINC(c, eptr);
2153     if (c > 255)
2154     {
2155     if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2156     }
2157     else
2158     {
2159     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2160     }
2161     }
2162     }
2163     else
2164     #endif
2165     /* Not UTF-8 mode */
2166     {
2167     for (fi = min;; fi++)
2168     {
2169 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2170 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2171 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2172 ph10 427 if (eptr >= md->end_subject)
2173 ph10 426 {
2174 ph10 427 SCHECK_PARTIAL();
2175 ph10 426 RRETURN(MATCH_NOMATCH);
2176 ph10 427 }
2177 nigel 77 c = *eptr++;
2178     if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2179     }
2180     }
2181     /* Control never gets here */
2182     }
2183    
2184     /* If maximizing, find the longest possible run, then work backwards. */
2185    
2186     else
2187     {
2188     pp = eptr;
2189    
2190     #ifdef SUPPORT_UTF8
2191     /* UTF-8 mode */
2192     if (utf8)
2193     {
2194     for (i = min; i < max; i++)
2195     {
2196     int len = 1;
2197     if (eptr >= md->end_subject) break;
2198     GETCHARLEN(c, eptr, len);
2199     if (c > 255)
2200     {
2201     if (op == OP_CLASS) break;
2202     }
2203     else
2204     {
2205     if ((data[c/8] & (1 << (c&7))) == 0) break;
2206     }
2207     eptr += len;
2208     }
2209     for (;;)
2210     {
2211 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2212 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2213     if (eptr-- == pp) break; /* Stop if tried at original pos */
2214     BACKCHAR(eptr);
2215     }
2216     }
2217     else
2218     #endif
2219     /* Not UTF-8 mode */
2220     {
2221     for (i = min; i < max; i++)
2222     {
2223     if (eptr >= md->end_subject) break;
2224     c = *eptr;
2225     if ((data[c/8] & (1 << (c&7))) == 0) break;
2226     eptr++;
2227     }
2228     while (eptr >= pp)
2229     {
2230 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2231 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2232 nigel 77 eptr--;
2233     }
2234     }
2235    
2236     RRETURN(MATCH_NOMATCH);
2237     }
2238     }
2239     /* Control never gets here */
2240    
2241    
2242     /* Match an extended character class. This opcode is encountered only
2243 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2244     mode, because Unicode properties are supported in non-UTF-8 mode. */
2245 nigel 77
2246     #ifdef SUPPORT_UTF8
2247     case OP_XCLASS:
2248     {
2249     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2250     ecode += GET(ecode, 1); /* Advance past the item */
2251    
2252     switch (*ecode)
2253     {
2254     case OP_CRSTAR:
2255     case OP_CRMINSTAR:
2256     case OP_CRPLUS:
2257     case OP_CRMINPLUS:
2258     case OP_CRQUERY:
2259     case OP_CRMINQUERY:
2260     c = *ecode++ - OP_CRSTAR;
2261     minimize = (c & 1) != 0;
2262     min = rep_min[c]; /* Pick up values from tables; */
2263     max = rep_max[c]; /* zero for max => infinity */
2264     if (max == 0) max = INT_MAX;
2265     break;
2266    
2267     case OP_CRRANGE:
2268     case OP_CRMINRANGE:
2269     minimize = (*ecode == OP_CRMINRANGE);
2270     min = GET2(ecode, 1);
2271     max = GET2(ecode, 3);
2272     if (max == 0) max = INT_MAX;
2273     ecode += 5;
2274     break;
2275    
2276     default: /* No repeat follows */
2277     min = max = 1;
2278     break;
2279     }
2280    
2281     /* First, ensure the minimum number of matches are present. */
2282    
2283     for (i = 1; i <= min; i++)
2284     {
2285 ph10 427 if (eptr >= md->end_subject)
2286 ph10 426 {
2287     SCHECK_PARTIAL();
2288     RRETURN(MATCH_NOMATCH);
2289 ph10 427 }
2290 ph10 384 GETCHARINCTEST(c, eptr);
2291 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2292     }
2293    
2294     /* If max == min we can continue with the main loop without the
2295     need to recurse. */
2296    
2297     if (min == max) continue;
2298    
2299     /* If minimizing, keep testing the rest of the expression and advancing
2300     the pointer while it matches the class. */
2301    
2302     if (minimize)
2303     {
2304     for (fi = min;; fi++)
2305     {
2306 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2307 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2308 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2309 ph10 427 if (eptr >= md->end_subject)
2310 ph10 426 {
2311 ph10 427 SCHECK_PARTIAL();
2312 ph10 426 RRETURN(MATCH_NOMATCH);
2313 ph10 427 }
2314 ph10 384 GETCHARINCTEST(c, eptr);
2315 nigel 77 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2316     }
2317     /* Control never gets here */
2318     }
2319    
2320     /* If maximizing, find the longest possible run, then work backwards. */
2321    
2322     else
2323     {
2324     pp = eptr;
2325     for (i = min; i < max; i++)
2326     {
2327     int len = 1;
2328     if (eptr >= md->end_subject) break;
2329 ph10 384 GETCHARLENTEST(c, eptr, len);
2330 nigel 77 if (!_pcre_xclass(c, data)) break;
2331     eptr += len;
2332     }
2333     for(;;)
2334     {
2335 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2336 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2337     if (eptr-- == pp) break; /* Stop if tried at original pos */
2338 ph10 214 if (utf8) BACKCHAR(eptr);
2339 nigel 77 }
2340     RRETURN(MATCH_NOMATCH);
2341     }
2342    
2343     /* Control never gets here */
2344     }
2345     #endif /* End of XCLASS */
2346    
2347     /* Match a single character, casefully */
2348    
2349     case OP_CHAR:
2350     #ifdef SUPPORT_UTF8
2351     if (utf8)
2352     {
2353     length = 1;
2354     ecode++;
2355     GETCHARLEN(fc, ecode, length);
2356 ph10 443 if (length > md->end_subject - eptr)
2357 ph10 428 {
2358     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2359     RRETURN(MATCH_NOMATCH);
2360 ph10 443 }
2361 nigel 77 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2362     }
2363     else
2364     #endif
2365    
2366     /* Non-UTF-8 mode */
2367     {
2368 ph10 443 if (md->end_subject - eptr < 1)
2369 ph10 428 {
2370     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2371     RRETURN(MATCH_NOMATCH);
2372 ph10 443 }
2373 nigel 77 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2374     ecode += 2;
2375     }
2376     break;
2377    
2378     /* Match a single character, caselessly */
2379    
2380     case OP_CHARNC:
2381     #ifdef SUPPORT_UTF8
2382     if (utf8)
2383     {
2384     length = 1;
2385     ecode++;
2386     GETCHARLEN(fc, ecode, length);
2387    
2388 ph10 443 if (length > md->end_subject - eptr)
2389 ph10 428 {
2390     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2391     RRETURN(MATCH_NOMATCH);
2392 ph10 443 }
2393 nigel 77
2394     /* If the pattern character's value is < 128, we have only one byte, and
2395     can use the fast lookup table. */
2396    
2397     if (fc < 128)
2398     {
2399     if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2400     }
2401    
2402     /* Otherwise we must pick up the subject character */
2403    
2404     else
2405     {
2406 nigel 93 unsigned int dc;
2407 nigel 77 GETCHARINC(dc, eptr);
2408     ecode += length;
2409    
2410     /* If we have Unicode property support, we can use it to test the other
2411 nigel 87 case of the character, if there is one. */
2412 nigel 77
2413     if (fc != dc)
2414     {
2415     #ifdef SUPPORT_UCP
2416 ph10 349 if (dc != UCD_OTHERCASE(fc))
2417 nigel 77 #endif
2418     RRETURN(MATCH_NOMATCH);
2419     }
2420     }
2421     }
2422     else
2423     #endif /* SUPPORT_UTF8 */
2424    
2425     /* Non-UTF-8 mode */
2426     {
2427 ph10 443 if (md->end_subject - eptr < 1)
2428 ph10 428 {
2429 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2430 ph10 428 RRETURN(MATCH_NOMATCH);
2431 ph10 443 }
2432 nigel 77 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2433     ecode += 2;
2434     }
2435     break;
2436    
2437 nigel 93 /* Match a single character repeatedly. */
2438 nigel 77
2439     case OP_EXACT:
2440     min = max = GET2(ecode, 1);
2441     ecode += 3;
2442     goto REPEATCHAR;
2443    
2444 nigel 93 case OP_POSUPTO:
2445     possessive = TRUE;
2446     /* Fall through */
2447    
2448 nigel 77 case OP_UPTO:
2449     case OP_MINUPTO:
2450     min = 0;
2451     max = GET2(ecode, 1);
2452     minimize = *ecode == OP_MINUPTO;
2453     ecode += 3;
2454     goto REPEATCHAR;
2455    
2456 nigel 93 case OP_POSSTAR:
2457     possessive = TRUE;
2458     min = 0;
2459     max = INT_MAX;
2460     ecode++;
2461     goto REPEATCHAR;
2462    
2463     case OP_POSPLUS:
2464     possessive = TRUE;
2465     min = 1;
2466     max = INT_MAX;
2467     ecode++;
2468     goto REPEATCHAR;
2469    
2470     case OP_POSQUERY:
2471     possessive = TRUE;
2472     min = 0;
2473     max = 1;
2474     ecode++;
2475     goto REPEATCHAR;
2476    
2477 nigel 77 case OP_STAR:
2478     case OP_MINSTAR:
2479     case OP_PLUS:
2480     case OP_MINPLUS:
2481     case OP_QUERY:
2482     case OP_MINQUERY:
2483     c = *ecode++ - OP_STAR;
2484     minimize = (c & 1) != 0;
2485 ph10 443
2486 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2487     max = rep_max[c]; /* zero for max => infinity */
2488     if (max == 0) max = INT_MAX;
2489    
2490 ph10 426 /* Common code for all repeated single-character matches. */
2491 nigel 77
2492     REPEATCHAR:
2493     #ifdef SUPPORT_UTF8
2494     if (utf8)
2495     {
2496     length = 1;
2497     charptr = ecode;
2498     GETCHARLEN(fc, ecode, length);
2499     ecode += length;
2500    
2501     /* Handle multibyte character matching specially here. There is
2502     support for caseless matching if UCP support is present. */
2503    
2504     if (length > 1)
2505     {
2506     #ifdef SUPPORT_UCP
2507 nigel 93 unsigned int othercase;
2508 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2509 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2510 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2511 ph10 115 else oclength = 0;
2512 nigel 77 #endif /* SUPPORT_UCP */
2513    
2514     for (i = 1; i <= min; i++)
2515     {
2516 ph10 426 if (eptr <= md->end_subject - length &&
2517     memcmp(eptr, charptr, length) == 0) eptr += length;
2518 ph10 123 #ifdef SUPPORT_UCP
2519 ph10 426 else if (oclength > 0 &&
2520     eptr <= md->end_subject - oclength &&
2521     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2522     #endif /* SUPPORT_UCP */
2523 nigel 77 else
2524     {
2525 ph10 426 CHECK_PARTIAL();
2526     RRETURN(MATCH_NOMATCH);
2527 nigel 77 }
2528     }
2529    
2530     if (min == max) continue;
2531    
2532     if (minimize)
2533     {
2534     for (fi = min;; fi++)
2535     {
2536 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2537 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2538 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2539 ph10 426 if (eptr <= md->end_subject - length &&
2540     memcmp(eptr, charptr, length) == 0) eptr += length;
2541 ph10 123 #ifdef SUPPORT_UCP
2542 ph10 426 else if (oclength > 0 &&
2543     eptr <= md->end_subject - oclength &&
2544     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2545     #endif /* SUPPORT_UCP */
2546 nigel 77 else
2547     {
2548 ph10 426 CHECK_PARTIAL();
2549     RRETURN(MATCH_NOMATCH);
2550 nigel 77 }
2551     }
2552     /* Control never gets here */
2553     }
2554 nigel 93
2555     else /* Maximize */
2556 nigel 77 {
2557     pp = eptr;
2558     for (i = min; i < max; i++)
2559     {
2560 ph10 426 if (eptr <= md->end_subject - length &&
2561     memcmp(eptr, charptr, length) == 0) eptr += length;
2562 ph10 123 #ifdef SUPPORT_UCP
2563 ph10 426 else if (oclength > 0 &&
2564     eptr <= md->end_subject - oclength &&
2565     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2566     #endif /* SUPPORT_UCP */
2567 ph10 115 else break;
2568 nigel 77 }
2569 nigel 93
2570     if (possessive) continue;
2571 ph10 427
2572 ph10 120 for(;;)
2573 ph10 426 {
2574     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2575     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2576     if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2577 ph10 115 #ifdef SUPPORT_UCP
2578 ph10 426 eptr--;
2579     BACKCHAR(eptr);
2580 ph10 123 #else /* without SUPPORT_UCP */
2581 ph10 426 eptr -= length;
2582 ph10 123 #endif /* SUPPORT_UCP */
2583 ph10 426 }
2584 nigel 77 }
2585     /* Control never gets here */
2586     }
2587    
2588     /* If the length of a UTF-8 character is 1, we fall through here, and
2589     obey the code as for non-UTF-8 characters below, though in this case the
2590     value of fc will always be < 128. */
2591     }
2592     else
2593     #endif /* SUPPORT_UTF8 */
2594    
2595     /* When not in UTF-8 mode, load a single-byte character. */
2596    
2597 ph10 426 fc = *ecode++;
2598 ph10 443
2599 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2600     may not be in UTF-8 mode. The code is duplicated for the caseless and
2601     caseful cases, for speed, since matching characters is likely to be quite
2602     common. First, ensure the minimum number of matches are present. If min =
2603     max, continue at the same level without recursing. Otherwise, if
2604     minimizing, keep trying the rest of the expression and advancing one
2605     matching character if failing, up to the maximum. Alternatively, if
2606     maximizing, find the maximum number of characters and work backwards. */
2607    
2608     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2609     max, eptr));
2610    
2611     if ((ims & PCRE_CASELESS) != 0)
2612     {
2613     fc = md->lcc[fc];
2614     for (i = 1; i <= min; i++)
2615 ph10 426 {
2616     if (eptr >= md->end_subject)
2617     {
2618     SCHECK_PARTIAL();
2619     RRETURN(MATCH_NOMATCH);
2620     }
2621 nigel 77 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2622 ph10 426 }
2623 nigel 77 if (min == max) continue;
2624     if (minimize)
2625     {
2626     for (fi = min;; fi++)
2627     {
2628 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2629 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2630 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2631 ph10 426 if (eptr >= md->end_subject)
2632     {
2633 ph10 427 SCHECK_PARTIAL();
2634 ph10 426 RRETURN(MATCH_NOMATCH);
2635     }
2636     if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2637 nigel 77 }
2638     /* Control never gets here */
2639     }
2640 nigel 93 else /* Maximize */
2641 nigel 77 {
2642     pp = eptr;
2643     for (i = min; i < max; i++)
2644     {
2645     if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2646     eptr++;
2647     }
2648 ph10 427
2649 nigel 93 if (possessive) continue;
2650 ph10 427
2651 nigel 77 while (eptr >= pp)
2652     {
2653 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2654 nigel 77 eptr--;
2655     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2656     }
2657     RRETURN(MATCH_NOMATCH);
2658     }
2659     /* Control never gets here */
2660     }
2661    
2662     /* Caseful comparisons (includes all multi-byte characters) */
2663    
2664     else
2665     {
2666 ph10 427 for (i = 1; i <= min; i++)
2667 ph10 426 {
2668     if (eptr >= md->end_subject)
2669     {
2670     SCHECK_PARTIAL();
2671     RRETURN(MATCH_NOMATCH);
2672     }
2673     if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2674 ph10 427 }
2675 ph10 443
2676 nigel 77 if (min == max) continue;
2677 ph10 443
2678 nigel 77 if (minimize)
2679     {
2680     for (fi = min;; fi++)
2681     {
2682 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2683 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2684 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2685 ph10 426 if (eptr >= md->end_subject)
2686 ph10 427 {
2687 ph10 426 SCHECK_PARTIAL();
2688     RRETURN(MATCH_NOMATCH);
2689 ph10 427 }
2690 ph10 426 if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2691 nigel 77 }
2692     /* Control never gets here */
2693     }
2694 nigel 93 else /* Maximize */
2695 nigel 77 {
2696     pp = eptr;
2697     for (i = min; i < max; i++)
2698     {
2699     if (eptr >= md->end_subject || fc != *eptr) break;
2700     eptr++;
2701     }
2702 nigel 93 if (possessive) continue;
2703 ph10 443
2704 nigel 77 while (eptr >= pp)
2705     {
2706 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2707 nigel 77 eptr--;
2708     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2709     }
2710     RRETURN(MATCH_NOMATCH);
2711     }
2712     }
2713     /* Control never gets here */
2714    
2715     /* Match a negated single one-byte character. The character we are
2716     checking can be multibyte. */
2717    
2718     case OP_NOT:
2719 ph10 443 if (eptr >= md->end_subject)
2720 ph10 428 {
2721 ph10 443 SCHECK_PARTIAL();
2722 ph10 428 RRETURN(MATCH_NOMATCH);
2723 ph10 443 }
2724 nigel 77 ecode++;
2725     GETCHARINCTEST(c, eptr);
2726     if ((ims & PCRE_CASELESS) != 0)
2727     {
2728     #ifdef SUPPORT_UTF8
2729     if (c < 256)
2730     #endif
2731     c = md->lcc[c];
2732     if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2733     }
2734     else
2735     {
2736     if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2737     }
2738     break;
2739    
2740     /* Match a negated single one-byte character repeatedly. This is almost a
2741     repeat of the code for a repeated single character, but I haven't found a
2742     nice way of commoning these up that doesn't require a test of the
2743     positive/negative option for each character match. Maybe that wouldn't add
2744     very much to the time taken, but character matching *is* what this is all
2745     about... */
2746    
2747     case OP_NOTEXACT:
2748     min = max = GET2(ecode, 1);
2749     ecode += 3;
2750     goto REPEATNOTCHAR;
2751    
2752     case OP_NOTUPTO:
2753     case OP_NOTMINUPTO:
2754     min = 0;
2755     max = GET2(ecode, 1);
2756     minimize = *ecode == OP_NOTMINUPTO;
2757     ecode += 3;
2758     goto REPEATNOTCHAR;
2759    
2760 nigel 93 case OP_NOTPOSSTAR:
2761     possessive = TRUE;
2762     min = 0;
2763     max = INT_MAX;
2764     ecode++;
2765     goto REPEATNOTCHAR;
2766    
2767     case OP_NOTPOSPLUS:
2768     possessive = TRUE;
2769     min = 1;
2770     max = INT_MAX;
2771     ecode++;
2772     goto REPEATNOTCHAR;
2773    
2774     case OP_NOTPOSQUERY:
2775     possessive = TRUE;
2776     min = 0;
2777     max = 1;
2778     ecode++;
2779     goto REPEATNOTCHAR;
2780    
2781     case OP_NOTPOSUPTO:
2782     possessive = TRUE;
2783     min = 0;
2784     max = GET2(ecode, 1);
2785     ecode += 3;
2786     goto REPEATNOTCHAR;
2787    
2788 nigel 77 case OP_NOTSTAR:
2789     case OP_NOTMINSTAR:
2790     case OP_NOTPLUS:
2791     case OP_NOTMINPLUS:
2792     case OP_NOTQUERY:
2793     case OP_NOTMINQUERY:
2794     c = *ecode++ - OP_NOTSTAR;
2795     minimize = (c & 1) != 0;
2796     min = rep_min[c]; /* Pick up values from tables; */
2797     max = rep_max[c]; /* zero for max => infinity */
2798     if (max == 0) max = INT_MAX;
2799    
2800 ph10 426 /* Common code for all repeated matches of a negated one-byte character. */
2801 nigel 77
2802     REPEATNOTCHAR:
2803     fc = *ecode++;
2804    
2805     /* The code is duplicated for the caseless and caseful cases, for speed,
2806     since matching characters is likely to be quite common. First, ensure the
2807     minimum number of matches are present. If min = max, continue at the same
2808     level without recursing. Otherwise, if minimizing, keep trying the rest of
2809     the expression and advancing one matching character if failing, up to the
2810     maximum. Alternatively, if maximizing, find the maximum number of
2811     characters and work backwards. */
2812    
2813     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2814     max, eptr));
2815    
2816     if ((ims & PCRE_CASELESS) != 0)
2817     {
2818     fc = md->lcc[fc];
2819    
2820     #ifdef SUPPORT_UTF8
2821     /* UTF-8 mode */
2822     if (utf8)
2823     {
2824 nigel 93 register unsigned int d;
2825 nigel 77 for (i = 1; i <= min; i++)
2826     {
2827 ph10 426 if (eptr >= md->end_subject)
2828     {
2829     SCHECK_PARTIAL();
2830 ph10 427 RRETURN(MATCH_NOMATCH);
2831     }
2832 nigel 77 GETCHARINC(d, eptr);
2833     if (d < 256) d = md->lcc[d];
2834     if (fc == d) RRETURN(MATCH_NOMATCH);
2835     }
2836     }
2837     else
2838     #endif
2839    
2840     /* Not UTF-8 mode */
2841     {
2842     for (i = 1; i <= min; i++)
2843 ph10 426 {
2844     if (eptr >= md->end_subject)
2845     {
2846     SCHECK_PARTIAL();
2847 ph10 427 RRETURN(MATCH_NOMATCH);
2848     }
2849 nigel 77 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2850 ph10 427 }
2851 nigel 77 }
2852    
2853     if (min == max) continue;
2854    
2855     if (minimize)
2856     {
2857     #ifdef SUPPORT_UTF8
2858     /* UTF-8 mode */
2859     if (utf8)
2860     {
2861 nigel 93 register unsigned int d;
2862 nigel 77 for (fi = min;; fi++)
2863     {
2864 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2865 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2866 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2867 ph10 427 if (eptr >= md->end_subject)
2868 ph10 426 {
2869 ph10 427 SCHECK_PARTIAL();
2870 ph10 426 RRETURN(MATCH_NOMATCH);
2871 ph10 427 }
2872 nigel 77 GETCHARINC(d, eptr);
2873     if (d < 256) d = md->lcc[d];
2874 ph10 366 if (fc == d) RRETURN(MATCH_NOMATCH);
2875 nigel 77 }
2876     }
2877     else
2878     #endif
2879     /* Not UTF-8 mode */
2880     {
2881     for (fi = min;; fi++)
2882     {
2883 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2884 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2885 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2886 ph10 426 if (eptr >= md->end_subject)
2887     {
2888     SCHECK_PARTIAL();
2889     RRETURN(MATCH_NOMATCH);
2890     }
2891     if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2892 nigel 77 }
2893     }
2894     /* Control never gets here */
2895     }
2896    
2897     /* Maximize case */
2898    
2899     else
2900     {
2901     pp = eptr;
2902    
2903     #ifdef SUPPORT_UTF8
2904     /* UTF-8 mode */
2905     if (utf8)
2906     {
2907 nigel 93 register unsigned int d;
2908 nigel 77 for (i = min; i < max; i++)
2909     {
2910     int len = 1;
2911     if (eptr >= md->end_subject) break;
2912     GETCHARLEN(d, eptr, len);
2913     if (d < 256) d = md->lcc[d];
2914     if (fc == d) break;
2915     eptr += len;
2916     }
2917 nigel 93 if (possessive) continue;
2918     for(;;)
2919 nigel 77 {
2920 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2921 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2922     if (eptr-- == pp) break; /* Stop if tried at original pos */
2923     BACKCHAR(eptr);
2924     }
2925     }
2926     else
2927     #endif
2928     /* Not UTF-8 mode */
2929     {
2930     for (i = min; i < max; i++)
2931     {
2932     if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2933     eptr++;
2934     }
2935 nigel 93 if (possessive) continue;
2936 nigel 77 while (eptr >= pp)
2937     {
2938 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2939 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2940     eptr--;
2941     }
2942     }
2943    
2944     RRETURN(MATCH_NOMATCH);
2945     }
2946     /* Control never gets here */
2947     }
2948    
2949     /* Caseful comparisons */
2950    
2951     else
2952     {
2953     #ifdef SUPPORT_UTF8
2954     /* UTF-8 mode */
2955     if (utf8)
2956     {
2957 nigel 93 register unsigned int d;
2958 nigel 77 for (i = 1; i <= min; i++)
2959     {
2960 ph10 426 if (eptr >= md->end_subject)
2961     {
2962     SCHECK_PARTIAL();
2963 ph10 427 RRETURN(MATCH_NOMATCH);
2964     }
2965 nigel 77 GETCHARINC(d, eptr);
2966     if (fc == d) RRETURN(MATCH_NOMATCH);
2967     }
2968     }
2969     else
2970     #endif
2971     /* Not UTF-8 mode */
2972     {
2973     for (i = 1; i <= min; i++)
2974 ph10 426 {
2975     if (eptr >= md->end_subject)
2976     {
2977     SCHECK_PARTIAL();
2978 ph10 427 RRETURN(MATCH_NOMATCH);
2979     }
2980 nigel 77 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2981 ph10 427 }
2982 nigel 77 }
2983    
2984     if (min == max) continue;
2985    
2986     if (minimize)
2987     {
2988     #ifdef SUPPORT_UTF8
2989     /* UTF-8 mode */
2990     if (utf8)
2991     {
2992 nigel 93 register unsigned int d;
2993 nigel 77 for (fi = min;; fi++)
2994     {
2995 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2996 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2997 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
2998 ph10 427 if (eptr >= md->end_subject)
2999 ph10 426 {
3000 ph10 427 SCHECK_PARTIAL();
3001 ph10 426 RRETURN(MATCH_NOMATCH);
3002 ph10 427 }
3003 nigel 77 GETCHARINC(d, eptr);
3004 ph10 371 if (fc == d) RRETURN(MATCH_NOMATCH);
3005 nigel 77 }
3006     }
3007     else
3008     #endif
3009     /* Not UTF-8 mode */
3010     {
3011     for (fi = min;; fi++)
3012     {
3013 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3014 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3015 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3016 ph10 426 if (eptr >= md->end_subject)
3017     {
3018     SCHECK_PARTIAL();
3019     RRETURN(MATCH_NOMATCH);
3020 ph10 427 }
3021 ph10 426 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3022 nigel 77 }
3023     }
3024     /* Control never gets here */
3025     }
3026    
3027     /* Maximize case */
3028    
3029     else
3030     {
3031     pp = eptr;
3032    
3033     #ifdef SUPPORT_UTF8
3034     /* UTF-8 mode */
3035     if (utf8)
3036     {
3037 nigel 93 register unsigned int d;
3038 nigel 77 for (i = min; i < max; i++)
3039     {
3040     int len = 1;
3041     if (eptr >= md->end_subject) break;
3042     GETCHARLEN(d, eptr, len);
3043     if (fc == d) break;
3044     eptr += len;
3045     }
3046 nigel 93 if (possessive) continue;
3047 nigel 77 for(;;)
3048     {
3049 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3050 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3051     if (eptr-- == pp) break; /* Stop if tried at original pos */
3052     BACKCHAR(eptr);
3053     }
3054     }
3055     else
3056     #endif
3057     /* Not UTF-8 mode */
3058     {
3059     for (i = min; i < max; i++)
3060     {
3061     if (eptr >= md->end_subject || fc == *eptr) break;
3062     eptr++;
3063     }
3064 nigel 93 if (possessive) continue;
3065 nigel 77 while (eptr >= pp)
3066     {
3067 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3068 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3069     eptr--;
3070     }
3071     }
3072    
3073     RRETURN(MATCH_NOMATCH);
3074     }
3075     }
3076     /* Control never gets here */
3077    
3078     /* Match a single character type repeatedly; several different opcodes
3079     share code. This is very similar to the code for single characters, but we
3080     repeat it in the interests of efficiency. */
3081    
3082     case OP_TYPEEXACT:
3083     min = max = GET2(ecode, 1);
3084     minimize = TRUE;
3085     ecode += 3;
3086     goto REPEATTYPE;
3087    
3088     case OP_TYPEUPTO:
3089     case OP_TYPEMINUPTO:
3090     min = 0;
3091     max = GET2(ecode, 1);
3092     minimize = *ecode == OP_TYPEMINUPTO;
3093     ecode += 3;
3094     goto REPEATTYPE;
3095    
3096 nigel 93 case OP_TYPEPOSSTAR:
3097     possessive = TRUE;
3098     min = 0;
3099     max = INT_MAX;
3100     ecode++;
3101     goto REPEATTYPE;
3102    
3103     case OP_TYPEPOSPLUS:
3104     possessive = TRUE;
3105     min = 1;
3106     max = INT_MAX;
3107     ecode++;
3108     goto REPEATTYPE;
3109    
3110     case OP_TYPEPOSQUERY:
3111     possessive = TRUE;
3112     min = 0;
3113     max = 1;
3114     ecode++;
3115     goto REPEATTYPE;
3116    
3117     case OP_TYPEPOSUPTO:
3118     possessive = TRUE;
3119     min = 0;
3120     max = GET2(ecode, 1);
3121     ecode += 3;
3122     goto REPEATTYPE;
3123    
3124 nigel 77 case OP_TYPESTAR:
3125     case OP_TYPEMINSTAR:
3126     case OP_TYPEPLUS:
3127     case OP_TYPEMINPLUS:
3128     case OP_TYPEQUERY:
3129     case OP_TYPEMINQUERY:
3130     c = *ecode++ - OP_TYPESTAR;
3131     minimize = (c & 1) != 0;
3132     min = rep_min[c]; /* Pick up values from tables; */
3133     max = rep_max[c]; /* zero for max => infinity */
3134     if (max == 0) max = INT_MAX;
3135    
3136     /* Common code for all repeated single character type matches. Note that
3137     in UTF-8 mode, '.' matches a character of any length, but for the other
3138     character types, the valid characters are all one-byte long. */
3139    
3140     REPEATTYPE:
3141     ctype = *ecode++; /* Code for the character type */
3142    
3143     #ifdef SUPPORT_UCP
3144     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3145     {
3146     prop_fail_result = ctype == OP_NOTPROP;
3147     prop_type = *ecode++;
3148 nigel 87 prop_value = *ecode++;
3149 nigel 77 }
3150     else prop_type = -1;
3151     #endif
3152    
3153     /* First, ensure the minimum number of matches are present. Use inline
3154     code for maximizing the speed, and do the type test once at the start
3155 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3156 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3157     and single-bytes. */
3158    
3159     if (min > 0)
3160     {
3161     #ifdef SUPPORT_UCP
3162 nigel 87 if (prop_type >= 0)
3163 nigel 77 {
3164 nigel 87 switch(prop_type)
3165 nigel 77 {
3166 nigel 87 case PT_ANY:
3167     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3168     for (i = 1; i <= min; i++)
3169     {
3170 ph10 427 if (eptr >= md->end_subject)
3171 ph10 426 {
3172 ph10 427 SCHECK_PARTIAL();
3173 ph10 426 RRETURN(MATCH_NOMATCH);
3174 ph10 427 }
3175 ph10 184 GETCHARINCTEST(c, eptr);
3176 nigel 87 }
3177     break;
3178    
3179     case PT_LAMP:
3180     for (i = 1; i <= min; i++)
3181     {
3182 ph10 427 if (eptr >= md->end_subject)
3183 ph10 426 {
3184 ph10 427 SCHECK_PARTIAL();
3185 ph10 426 RRETURN(MATCH_NOMATCH);
3186 ph10 427 }
3187 ph10 184 GETCHARINCTEST(c, eptr);
3188 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3189 nigel 87 if ((prop_chartype == ucp_Lu ||
3190     prop_chartype == ucp_Ll ||
3191     prop_chartype == ucp_Lt) == prop_fail_result)
3192     RRETURN(MATCH_NOMATCH);
3193     }
3194     break;
3195    
3196     case PT_GC:
3197     for (i = 1; i <= min; i++)
3198     {
3199 ph10 427 if (eptr >= md->end_subject)
3200 ph10 426 {
3201 ph10 427 SCHECK_PARTIAL();
3202 ph10 426 RRETURN(MATCH_NOMATCH);
3203 ph10 427 }
3204 ph10 184 GETCHARINCTEST(c, eptr);
3205 ph10 349 prop_category = UCD_CATEGORY(c);
3206 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3207     RRETURN(MATCH_NOMATCH);
3208     }
3209     break;
3210    
3211     case PT_PC:
3212     for (i = 1; i <= min; i++)
3213     {
3214 ph10 427 if (eptr >= md->end_subject)
3215 ph10 426 {
3216 ph10 427 SCHECK_PARTIAL();
3217 ph10 426 RRETURN(MATCH_NOMATCH);
3218 ph10 427 }
3219 ph10 184 GETCHARINCTEST(c, eptr);
3220 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3221 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3222     RRETURN(MATCH_NOMATCH);
3223     }
3224     break;
3225    
3226     case PT_SC:
3227     for (i = 1; i <= min; i++)
3228     {
3229 ph10 427 if (eptr >= md->end_subject)
3230 ph10 426 {
3231 ph10 427 SCHECK_PARTIAL();
3232 ph10 426 RRETURN(MATCH_NOMATCH);
3233 ph10 427 }
3234 ph10 184 GETCHARINCTEST(c, eptr);
3235 ph10 349 prop_script = UCD_SCRIPT(c);
3236 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3237     RRETURN(MATCH_NOMATCH);
3238     }
3239     break;
3240    
3241     default:
3242     RRETURN(PCRE_ERROR_INTERNAL);
3243 nigel 77 }
3244     }
3245    
3246     /* Match extended Unicode sequences. We will get here only if the
3247     support is in the binary; otherwise a compile-time error occurs. */
3248    
3249     else if (ctype == OP_EXTUNI)
3250     {
3251     for (i = 1; i <= min; i++)
3252     {
3253 ph10 427 if (eptr >= md->end_subject)
3254 ph10 426 {
3255 ph10 427 SCHECK_PARTIAL();
3256 ph10 426 RRETURN(MATCH_NOMATCH);
3257 ph10 427 }
3258 nigel 77 GETCHARINCTEST(c, eptr);
3259 ph10 349 prop_category = UCD_CATEGORY(c);
3260 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3261     while (eptr < md->end_subject)
3262     {
3263     int len = 1;
3264 ph10 426 if (!utf8) c = *eptr;
3265     else { GETCHARLEN(c, eptr, len); }
3266 ph10 349 prop_category = UCD_CATEGORY(c);
3267 nigel 77 if (prop_category != ucp_M) break;
3268     eptr += len;
3269     }
3270     }
3271     }
3272    
3273     else
3274     #endif /* SUPPORT_UCP */
3275    
3276     /* Handle all other cases when the coding is UTF-8 */
3277    
3278     #ifdef SUPPORT_UTF8
3279     if (utf8) switch(ctype)
3280     {
3281     case OP_ANY:
3282     for (i = 1; i <= min; i++)
3283     {
3284 ph10 426 if (eptr >= md->end_subject)
3285     {
3286 ph10 427 SCHECK_PARTIAL();
3287 nigel 77 RRETURN(MATCH_NOMATCH);
3288 ph10 427 }
3289 ph10 426 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3290 nigel 91 eptr++;
3291 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3292     }
3293     break;
3294    
3295 ph10 341 case OP_ALLANY:
3296     for (i = 1; i <= min; i++)
3297     {
3298 ph10 427 if (eptr >= md->end_subject)
3299 ph10 426 {
3300     SCHECK_PARTIAL();
3301     RRETURN(MATCH_NOMATCH);
3302 ph10 427 }
3303 ph10 341 eptr++;
3304     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3305     }
3306     break;
3307    
3308 nigel 77 case OP_ANYBYTE:
3309 ph10 427 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3310 nigel 77 eptr += min;
3311     break;
3312    
3313 nigel 93 case OP_ANYNL:
3314     for (i = 1; i <= min; i++)
3315     {
3316 ph10 427 if (eptr >= md->end_subject)
3317 ph10 426 {
3318     SCHECK_PARTIAL();
3319     RRETURN(MATCH_NOMATCH);
3320 ph10 427 }
3321 nigel 93 GETCHARINC(c, eptr);
3322     switch(c)
3323     {
3324     default: RRETURN(MATCH_NOMATCH);
3325     case 0x000d:
3326     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3327     break;
3328 ph10 231
3329 nigel 93 case 0x000a:
3330 ph10 231 break;
3331    
3332 nigel 93 case 0x000b:
3333     case 0x000c:
3334     case 0x0085:
3335     case 0x2028:
3336     case 0x2029:
3337 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3338 nigel 93 break;
3339     }
3340     }
3341     break;
3342    
3343 ph10 178 case OP_NOT_HSPACE:
3344     for (i = 1; i <= min; i++)
3345     {
3346 ph10 427 if (eptr >= md->end_subject)
3347 ph10 426 {
3348     SCHECK_PARTIAL();
3349     RRETURN(MATCH_NOMATCH);
3350 ph10 427 }
3351 ph10 178 GETCHARINC(c, eptr);
3352     switch(c)
3353     {
3354     default: break;
3355     case 0x09: /* HT */
3356     case 0x20: /* SPACE */
3357     case 0xa0: /* NBSP */
3358     case 0x1680: /* OGHAM SPACE MARK */
3359     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3360     case 0x2000: /* EN QUAD */
3361     case 0x2001: /* EM QUAD */
3362     case 0x2002: /* EN SPACE */
3363     case 0x2003: /* EM SPACE */
3364     case 0x2004: /* THREE-PER-EM SPACE */
3365     case 0x2005: /* FOUR-PER-EM SPACE */
3366     case 0x2006: /* SIX-PER-EM SPACE */
3367     case 0x2007: /* FIGURE SPACE */
3368     case 0x2008: /* PUNCTUATION SPACE */
3369     case 0x2009: /* THIN SPACE */
3370     case 0x200A: /* HAIR SPACE */
3371     case 0x202f: /* NARROW NO-BREAK SPACE */
3372     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3373     case 0x3000: /* IDEOGRAPHIC SPACE */
3374     RRETURN(MATCH_NOMATCH);
3375     }
3376     }
3377     break;
3378 ph10 182
3379 ph10 178 case OP_HSPACE:
3380     for (i = 1; i <= min; i++)
3381     {
3382 ph10 427 if (eptr >= md->end_subject)
3383 ph10 426 {
3384 ph10 427 SCHECK_PARTIAL();
3385 ph10 426 RRETURN(MATCH_NOMATCH);
3386 ph10 427 }
3387 ph10 178 GETCHARINC(c, eptr);
3388     switch(c)
3389     {
3390     default: RRETURN(MATCH_NOMATCH);
3391     case 0x09: /* HT */
3392     case 0x20: /* SPACE */
3393     case 0xa0: /* NBSP */
3394     case 0x1680: /* OGHAM SPACE MARK */
3395     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3396     case 0x2000: /* EN QUAD */
3397     case 0x2001: /* EM QUAD */
3398     case 0x2002: /* EN SPACE */
3399     case 0x2003: /* EM SPACE */
3400     case 0x2004: /* THREE-PER-EM SPACE */
3401     case 0x2005: /* FOUR-PER-EM SPACE */
3402     case 0x2006: /* SIX-PER-EM SPACE */
3403     case 0x2007: /* FIGURE SPACE */
3404     case 0x2008: /* PUNCTUATION SPACE */
3405     case 0x2009: /* THIN SPACE */
3406     case 0x200A: /* HAIR SPACE */
3407     case 0x202f: /* NARROW NO-BREAK SPACE */
3408     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3409     case 0x3000: /* IDEOGRAPHIC SPACE */
3410     break;
3411     }
3412     }
3413     break;
3414 ph10 182
3415 ph10 178 case OP_NOT_VSPACE:
3416     for (i = 1; i <= min; i++)
3417     {
3418 ph10 427 if (eptr >= md->end_subject)
3419 ph10 426 {
3420 ph10 427 SCHECK_PARTIAL();
3421 ph10 426 RRETURN(MATCH_NOMATCH);
3422 ph10 427 }
3423 ph10 178 GETCHARINC(c, eptr);
3424     switch(c)
3425     {
3426     default: break;
3427     case 0x0a: /* LF */
3428     case 0x0b: /* VT */
3429     case 0x0c: /* FF */
3430     case 0x0d: /* CR */
3431     case 0x85: /* NEL */
3432     case 0x2028: /* LINE SEPARATOR */
3433     case 0x2029: /* PARAGRAPH SEPARATOR */
3434     RRETURN(MATCH_NOMATCH);
3435     }
3436     }
3437     break;
3438 ph10 182
3439 ph10 178 case OP_VSPACE:
3440     for (i = 1; i <= min; i++)
3441     {
3442 ph10 427 if (eptr >= md->end_subject)
3443 ph10 426 {
3444 ph10 427 SCHECK_PARTIAL();
3445 ph10 426 RRETURN(MATCH_NOMATCH);
3446 ph10 427 }
3447 ph10 178 GETCHARINC(c, eptr);
3448     switch(c)
3449     {
3450     default: RRETURN(MATCH_NOMATCH);
3451     case 0x0a: /* LF */
3452     case 0x0b: /* VT */
3453     case 0x0c: /* FF */
3454     case 0x0d: /* CR */
3455     case 0x85: /* NEL */
3456     case 0x2028: /* LINE SEPARATOR */
3457     case 0x2029: /* PARAGRAPH SEPARATOR */
3458 ph10 182 break;
3459 ph10 178 }
3460     }
3461     break;
3462    
3463 nigel 77 case OP_NOT_DIGIT:
3464     for (i = 1; i <= min; i++)
3465     {
3466 ph10 427 if (eptr >= md->end_subject)
3467 ph10 426 {
3468 ph10 427 SCHECK_PARTIAL();
3469 ph10 426 RRETURN(MATCH_NOMATCH);
3470 ph10 427 }
3471 nigel 77 GETCHARINC(c, eptr);
3472     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3473     RRETURN(MATCH_NOMATCH);
3474     }
3475     break;
3476    
3477     case OP_DIGIT:
3478     for (i = 1; i <= min; i++)
3479     {
3480 ph10 427 if (eptr >= md->end_subject)
3481 ph10 426 {
3482 ph10 427 SCHECK_PARTIAL();
3483 nigel 77 RRETURN(MATCH_NOMATCH);
3484 ph10 427 }
3485 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3486     RRETURN(MATCH_NOMATCH);
3487 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3488     }
3489     break;
3490    
3491     case OP_NOT_WHITESPACE:
3492     for (i = 1; i <= min; i++)
3493     {
3494 ph10 427 if (eptr >= md->end_subject)
3495 ph10 426 {
3496 ph10 427 SCHECK_PARTIAL();
3497 nigel 77 RRETURN(MATCH_NOMATCH);
3498 ph10 427 }
3499 ph10 426 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3500     RRETURN(MATCH_NOMATCH);
3501 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3502 nigel 77 }
3503     break;
3504    
3505     case OP_WHITESPACE:
3506     for (i = 1; i <= min; i++)
3507     {
3508 ph10 427 if (eptr >= md->end_subject)
3509 ph10 426 {
3510 ph10 427 SCHECK_PARTIAL();
3511 nigel 77 RRETURN(MATCH_NOMATCH);
3512 ph10 427 }
3513 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3514     RRETURN(MATCH_NOMATCH);
3515 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3516     }
3517     break;
3518    
3519     case OP_NOT_WORDCHAR:
3520     for (i = 1; i <= min; i++)
3521     {
3522     if (eptr >= md->end_subject ||
3523 ph10 219 (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3524 nigel 77 RRETURN(MATCH_NOMATCH);
3525 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3526 nigel 77 }
3527     break;
3528    
3529     case OP_WORDCHAR:
3530     for (i = 1; i <= min; i++)
3531     {
3532 ph10 427 if (eptr >= md->end_subject)
3533 ph10 426 {
3534 ph10 427 SCHECK_PARTIAL();
3535 nigel 77 RRETURN(MATCH_NOMATCH);
3536 ph10 427 }
3537 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3538     RRETURN(MATCH_NOMATCH);
3539 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3540     }
3541     break;
3542    
3543     default:
3544     RRETURN(PCRE_ERROR_INTERNAL);
3545     } /* End switch(ctype) */
3546    
3547     else
3548     #endif /* SUPPORT_UTF8 */
3549    
3550     /* Code for the non-UTF-8 case for minimum matching of operators other
3551 ph10 426 than OP_PROP and OP_NOTPROP. */
3552 nigel 77
3553     switch(ctype)
3554     {
3555     case OP_ANY:
3556 ph10 342 for (i = 1; i <= min; i++)
3557 nigel 77 {
3558 ph10 427 if (eptr >= md->end_subject)
3559 ph10 426 {
3560 ph10 427 SCHECK_PARTIAL();
3561 ph10 426 RRETURN(MATCH_NOMATCH);
3562 ph10 427 }
3563 ph10 342 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3564     eptr++;
3565 nigel 77 }
3566     break;
3567    
3568 ph10 341 case OP_ALLANY:
3569 ph10 443 if (eptr > md->end_subject - min)
3570 ph10 428 {
3571 ph10 443 SCHECK_PARTIAL();
3572 ph10 428 RRETURN(MATCH_NOMATCH);
3573 ph10 443 }
3574 ph10 341 eptr += min;
3575     break;
3576    
3577 nigel 77 case OP_ANYBYTE:
3578 ph10 443 if (eptr > md->end_subject - min)
3579 ph10 428 {
3580 ph10 443 SCHECK_PARTIAL();
3581 ph10 428 RRETURN(MATCH_NOMATCH);
3582 ph10 443 }
3583 nigel 77 eptr += min;
3584     break;
3585    
3586 nigel 93 case OP_ANYNL:
3587     for (i = 1; i <= min; i++)
3588     {
3589 ph10 427 if (eptr >= md->end_subject)
3590 ph10 426 {
3591 ph10 427 SCHECK_PARTIAL();
3592 ph10 426 RRETURN(MATCH_NOMATCH);
3593 ph10 427 }
3594 nigel 93 switch(*eptr++)
3595     {
3596     default: RRETURN(MATCH_NOMATCH);
3597     case 0x000d:
3598     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3599     break;
3600     case 0x000a:
3601 ph10 231 break;
3602    
3603 nigel 93 case 0x000b:
3604     case 0x000c:
3605     case 0x0085:
3606 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3607 nigel 93 break;
3608     }
3609     }
3610     break;
3611    
3612 ph10 178 case OP_NOT_HSPACE:
3613     for (i = 1; i <= min; i++)
3614     {
3615 ph10 427 if (eptr >= md->end_subject)
3616 ph10 426 {
3617 ph10 427 SCHECK_PARTIAL();
3618 ph10 426 RRETURN(MATCH_NOMATCH);
3619 ph10 427 }
3620 ph10 178 switch(*eptr++)
3621     {
3622     default: break;
3623     case 0x09: /* HT */
3624     case 0x20: /* SPACE */
3625     case 0xa0: /* NBSP */
3626     RRETURN(MATCH_NOMATCH);
3627     }
3628     }
3629     break;
3630    
3631     case OP_HSPACE:
3632     for (i = 1; i <= min; i++)
3633     {
3634 ph10 427 if (eptr >= md->end_subject)
3635 ph10 426 {
3636 ph10 427 SCHECK_PARTIAL();
3637 ph10 426 RRETURN(MATCH_NOMATCH);
3638 ph10 427 }
3639 ph10 178 switch(*eptr++)
3640     {
3641     default: RRETURN(MATCH_NOMATCH);
3642     case 0x09: /* HT */
3643     case 0x20: /* SPACE */
3644     case 0xa0: /* NBSP */
3645 ph10 182 break;
3646 ph10 178 }
3647     }
3648     break;
3649    
3650     case OP_NOT_VSPACE:
3651     for (i = 1; i <= min; i++)
3652     {
3653 ph10 427 if (eptr >= md->end_subject)
3654 ph10 426 {
3655 ph10 427 SCHECK_PARTIAL();
3656 ph10 426 RRETURN(MATCH_NOMATCH);
3657 ph10 427 }
3658 ph10 178 switch(*eptr++)
3659     {
3660     default: break;
3661     case 0x0a: /* LF */
3662     case 0x0b: /* VT */
3663     case 0x0c: /* FF */
3664     case 0x0d: /* CR */
3665     case 0x85: /* NEL */
3666     RRETURN(MATCH_NOMATCH);
3667     }
3668     }
3669     break;
3670    
3671     case OP_VSPACE:
3672     for (i = 1; i <= min; i++)
3673     {
3674 ph10 427 if (eptr >= md->end_subject)
3675 ph10 426 {
3676 ph10 427 SCHECK_PARTIAL();
3677 ph10 426 RRETURN(MATCH_NOMATCH);
3678 ph10 427 }
3679 ph10 178 switch(*eptr++)
3680     {
3681     default: RRETURN(MATCH_NOMATCH);
3682     case 0x0a: /* LF */
3683     case 0x0b: /* VT */
3684     case 0x0c: /* FF */
3685     case 0x0d: /* CR */
3686     case 0x85: /* NEL */
3687 ph10 182 break;
3688 ph10 178 }
3689     }
3690     break;
3691    
3692 nigel 77 case OP_NOT_DIGIT:
3693     for (i = 1; i <= min; i++)
3694 ph10 427 {
3695     if (eptr >= md->end_subject)
3696 ph10 426 {
3697 ph10 427 SCHECK_PARTIAL();
3698 ph10 426 RRETURN(MATCH_NOMATCH);
3699 ph10 427 }
3700 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3701 ph10 427 }
3702 nigel 77 break;
3703    
3704     case OP_DIGIT:
3705     for (i = 1; i <= min; i++)
3706 ph10 427 {
3707     if (eptr >= md->end_subject)
3708 ph10 426 {
3709 ph10 427 SCHECK_PARTIAL();
3710 ph10 426 RRETURN(MATCH_NOMATCH);
3711 ph10 427 }
3712 nigel 77 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3713 ph10 427 }
3714 nigel 77 break;
3715    
3716     case OP_NOT_WHITESPACE:
3717     for (i = 1; i <= min; i++)
3718 ph10 427 {
3719     if (eptr >= md->end_subject)
3720 ph10 426 {
3721 ph10 427 SCHECK_PARTIAL();
3722 ph10 426 RRETURN(MATCH_NOMATCH);
3723 ph10 427 }
3724 nigel 77 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3725 ph10 427 }
3726 nigel 77 break;
3727    
3728     case OP_WHITESPACE:
3729     for (i = 1; i <= min; i++)
3730 ph10 427 {
3731     if (eptr >= md->end_subject)
3732 ph10 426 {
3733 ph10 427 SCHECK_PARTIAL();
3734 ph10 426 RRETURN(MATCH_NOMATCH);
3735 ph10 427 }
3736 nigel 77 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3737 ph10 427 }
3738 nigel 77 break;
3739    
3740     case OP_NOT_WORDCHAR:
3741     for (i = 1; i <= min; i++)
3742 ph10 427 {
3743     if (eptr >= md->end_subject)
3744 ph10 426 {
3745 ph10 427 SCHECK_PARTIAL();
3746 ph10 426 RRETURN(MATCH_NOMATCH);
3747 ph10 427 }
3748 nigel 77 if ((md->ctypes[*eptr++] & ctype_word) != 0)
3749     RRETURN(MATCH_NOMATCH);
3750 ph10 427 }
3751 nigel 77 break;
3752    
3753     case OP_WORDCHAR:
3754     for (i = 1; i <= min; i++)
3755 ph10 427 {
3756     if (eptr >= md->end_subject)
3757 ph10 426 {
3758 ph10 427 SCHECK_PARTIAL();
3759 ph10 426 RRETURN(MATCH_NOMATCH);
3760 ph10 427 }
3761 nigel 77 if ((md->ctypes[*eptr++] & ctype_word) == 0)
3762     RRETURN(MATCH_NOMATCH);
3763 ph10 427 }
3764 nigel 77 break;
3765    
3766     default:
3767     RRETURN(PCRE_ERROR_INTERNAL);
3768     }
3769     }
3770    
3771     /* If min = max, continue at the same level without recursing */
3772    
3773     if (min == max) continue;
3774    
3775     /* If minimizing, we have to test the rest of the pattern before each
3776     subsequent match. Again, separate the UTF-8 case for speed, and also
3777     separate the UCP cases. */
3778    
3779     if (minimize)
3780     {
3781     #ifdef SUPPORT_UCP
3782 nigel 87 if (prop_type >= 0)
3783 nigel 77 {
3784 nigel 87 switch(prop_type)
3785 nigel 77 {
3786 nigel 87 case PT_ANY:
3787     for (fi = min;; fi++)
3788     {
3789 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3790 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3791 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3792 ph10 427 if (eptr >= md->end_subject)
3793 ph10 426 {
3794 ph10 427 SCHECK_PARTIAL();
3795 ph10 426 RRETURN(MATCH_NOMATCH);
3796 ph10 427 }
3797 nigel 87 GETCHARINC(c, eptr);
3798     if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3799     }
3800 nigel 93 /* Control never gets here */
3801 nigel 87
3802     case PT_LAMP:
3803     for (fi = min;; fi++)
3804     {
3805 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3806 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3807 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3808 ph10 427 if (eptr >= md->end_subject)
3809 ph10 426 {
3810 ph10 427 SCHECK_PARTIAL();
3811 ph10 426 RRETURN(MATCH_NOMATCH);
3812 ph10 427 }
3813 nigel 87 GETCHARINC(c, eptr);
3814 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3815 nigel 87 if ((prop_chartype == ucp_Lu ||
3816     prop_chartype == ucp_Ll ||
3817     prop_chartype == ucp_Lt) == prop_fail_result)
3818     RRETURN(MATCH_NOMATCH);
3819     }
3820 nigel 93 /* Control never gets here */
3821 nigel 87
3822     case PT_GC:
3823     for (fi = min;; fi++)
3824     {
3825 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3826 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3827 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3828 ph10 427 if (eptr >= md->end_subject)
3829 ph10 426 {
3830 ph10 427 SCHECK_PARTIAL();
3831 ph10 426 RRETURN(MATCH_NOMATCH);
3832 ph10 427 }
3833 nigel 87 GETCHARINC(c, eptr);
3834 ph10 349 prop_category = UCD_CATEGORY(c);
3835 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3836     RRETURN(MATCH_NOMATCH);
3837     }
3838 nigel 93 /* Control never gets here */
3839 nigel 87
3840     case PT_PC:
3841     for (fi = min;; fi++)
3842     {
3843 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3844 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3845 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3846 ph10 427 if (eptr >= md->end_subject)
3847 ph10 426 {
3848 ph10 427 SCHECK_PARTIAL();
3849 ph10 426 RRETURN(MATCH_NOMATCH);
3850 ph10 427 }
3851 nigel 87 GETCHARINC(c, eptr);
3852 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3853 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3854     RRETURN(MATCH_NOMATCH);
3855     }
3856 nigel 93 /* Control never gets here */
3857 nigel 87
3858     case PT_SC:
3859     for (fi = min;; fi++)
3860     {
3861 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3862 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3863 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3864 ph10 427 if (eptr >= md->end_subject)
3865 ph10 426 {
3866 ph10 427 SCHECK_PARTIAL();
3867 ph10 426 RRETURN(MATCH_NOMATCH);
3868 ph10 427 }
3869 nigel 87 GETCHARINC(c, eptr);
3870 ph10 349 prop_script = UCD_SCRIPT(c);
3871 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3872     RRETURN(MATCH_NOMATCH);
3873     }
3874 nigel 93 /* Control never gets here */
3875 nigel 87
3876     default:
3877     RRETURN(PCRE_ERROR_INTERNAL);
3878 nigel 77 }
3879     }
3880    
3881     /* Match extended Unicode sequences. We will get here only if the
3882     support is in the binary; otherwise a compile-time error occurs. */
3883    
3884     else if (ctype == OP_EXTUNI)
3885     {
3886     for (fi = min;; fi++)
3887     {
3888 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3889 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3890 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3891 ph10 427 if (eptr >= md->end_subject)
3892 ph10 426 {
3893 ph10 427 SCHECK_PARTIAL();
3894 ph10 426 RRETURN(MATCH_NOMATCH);
3895 ph10 427 }
3896 nigel 77 GETCHARINCTEST(c, eptr);
3897 ph10 349 prop_category = UCD_CATEGORY(c);
3898 nigel 77 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3899     while (eptr < md->end_subject)
3900     {
3901     int len = 1;
3902 ph10 426 if (!utf8) c = *eptr;
3903     else { GETCHARLEN(c, eptr, len); }
3904 ph10 349 prop_category = UCD_CATEGORY(c);
3905 nigel 77 if (prop_category != ucp_M) break;
3906     eptr += len;
3907     }
3908     }
3909     }
3910    
3911     else
3912     #endif /* SUPPORT_UCP */
3913    
3914     #ifdef SUPPORT_UTF8
3915     /* UTF-8 mode */
3916     if (utf8)
3917     {
3918     for (fi = min;; fi++)
3919     {
3920 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3921 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3922 ph10 428 if (fi >= max) RRETURN(MATCH_NOMATCH);
3923 ph10 427 if (eptr >= md->end_subject)
3924 ph10 426 {
3925 ph10 427 SCHECK_PARTIAL();
3926 ph10 426 RRETURN(MATCH_NOMATCH);
3927 ph10 427 }
3928 ph10 426 if (ctype == OP_ANY && IS_NEWLINE(eptr))
3929     RRETURN(MATCH_NOMATCH);
3930 nigel 77 GETCHARINC(c, eptr);
3931     switch(ctype)
3932     {
3933 ph10 342 case OP_ANY: /* This is the non-NL case */
3934 ph10 345 case OP_ALLANY:
3935 nigel 77 case OP_ANYBYTE:
3936     break;
3937    
3938 nigel 93 case OP_ANYNL:
3939     switch(c)
3940     {
3941     default: RRETURN(MATCH_NOMATCH);
3942     case 0x000d:
3943     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3944     break;
3945     case 0x000a:
3946 ph10 231 break;
3947    
3948 nigel 93 case 0x000b:
3949     case 0x000c:
3950     case 0x0085:
3951     case 0x2028:
3952     case 0x2029:
3953 ph10 231 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3954 nigel 93 break;
3955     }
3956     break;
3957    
3958 ph10 178 case OP_NOT_HSPACE:
3959     switch(c)
3960     {
3961     default: break;
3962     case 0x09: /* HT */
3963     case 0x20: /* SPACE */
3964     case 0xa0: /* NBSP */
3965     case 0x1680: /* OGHAM SPACE MARK */
3966     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3967     case 0x2000: /* EN QUAD */
3968     case 0x2001: /* EM QUAD */
3969     case 0x2002: /* EN SPACE */
3970     case 0x2003: /* EM SPACE */
3971     case 0x2004: /* THREE-PER-EM SPACE */
3972     case 0x2005: /* FOUR-PER-EM SPACE */
3973     case 0x2006: /* SIX-PER-EM SPACE */
3974     case 0x2007: /* FIGURE SPACE */
3975     case 0x2008: /* PUNCTUATION SPACE */
3976     case 0x2009: /* THIN SPACE */
3977     case 0x200A: /* HAIR SPACE */
3978     case 0x202f: /* NARROW NO-BREAK SPACE */
3979     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3980     case 0x3000: /* IDEOGRAPHIC SPACE */
3981     RRETURN(MATCH_NOMATCH);
3982     }
3983     break;
3984    
3985     case OP_HSPACE:
3986     switch(c)