/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 511 - (hide annotations) (download)
Mon Mar 29 09:25:38 2010 UTC (4 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 174742 byte(s)
Make (*ACCEPT) work inside an atomic group.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74 ph10 511 #define MATCH_ACCEPT (-999)
75     #define MATCH_COMMIT (-998)
76     #define MATCH_PRUNE (-997)
77     #define MATCH_SKIP (-996)
78     #define MATCH_SKIP_ARG (-995)
79     #define MATCH_THEN (-994)
80 ph10 210
81 ph10 510 /* This is a convenience macro for code that occurs many times. */
82    
83     #define MRRETURN(ra) \
84     { \
85     md->mark = markptr; \
86     RRETURN(ra); \
87     }
88    
89 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
90     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91     because the offset vector is always a multiple of 3 long. */
92    
93     #define REC_STACK_SAVE_MAX 30
94    
95     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96    
97     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
98     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
99    
100    
101    
102 ph10 475 #ifdef PCRE_DEBUG
103 nigel 77 /*************************************************
104     * Debugging function to print chars *
105     *************************************************/
106    
107     /* Print a sequence of chars in printable format, stopping at the end of the
108     subject if the requested.
109    
110     Arguments:
111     p points to characters
112     length number to print
113     is_subject TRUE if printing from within md->start_subject
114     md pointer to matching data block, if is_subject is TRUE
115    
116     Returns: nothing
117     */
118    
119     static void
120     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121     {
122 nigel 93 unsigned int c;
123 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124     while (length-- > 0)
125     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126     }
127     #endif
128    
129    
130    
131     /*************************************************
132     * Match a back-reference *
133     *************************************************/
134    
135     /* If a back reference hasn't been set, the length that is passed is greater
136     than the number of characters left in the string, so the match fails.
137    
138     Arguments:
139     offset index into the offset vector
140     eptr points into the subject
141     length length to be matched
142     md points to match data block
143     ims the ims flags
144    
145     Returns: TRUE if matched
146     */
147    
148     static BOOL
149 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
150 nigel 77 unsigned long int ims)
151     {
152 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
153 nigel 77
154 ph10 475 #ifdef PCRE_DEBUG
155 nigel 77 if (eptr >= md->end_subject)
156     printf("matching subject <null>");
157     else
158     {
159     printf("matching subject ");
160     pchars(eptr, length, TRUE, md);
161     }
162     printf(" against backref ");
163     pchars(p, length, FALSE, md);
164     printf("\n");
165     #endif
166    
167     /* Always fail if not enough characters left */
168    
169     if (length > md->end_subject - eptr) return FALSE;
170    
171 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172     properly if Unicode properties are supported. Otherwise, we can check only
173     ASCII characters. */
174 nigel 77
175     if ((ims & PCRE_CASELESS) != 0)
176     {
177 ph10 354 #ifdef SUPPORT_UTF8
178     #ifdef SUPPORT_UCP
179     if (md->utf8)
180     {
181 ph10 358 USPTR endptr = eptr + length;
182 ph10 354 while (eptr < endptr)
183     {
184 ph10 358 int c, d;
185 ph10 354 GETCHARINC(c, eptr);
186     GETCHARINC(d, p);
187     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188 ph10 358 }
189     }
190 ph10 354 else
191     #endif
192     #endif
193    
194     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195     is no UCP support. */
196 ph10 358
197 nigel 77 while (length-- > 0)
198 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199 nigel 77 }
200 ph10 358
201 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
202     are in UTF-8 mode. */
203 ph10 358
204 nigel 77 else
205     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
207     return TRUE;
208     }
209    
210    
211    
212     /***************************************************************************
213     ****************************************************************************
214     RECURSION IN THE match() FUNCTION
215    
216 nigel 87 The match() function is highly recursive, though not every recursive call
217     increases the recursive depth. Nevertheless, some regular expressions can cause
218     it to recurse to a great depth. I was writing for Unix, so I just let it call
219     itself recursively. This uses the stack for saving everything that has to be
220     saved for a recursive call. On Unix, the stack can be large, and this works
221     fine.
222 nigel 77
223 nigel 87 It turns out that on some non-Unix-like systems there are problems with
224     programs that use a lot of stack. (This despite the fact that every last chip
225     has oodles of memory these days, and techniques for extending the stack have
226     been known for decades.) So....
227 nigel 77
228     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229     calls by keeping local variables that need to be preserved in blocks of memory
230 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
231 nigel 77 achieve this so that the actual code doesn't look very different to what it
232     always used to.
233 ph10 164
234 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
235 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
236     Switzer, the use of longjmp() has been abolished, at the cost of having to
237     provide a unique number for each call to RMATCH. There is no way of generating
238     a sequence of numbers at compile time in C. I have given them names, to make
239     them stand out more clearly.
240    
241     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
244     don't have indeterminate values; this has meant that the frame size can be
245 ph10 164 reduced because the result can be "passed back" by straight setting of the
246     variable instead of being passed in the frame.
247 nigel 77 ****************************************************************************
248     ***************************************************************************/
249    
250 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251     below must be updated in sync. */
252 nigel 77
253 ph10 164 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
254     RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255     RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256     RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257 ph10 210 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258 ph10 212 RM51, RM52, RM53, RM54 };
259 ph10 164
260 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
261 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
262 ph10 501 actually used in this definition. */
263 nigel 77
264     #ifndef NO_RECURSE
265     #define REGISTER register
266 ph10 164
267 ph10 475 #ifdef PCRE_DEBUG
268 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
269 nigel 87 { \
270     printf("match() called in line %d\n", __LINE__); \
271 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
272 nigel 87 printf("to line %d\n", __LINE__); \
273     }
274     #define RRETURN(ra) \
275     { \
276     printf("match() returned %d from line %d ", ra, __LINE__); \
277     return ra; \
278     }
279     #else
280 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
281 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
282 nigel 77 #define RRETURN(ra) return ra
283 nigel 87 #endif
284    
285 nigel 77 #else
286    
287    
288 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
289     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
290     argument of match(), which never changes. */
291 nigel 77
292     #define REGISTER
293    
/* Heap version of RMATCH. A new frame is obtained from pcre_stack_malloc(),
the resume point (rw) is recorded in the current frame, the "arguments" are
copied into the new frame, and control jumps to HEAP_RECURSE. The L_rw label
is where execution resumes afterwards.

If no memory can be obtained for the new frame, the current frame is unwound
via RRETURN with PCRE_ERROR_NOMEMORY instead of writing through a NULL
pointer; being a negative error code, it is passed back up through every
outer frame. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xmarkptr = markptr;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
314    
/* Heap version of RRETURN. The current frame is released and its parent
becomes the current frame. If there is no parent, this was the top-level
frame, so the value is returned from match() for real. Otherwise the value is
handed back in rrc and control jumps to HEAP_RETURN. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  (pcre_stack_free)(oldframe);\
  if (frame == NULL) return ra;\
  rrc = ra;\
  goto HEAP_RETURN;\
  }
327    
328    
329     /* Structure for remembering the local variables in a private frame */
330    
331     typedef struct heapframe {
332     struct heapframe *Xprevframe;
333    
334     /* Function arguments that may change */
335    
336 ph10 409 USPTR Xeptr;
337 nigel 77 const uschar *Xecode;
338 ph10 409 USPTR Xmstart;
339 ph10 501 USPTR Xmarkptr;
340 nigel 77 int Xoffset_top;
341     long int Xims;
342     eptrblock *Xeptrb;
343     int Xflags;
344 nigel 91 unsigned int Xrdepth;
345 nigel 77
346     /* Function local variables */
347    
348 ph10 409 USPTR Xcallpat;
349 ph10 406 #ifdef SUPPORT_UTF8
350 ph10 409 USPTR Xcharptr;
351 ph10 406 #endif
352 ph10 409 USPTR Xdata;
353     USPTR Xnext;
354     USPTR Xpp;
355     USPTR Xprev;
356     USPTR Xsaved_eptr;
357 nigel 77
358     recursion_info Xnew_recursive;
359    
360     BOOL Xcur_is_word;
361     BOOL Xcondition;
362     BOOL Xprev_is_word;
363    
364     unsigned long int Xoriginal_ims;
365    
366     #ifdef SUPPORT_UCP
367     int Xprop_type;
368 nigel 87 int Xprop_value;
369 nigel 77 int Xprop_fail_result;
370     int Xprop_category;
371     int Xprop_chartype;
372 nigel 87 int Xprop_script;
373 ph10 123 int Xoclength;
374     uschar Xocchars[8];
375 nigel 77 #endif
376    
377 ph10 403 int Xcodelink;
378 nigel 77 int Xctype;
379 nigel 93 unsigned int Xfc;
380 nigel 77 int Xfi;
381     int Xlength;
382     int Xmax;
383     int Xmin;
384     int Xnumber;
385     int Xoffset;
386     int Xop;
387     int Xsave_capture_last;
388     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
389     int Xstacksave[REC_STACK_SAVE_MAX];
390    
391     eptrblock Xnewptrb;
392    
393 ph10 164 /* Where to jump back to */
394 nigel 77
395 ph10 164 int Xwhere;
396 ph10 165
397 nigel 77 } heapframe;
398    
399     #endif
400    
401    
402     /***************************************************************************
403     ***************************************************************************/
404    
405    
406    
407     /*************************************************
408     * Match from current position *
409     *************************************************/
410    
411 nigel 93 /* This function is called recursively in many circumstances. Whenever it
412 nigel 77 returns a negative (error) response, the outer incarnation must also return the
413 ph10 426 same response. */
414 nigel 77
415 ph10 426 /* These macros pack up tests that are used for partial matching, and which
416     appear several times in the code. We set the "hit end" flag if the pointer is
417     at the end of the subject and also past the start of the subject (i.e.
418 ph10 427 something has been matched). For hard partial matching, we then return
419     immediately. The second one is used when we already know we are past the end of
420     the subject. */
421 ph10 426
422     #define CHECK_PARTIAL()\
423 ph10 435 if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
424 ph10 427 {\
425     md->hitend = TRUE;\
426 ph10 510 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
427 ph10 427 }
428 ph10 426
429     #define SCHECK_PARTIAL()\
430 ph10 462 if (md->partial != 0 && eptr > mstart)\
431 ph10 427 {\
432     md->hitend = TRUE;\
433 ph10 510 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
434 ph10 427 }
435 ph10 426
436 ph10 427
437 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
438     the md structure (e.g. utf8, end_subject) into individual variables to improve
439 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
440     made performance worse.
441    
442     Arguments:
443 nigel 93 eptr pointer to current character in subject
444     ecode pointer to current position in compiled code
445 ph10 168 mstart pointer to the current match start position (can be modified
446 ph10 172 by encountering \K)
447 ph10 501 markptr pointer to the most recent MARK name, or NULL
448 nigel 77 offset_top current top pointer
449     md pointer to "static" info for the match
450     ims current /i, /m, and /s options
451     eptrb pointer to chain of blocks containing eptr at start of
452     brackets - for testing for empty matches
453     flags can contain
454     match_condassert - this is an assertion condition
455 nigel 93 match_cbegroup - this is the start of an unlimited repeat
456     group that can match an empty string
457 nigel 87 rdepth the recursion depth
458 nigel 77
459     Returns: MATCH_MATCH if matched ) these values are >= 0
460     MATCH_NOMATCH if failed to match )
461 ph10 510 a negative MATCH_xxx value for PRUNE, SKIP, etc
462 nigel 77 a negative PCRE_ERROR_xxx value if aborted by an error condition
463 nigel 87 (e.g. stopped by repeated call or recursion limit)
464 nigel 77 */
465    
466     static int
467 ph10 510 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
468     const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
469 ph10 501 eptrblock *eptrb, int flags, unsigned int rdepth)
470 nigel 77 {
471     /* These variables do not need to be preserved over recursion in this function,
472 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
473     "register" because they are used a lot in loops. */
474 nigel 77
475 nigel 91 register int rrc; /* Returns from recursive calls */
476     register int i; /* Used for loops not involving calls to RMATCH() */
477 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
478 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
479 nigel 77
480 nigel 93 BOOL minimize, possessive; /* Quantifier options */
481 ph10 403 int condcode;
482 nigel 93
483 nigel 77 /* When recursion is not being used, all "local" variables that have to be
484     preserved over calls to RMATCH() are part of a "frame" which is obtained from
485     heap storage. Set up the top-level frame here; others are obtained from the
486     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
487    
488     #ifdef NO_RECURSE
489     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
490     frame->Xprevframe = NULL; /* Marks the top level */
491    
492     /* Copy in the original argument variables */
493    
494     frame->Xeptr = eptr;
495     frame->Xecode = ecode;
496 ph10 168 frame->Xmstart = mstart;
497 ph10 501 frame->Xmarkptr = markptr;
498 nigel 77 frame->Xoffset_top = offset_top;
499     frame->Xims = ims;
500     frame->Xeptrb = eptrb;
501     frame->Xflags = flags;
502 nigel 87 frame->Xrdepth = rdepth;
503 nigel 77
504     /* This is where control jumps back to to effect "recursion" */
505    
506     HEAP_RECURSE:
507    
508     /* Macros make the argument variables come from the current frame */
509    
510     #define eptr frame->Xeptr
511     #define ecode frame->Xecode
512 ph10 168 #define mstart frame->Xmstart
513 ph10 501 #define markptr frame->Xmarkptr
514 nigel 77 #define offset_top frame->Xoffset_top
515     #define ims frame->Xims
516     #define eptrb frame->Xeptrb
517     #define flags frame->Xflags
518 nigel 87 #define rdepth frame->Xrdepth
519 nigel 77
520     /* Ditto for the local variables */
521    
522     #ifdef SUPPORT_UTF8
523     #define charptr frame->Xcharptr
524     #endif
525     #define callpat frame->Xcallpat
526 ph10 403 #define codelink frame->Xcodelink
527 nigel 77 #define data frame->Xdata
528     #define next frame->Xnext
529     #define pp frame->Xpp
530     #define prev frame->Xprev
531     #define saved_eptr frame->Xsaved_eptr
532    
533     #define new_recursive frame->Xnew_recursive
534    
535     #define cur_is_word frame->Xcur_is_word
536     #define condition frame->Xcondition
537     #define prev_is_word frame->Xprev_is_word
538    
539     #define original_ims frame->Xoriginal_ims
540    
541     #ifdef SUPPORT_UCP
542     #define prop_type frame->Xprop_type
543 nigel 87 #define prop_value frame->Xprop_value
544 nigel 77 #define prop_fail_result frame->Xprop_fail_result
545     #define prop_category frame->Xprop_category
546     #define prop_chartype frame->Xprop_chartype
547 nigel 87 #define prop_script frame->Xprop_script
548 ph10 115 #define oclength frame->Xoclength
549     #define occhars frame->Xocchars
550 nigel 77 #endif
551    
552     #define ctype frame->Xctype
553     #define fc frame->Xfc
554     #define fi frame->Xfi
555     #define length frame->Xlength
556     #define max frame->Xmax
557     #define min frame->Xmin
558     #define number frame->Xnumber
559     #define offset frame->Xoffset
560     #define op frame->Xop
561     #define save_capture_last frame->Xsave_capture_last
562     #define save_offset1 frame->Xsave_offset1
563     #define save_offset2 frame->Xsave_offset2
564     #define save_offset3 frame->Xsave_offset3
565     #define stacksave frame->Xstacksave
566    
567     #define newptrb frame->Xnewptrb
568    
569     /* When recursion is being used, local variables are allocated on the stack and
570     get preserved during recursion in the normal way. In this environment, fi and
571     i, and fc and c, can be the same variables. */
572    
573 nigel 93 #else /* NO_RECURSE not defined */
574 nigel 77 #define fi i
575     #define fc c
576    
577    
578 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
579     const uschar *charptr; /* in small blocks of the code. My normal */
580     #endif /* style of coding would have declared */
581     const uschar *callpat; /* them within each of those blocks. */
582     const uschar *data; /* However, in order to accommodate the */
583     const uschar *next; /* version of this code that uses an */
584     USPTR pp; /* external "stack" implemented on the */
585     const uschar *prev; /* heap, it is easier to declare them all */
586     USPTR saved_eptr; /* here, so the declarations can be cut */
587     /* out in a block. The only declarations */
588     recursion_info new_recursive; /* within blocks below are for variables */
589     /* that do not have to be preserved over */
590     BOOL cur_is_word; /* a recursive call to RMATCH(). */
591     BOOL condition;
592 nigel 77 BOOL prev_is_word;
593    
594     unsigned long int original_ims;
595    
596     #ifdef SUPPORT_UCP
597     int prop_type;
598 nigel 87 int prop_value;
599 nigel 77 int prop_fail_result;
600     int prop_category;
601     int prop_chartype;
602 nigel 87 int prop_script;
603 ph10 115 int oclength;
604     uschar occhars[8];
605 nigel 77 #endif
606    
607 ph10 399 int codelink;
608 nigel 77 int ctype;
609     int length;
610     int max;
611     int min;
612     int number;
613     int offset;
614     int op;
615     int save_capture_last;
616     int save_offset1, save_offset2, save_offset3;
617     int stacksave[REC_STACK_SAVE_MAX];
618    
619     eptrblock newptrb;
620 nigel 93 #endif /* NO_RECURSE */
621 nigel 77
622     /* These statements are here to stop the compiler complaining about unitialized
623     variables. */
624    
625     #ifdef SUPPORT_UCP
626 nigel 87 prop_value = 0;
627 nigel 77 prop_fail_result = 0;
628     #endif
629    
630 nigel 93
631 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
632     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
633     used. Thanks to Ian Taylor for noticing this possibility and sending the
634     original patch. */
635    
636     TAIL_RECURSE:
637    
638 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
639     are specified by the macro RMATCH and RRETURN is used to return. When
640     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
641 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
642 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
643     complicated macro. It has to be used in one particular way. This shouldn't,
644     however, impact performance when true recursion is being used. */
645 nigel 77
646 ph10 164 #ifdef SUPPORT_UTF8
647     utf8 = md->utf8; /* Local copy of the flag */
648     #else
649     utf8 = FALSE;
650     #endif
651    
652 nigel 87 /* First check that we haven't called match() too many times, or that we
653     haven't exceeded the recursive call limit. */
654    
655 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
656 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
657 nigel 77
658     original_ims = ims; /* Save for resetting on ')' */
659 nigel 91
660 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
661     string, the match_cbegroup flag is set. When this is the case, add the current
662     subject pointer to the chain of such remembered pointers, to be checked when we
663     hit the closing ket, in order to break infinite loops that match no characters.
664 ph10 197 When match() is called in other circumstances, don't add to the chain. The
665     match_cbegroup flag must NOT be used with tail recursion, because the memory
666     block that is used is on the stack, so a new one may be required for each
667     match(). */
668 nigel 77
669 nigel 93 if ((flags & match_cbegroup) != 0)
670 nigel 77 {
671 ph10 197 newptrb.epb_saved_eptr = eptr;
672     newptrb.epb_prev = eptrb;
673     eptrb = &newptrb;
674 nigel 77 }
675    
676 nigel 93 /* Now start processing the opcodes. */
677 nigel 77
678     for (;;)
679     {
680 nigel 93 minimize = possessive = FALSE;
681 nigel 77 op = *ecode;
682 ph10 443
683 nigel 93 switch(op)
684     {
685 ph10 510 case OP_MARK:
686     markptr = ecode + 2;
687     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
688     ims, eptrb, flags, RM51);
689    
690     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
691     argument, and we must check whether that argument matches this MARK's
692     argument. It is passed back in md->start_match_ptr (an overloading of that
693     variable). If it does match, we reset that variable to the current subject
694     position and return MATCH_SKIP. Otherwise, pass back the return code
695     unaltered. */
696    
697     if (rrc == MATCH_SKIP_ARG &&
698     strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
699     {
700     md->start_match_ptr = eptr;
701     RRETURN(MATCH_SKIP);
702     }
703    
704     if (md->mark == NULL) md->mark = markptr;
705     RRETURN(rrc);
706    
707 ph10 210 case OP_FAIL:
708 ph10 510 MRRETURN(MATCH_NOMATCH);
709 ph10 211
710 ph10 510 case OP_COMMIT:
711     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
712     ims, eptrb, flags, RM52);
713     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
714     MRRETURN(MATCH_COMMIT);
715    
716 ph10 210 case OP_PRUNE:
717     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718     ims, eptrb, flags, RM51);
719     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
720 ph10 510 MRRETURN(MATCH_PRUNE);
721 ph10 211
722 ph10 510 case OP_PRUNE_ARG:
723     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
724     ims, eptrb, flags, RM51);
725 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
726 ph10 510 md->mark = ecode + 2;
727     RRETURN(MATCH_PRUNE);
728 ph10 211
729 ph10 210 case OP_SKIP:
730     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
731     ims, eptrb, flags, RM53);
732     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
733 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
734 ph10 510 MRRETURN(MATCH_SKIP);
735 ph10 211
736 ph10 510 case OP_SKIP_ARG:
737     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
738     ims, eptrb, flags, RM53);
739     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
740    
741     /* Pass back the current skip name by overloading md->start_match_ptr and
742     returning the special MATCH_SKIP_ARG return code. This will either be
743     caught by a matching MARK, or get to the top, where it is treated the same
744     as PRUNE. */
745    
746     md->start_match_ptr = ecode + 2;
747     RRETURN(MATCH_SKIP_ARG);
748    
749 ph10 210 case OP_THEN:
750     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
751 ph10 212 ims, eptrb, flags, RM54);
752 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
753 ph10 510 MRRETURN(MATCH_THEN);
754    
755     case OP_THEN_ARG:
756     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
757     ims, eptrb, flags, RM54);
758     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
759     md->mark = ecode + 2;
760 ph10 212 RRETURN(MATCH_THEN);
761 ph10 211
762 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
763     the current subject position in the working slot at the top of the vector.
764     We mustn't change the current values of the data slot, because they may be
765     set from a previous iteration of this group, and be referred to by a
766     reference inside the group.
767 nigel 77
768 nigel 93 If the bracket fails to match, we need to restore this value and also the
769     values of the final offsets, in case they were set by a previous iteration
770     of the same bracket.
771 nigel 77
772 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
773     a non-capturing bracket. Don't worry about setting the flag for the error
774     case here; that is handled in the code for KET. */
775 nigel 77
776 nigel 93 case OP_CBRA:
777     case OP_SCBRA:
778     number = GET2(ecode, 1+LINK_SIZE);
779 nigel 77 offset = number << 1;
780    
781 ph10 475 #ifdef PCRE_DEBUG
782 nigel 93 printf("start bracket %d\n", number);
783     printf("subject=");
784 nigel 77 pchars(eptr, 16, TRUE, md);
785     printf("\n");
786     #endif
787    
788     if (offset < md->offset_max)
789     {
790     save_offset1 = md->offset_vector[offset];
791     save_offset2 = md->offset_vector[offset+1];
792     save_offset3 = md->offset_vector[md->offset_end - number];
793     save_capture_last = md->capture_last;
794    
795     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
796     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
797    
798 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
799 nigel 77 do
800     {
801 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
802     ims, eptrb, flags, RM1);
803 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
804 nigel 77 md->capture_last = save_capture_last;
805     ecode += GET(ecode, 1);
806     }
807     while (*ecode == OP_ALT);
808    
809     DPRINTF(("bracket %d failed\n", number));
810    
811     md->offset_vector[offset] = save_offset1;
812     md->offset_vector[offset+1] = save_offset2;
813     md->offset_vector[md->offset_end - number] = save_offset3;
814    
815 ph10 510 if (rrc != MATCH_THEN) md->mark = markptr;
816 nigel 77 RRETURN(MATCH_NOMATCH);
817     }
818    
819 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
820     as a non-capturing bracket. */
821 nigel 77
822 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
823     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
824    
825 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
826 nigel 77
827 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
829    
830 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
831     final alternative within the brackets, we would return the result of a
832     recursive call to match() whatever happened. We can reduce stack usage by
833 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
834     is set.*/
835 nigel 77
836 nigel 93 case OP_BRA:
837     case OP_SBRA:
838     DPRINTF(("start non-capturing bracket\n"));
839     flags = (op >= OP_SBRA)? match_cbegroup : 0;
840 nigel 91 for (;;)
841 nigel 77 {
842 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
843 nigel 93 {
844 ph10 197 if (flags == 0) /* Not a possibly empty group */
845     {
846     ecode += _pcre_OP_lengths[*ecode];
847     DPRINTF(("bracket 0 tail recursion\n"));
848     goto TAIL_RECURSE;
849     }
850    
851     /* Possibly empty group; can't use tail recursion. */
852    
853     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
854     eptrb, flags, RM48);
855 ph10 510 if (rrc == MATCH_NOMATCH) md->mark = markptr;
856     RRETURN(rrc);
857 nigel 93 }
858 nigel 91
859     /* For non-final alternatives, continue the loop for a NOMATCH result;
860     otherwise return. */
861    
862 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
863     eptrb, flags, RM2);
864 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
865 nigel 77 ecode += GET(ecode, 1);
866     }
867 nigel 91 /* Control never reaches here. */
868 nigel 77
869     /* Conditional group: compilation checked that there are no more than
870     two branches. If the condition is false, skipping the first branch takes us
871     past the end if there is only one branch, but that's OK because that is
872 nigel 91 exactly what going to the ket would do. As there is only one branch to be
873     obeyed, we can use tail recursion to avoid using another stack frame. */
874 nigel 77
875     case OP_COND:
876 nigel 93 case OP_SCOND:
877 ph10 399 codelink= GET(ecode, 1);
878 ph10 406
879 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
880     inserted between OP_COND and an assertion condition. */
881 ph10 392
882 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
883     {
884     if (pcre_callout != NULL)
885     {
886     pcre_callout_block cb;
887     cb.version = 1; /* Version 1 of the callout block */
888     cb.callout_number = ecode[LINK_SIZE+2];
889     cb.offset_vector = md->offset_vector;
890     cb.subject = (PCRE_SPTR)md->start_subject;
891     cb.subject_length = md->end_subject - md->start_subject;
892     cb.start_match = mstart - md->start_subject;
893     cb.current_position = eptr - md->start_subject;
894     cb.pattern_position = GET(ecode, LINK_SIZE + 3);
895     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
896     cb.capture_top = offset_top/2;
897     cb.capture_last = md->capture_last;
898     cb.callout_data = md->callout_data;
899 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
900 ph10 381 if (rrc < 0) RRETURN(rrc);
901     }
902     ecode += _pcre_OP_lengths[OP_CALLOUT];
903     }
904 ph10 392
905 ph10 399 condcode = ecode[LINK_SIZE+1];
906 ph10 406
907 ph10 381 /* Now see what the actual condition is */
908 ph10 392
909 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
910 nigel 77 {
911 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
912     {
913 ph10 461 condition = FALSE;
914     ecode += GET(ecode, 1);
915     }
916 ph10 459 else
917 ph10 461 {
918 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
919     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
920 ph10 461
921 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
922     false, but the test was set up by name, scan the table to see if the
923     name refers to any other numbers, and test them. The condition is true
924     if any one is set. */
925 ph10 461
926 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
927     {
928     uschar *slotA = md->name_table;
929     for (i = 0; i < md->name_count; i++)
930 ph10 461 {
931     if (GET2(slotA, 0) == recno) break;
932 ph10 459 slotA += md->name_entry_size;
933     }
934 ph10 461
935 ph10 459 /* Found a name for the number - there can be only one; duplicate
936     names for different numbers are allowed, but not vice versa. First
937     scan down for duplicates. */
938 ph10 461
939 ph10 459 if (i < md->name_count)
940 ph10 461 {
941 ph10 459 uschar *slotB = slotA;
942     while (slotB > md->name_table)
943     {
944     slotB -= md->name_entry_size;
945     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
946     {
947     condition = GET2(slotB, 0) == md->recursive->group_num;
948 ph10 461 if (condition) break;
949     }
950 ph10 459 else break;
951 ph10 461 }
952    
953 ph10 459 /* Scan up for duplicates */
954 ph10 461
955 ph10 459 if (!condition)
956 ph10 461 {
957 ph10 459 slotB = slotA;
958     for (i++; i < md->name_count; i++)
959     {
960     slotB += md->name_entry_size;
961     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
962     {
963     condition = GET2(slotB, 0) == md->recursive->group_num;
964     if (condition) break;
965 ph10 461 }
966 ph10 459 else break;
967 ph10 461 }
968     }
969 ph10 459 }
970 ph10 461 }
971    
972 ph10 459 /* Choose branch according to the condition */
973 ph10 461
974 ph10 459 ecode += condition? 3 : GET(ecode, 1);
975     }
976 ph10 461 }
977 nigel 93
978 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
979 nigel 93 {
980 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
981 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
982 ph10 461
983 ph10 459 /* If the numbered capture is unset, but the reference was by name,
984 ph10 461 scan the table to see if the name refers to any other numbers, and test
985     them. The condition is true if any one is set. This is tediously similar
986     to the code above, but not close enough to try to amalgamate. */
987    
988 ph10 459 if (!condition && condcode == OP_NCREF)
989     {
990 ph10 461 int refno = offset >> 1;
991 ph10 459 uschar *slotA = md->name_table;
992 ph10 461
993 ph10 459 for (i = 0; i < md->name_count; i++)
994 ph10 461 {
995     if (GET2(slotA, 0) == refno) break;
996 ph10 459 slotA += md->name_entry_size;
997     }
998 ph10 461
999     /* Found a name for the number - there can be only one; duplicate names
1000     for different numbers are allowed, but not vice versa. First scan down
1001 ph10 459 for duplicates. */
1002 ph10 461
1003 ph10 459 if (i < md->name_count)
1004 ph10 461 {
1005 ph10 459 uschar *slotB = slotA;
1006     while (slotB > md->name_table)
1007     {
1008     slotB -= md->name_entry_size;
1009     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1010     {
1011     offset = GET2(slotB, 0) << 1;
1012 ph10 461 condition = offset < offset_top &&
1013 ph10 459 md->offset_vector[offset] >= 0;
1014 ph10 461 if (condition) break;
1015     }
1016 ph10 459 else break;
1017 ph10 461 }
1018    
1019 ph10 459 /* Scan up for duplicates */
1020 ph10 461
1021 ph10 459 if (!condition)
1022 ph10 461 {
1023 ph10 459 slotB = slotA;
1024     for (i++; i < md->name_count; i++)
1025     {
1026     slotB += md->name_entry_size;
1027     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1028     {
1029     offset = GET2(slotB, 0) << 1;
1030 ph10 461 condition = offset < offset_top &&
1031 ph10 459 md->offset_vector[offset] >= 0;
1032 ph10 461 if (condition) break;
1033     }
1034 ph10 459 else break;
1035 ph10 461 }
1036     }
1037 ph10 459 }
1038 ph10 461 }
1039    
1040 ph10 459 /* Choose branch according to the condition */
1041    
1042 nigel 93 ecode += condition? 3 : GET(ecode, 1);
1043 nigel 77 }
1044    
1045 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
1046 nigel 93 {
1047     condition = FALSE;
1048     ecode += GET(ecode, 1);
1049     }
1050    
1051 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
1052 nigel 93 the final argument match_condassert causes it to stop at the end of an
1053     assertion. */
1054 nigel 77
1055     else
1056     {
1057 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1058     match_condassert, RM3);
1059 nigel 77 if (rrc == MATCH_MATCH)
1060     {
1061 nigel 93 condition = TRUE;
1062     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1063 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1064     }
1065 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1066 nigel 77 {
1067     RRETURN(rrc); /* Need braces because of following else */
1068     }
1069 nigel 93 else
1070     {
1071     condition = FALSE;
1072 ph10 399 ecode += codelink;
1073 nigel 93 }
1074     }
1075 nigel 91
1076 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1077 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1078     match_cbegroup is required for an unlimited repeat of a possibly empty
1079     group. If the second alternative doesn't exist, we can just plough on. */
1080 nigel 91
1081 nigel 93 if (condition || *ecode == OP_ALT)
1082     {
1083 nigel 91 ecode += 1 + LINK_SIZE;
1084 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1085     {
1086     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1087     RRETURN(rrc);
1088     }
1089     else /* Group must match something */
1090     {
1091     flags = 0;
1092     goto TAIL_RECURSE;
1093     }
1094 nigel 77 }
1095 ph10 395 else /* Condition false & no alternative */
1096 nigel 93 {
1097     ecode += 1 + LINK_SIZE;
1098     }
1099     break;
1100 nigel 77
1101 ph10 461
1102 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1103     to close any currently open capturing brackets. */
1104 ph10 461
1105 ph10 447 case OP_CLOSE:
1106 ph10 461 number = GET2(ecode, 1);
1107 ph10 447 offset = number << 1;
1108 ph10 461
1109 ph10 475 #ifdef PCRE_DEBUG
1110 ph10 447 printf("end bracket %d at *ACCEPT", number);
1111     printf("\n");
1112     #endif
1113 nigel 77
1114 ph10 447 md->capture_last = number;
1115     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1116     {
1117     md->offset_vector[offset] =
1118     md->offset_vector[md->offset_end - number];
1119     md->offset_vector[offset+1] = eptr - md->start_subject;
1120     if (offset_top <= offset) offset_top = offset + 2;
1121     }
1122     ecode += 3;
1123 ph10 461 break;
1124 ph10 447
1125    
1126 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1127     recursion, we should restore the offsets appropriately and continue from
1128     after the call. */
1129 nigel 77
1130 ph10 210 case OP_ACCEPT:
1131 nigel 77 case OP_END:
1132     if (md->recursive != NULL && md->recursive->group_num == 0)
1133     {
1134     recursion_info *rec = md->recursive;
1135 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1136 nigel 77 md->recursive = rec->prevrec;
1137     memmove(md->offset_vector, rec->offset_save,
1138     rec->saved_max * sizeof(int));
1139 ph10 461 offset_top = rec->save_offset_top;
1140 nigel 77 ims = original_ims;
1141     ecode = rec->after_call;
1142     break;
1143     }
1144    
1145 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1146     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1147     the subject. In both cases, backtracking will then try other alternatives,
1148     if any. */
1149 ph10 443
1150 ph10 442 if (eptr == mstart &&
1151     (md->notempty ||
1152 ph10 443 (md->notempty_atstart &&
1153 ph10 442 mstart == md->start_subject + md->start_offset)))
1154 ph10 510 MRRETURN(MATCH_NOMATCH);
1155 ph10 443
1156 ph10 442 /* Otherwise, we have a match. */
1157 nigel 77
1158 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1159     md->end_offset_top = offset_top; /* and how many extracts were taken */
1160 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1161 ph10 511 MRRETURN(((op == OP_END)? MATCH_MATCH : MATCH_ACCEPT));
1162 nigel 77
1163     /* Change option settings */
1164    
1165     case OP_OPT:
1166     ims = ecode[1];
1167     ecode += 2;
1168     DPRINTF(("ims set to %02lx\n", ims));
1169     break;
1170    
1171     /* Assertion brackets. Check the alternative branches in turn - the
1172     matching won't pass the KET for an assertion. If any one branch matches,
1173     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1174     start of each branch to move the current point backwards, so the code at
1175     this level is identical to the lookahead case. */
1176    
1177     case OP_ASSERT:
1178     case OP_ASSERTBACK:
1179     do
1180     {
1181 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1182     RM4);
1183 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1184 ph10 500 {
1185     mstart = md->start_match_ptr; /* In case \K reset it */
1186     break;
1187 ph10 501 }
1188 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1189 nigel 77 ecode += GET(ecode, 1);
1190     }
1191     while (*ecode == OP_ALT);
1192 ph10 510 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1193 nigel 77
1194     /* If checking an assertion for a condition, return MATCH_MATCH. */
1195    
1196     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1197    
1198     /* Continue from after the assertion, updating the offsets high water
1199     mark, since extracts may have been taken during the assertion. */
1200    
1201     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1202     ecode += 1 + LINK_SIZE;
1203     offset_top = md->end_offset_top;
1204     continue;
1205    
1206 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1207 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1208 ph10 473 branches. */
1209 nigel 77
1210     case OP_ASSERT_NOT:
1211     case OP_ASSERTBACK_NOT:
1212     do
1213     {
1214 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1215     RM5);
1216 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1217 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1218     {
1219     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1220 ph10 482 break;
1221     }
1222 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1223 nigel 77 ecode += GET(ecode,1);
1224     }
1225     while (*ecode == OP_ALT);
1226    
1227     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1228    
1229     ecode += 1 + LINK_SIZE;
1230     continue;
1231    
1232     /* Move the subject pointer back. This occurs only at the start of
1233     each branch of a lookbehind assertion. If we are too close to the start to
1234     move back, this match function fails. When working with UTF-8 we move
1235     back a number of characters, not bytes. */
1236    
1237     case OP_REVERSE:
1238     #ifdef SUPPORT_UTF8
1239     if (utf8)
1240     {
1241 nigel 93 i = GET(ecode, 1);
1242     while (i-- > 0)
1243 nigel 77 {
1244     eptr--;
1245 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1246 ph10 207 BACKCHAR(eptr);
1247 nigel 77 }
1248     }
1249     else
1250     #endif
1251    
1252     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1253    
1254     {
1255 nigel 93 eptr -= GET(ecode, 1);
1256 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1257 nigel 77 }
1258    
1259 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1260 nigel 77
1261 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1262 nigel 77 ecode += 1 + LINK_SIZE;
1263     break;
1264    
1265     /* The callout item calls an external function, if one is provided, passing
1266     details of the match so far. This is mainly for debugging, though the
1267     function is able to force a failure. */
1268    
1269     case OP_CALLOUT:
1270     if (pcre_callout != NULL)
1271     {
1272     pcre_callout_block cb;
1273     cb.version = 1; /* Version 1 of the callout block */
1274     cb.callout_number = ecode[1];
1275     cb.offset_vector = md->offset_vector;
1276 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1277 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
1278 ph10 168 cb.start_match = mstart - md->start_subject;
1279 nigel 77 cb.current_position = eptr - md->start_subject;
1280     cb.pattern_position = GET(ecode, 2);
1281     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1282     cb.capture_top = offset_top/2;
1283     cb.capture_last = md->capture_last;
1284     cb.callout_data = md->callout_data;
1285 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1286 nigel 77 if (rrc < 0) RRETURN(rrc);
1287     }
1288     ecode += 2 + 2*LINK_SIZE;
1289     break;
1290    
1291     /* Recursion either matches the current regex, or some subexpression. The
1292     offset data is the offset to the starting bracket from the start of the
1293     whole pattern. (This is so that it works from duplicated subpatterns.)
1294    
1295     If there are any capturing brackets started but not finished, we have to
1296     save their starting points and reinstate them after the recursion. However,
1297     we don't know how many such there are (offset_top records the completed
1298     total) so we just have to save all the potential data. There may be up to
1299     65535 such values, which is too large to put on the stack, but using malloc
1300     for small numbers seems expensive. As a compromise, the stack is used when
1301     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1302     is used. A problem is what to do if the malloc fails ... there is no way of
1303     returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
1304     values on the stack, and accept that the rest may be wrong.
1305    
1306     There are also other values that have to be saved. We use a chained
1307     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1308     for the original version of this logic. */
1309    
1310     case OP_RECURSE:
1311     {
1312     callpat = md->start_code + GET(ecode, 1);
1313 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1314     GET2(callpat, 1 + LINK_SIZE);
1315 nigel 77
1316     /* Add to "recursing stack" */
1317    
1318     new_recursive.prevrec = md->recursive;
1319     md->recursive = &new_recursive;
1320    
1321     /* Find where to continue from afterwards */
1322    
1323     ecode += 1 + LINK_SIZE;
1324     new_recursive.after_call = ecode;
1325    
1326     /* Now save the offset data. */
1327    
1328     new_recursive.saved_max = md->offset_end;
1329     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1330     new_recursive.offset_save = stacksave;
1331     else
1332     {
1333     new_recursive.offset_save =
1334     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1335     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1336     }
1337    
1338     memcpy(new_recursive.offset_save, md->offset_vector,
1339     new_recursive.saved_max * sizeof(int));
1340 ph10 461 new_recursive.save_offset_top = offset_top;
1341 nigel 77
1342     /* OK, now we can do the recursion. For each top-level alternative we
1343     restore the offset and recursion data. */
1344    
1345     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1346 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1347 nigel 77 do
1348     {
1349 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1350     md, ims, eptrb, flags, RM6);
1351 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1352 nigel 77 {
1353 nigel 87 DPRINTF(("Recursion matched\n"));
1354 nigel 77 md->recursive = new_recursive.prevrec;
1355     if (new_recursive.offset_save != stacksave)
1356     (pcre_free)(new_recursive.offset_save);
1357 ph10 510 MRRETURN(MATCH_MATCH);
1358 nigel 77 }
1359 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1360 nigel 87 {
1361     DPRINTF(("Recursion gave error %d\n", rrc));
1362 ph10 400 if (new_recursive.offset_save != stacksave)
1363     (pcre_free)(new_recursive.offset_save);
1364 nigel 87 RRETURN(rrc);
1365     }
1366 nigel 77
1367     md->recursive = &new_recursive;
1368     memcpy(md->offset_vector, new_recursive.offset_save,
1369     new_recursive.saved_max * sizeof(int));
1370     callpat += GET(callpat, 1);
1371     }
1372     while (*callpat == OP_ALT);
1373    
1374     DPRINTF(("Recursion didn't match\n"));
1375     md->recursive = new_recursive.prevrec;
1376     if (new_recursive.offset_save != stacksave)
1377     (pcre_free)(new_recursive.offset_save);
1378 ph10 510 MRRETURN(MATCH_NOMATCH);
1379 nigel 77 }
1380     /* Control never reaches here */
1381    
1382     /* "Once" brackets are like assertion brackets except that after a match,
1383     the point in the subject string is not moved back. Thus there can never be
1384     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1385     Check the alternative branches in turn - the matching won't pass the KET
1386     for this kind of subpattern. If any one branch matches, we carry on as at
1387 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1388     the start-of-match value in case it was changed by \K. */
1389 nigel 77
1390     case OP_ONCE:
1391 nigel 91 prev = ecode;
1392     saved_eptr = eptr;
1393    
1394     do
1395 nigel 77 {
1396 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1397 ph10 511 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1398 ph10 500 {
1399     mstart = md->start_match_ptr;
1400     break;
1401 ph10 501 }
1402 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1403 nigel 91 ecode += GET(ecode,1);
1404     }
1405     while (*ecode == OP_ALT);
1406 nigel 77
1407 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1408 nigel 77
1409 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1410 nigel 77
1411 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1412     mark, since extracts may have been taken. */
1413 nigel 77
1414 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1415 nigel 77
1416 nigel 91 offset_top = md->end_offset_top;
1417     eptr = md->end_match_ptr;
1418 nigel 77
1419 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1420     happens for a repeating ket if no characters were matched in the group.
1421     This is the forcible breaking of infinite loops as implemented in Perl
1422     5.005. If there is an options reset, it will get obeyed in the normal
1423     course of events. */
1424 nigel 77
1425 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1426     {
1427     ecode += 1+LINK_SIZE;
1428     break;
1429     }
1430 nigel 77
1431 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1432     preceding bracket, in the appropriate order. The second "call" of match()
1433     uses tail recursion, to avoid using another stack frame. We need to reset
1434     any options that changed within the bracket before re-running it, so
1435     check the next opcode. */
1436 nigel 77
1437 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1438     {
1439     ims = (ims & ~PCRE_IMS) | ecode[4];
1440     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1441     }
1442 nigel 77
1443 nigel 91 if (*ecode == OP_KETRMIN)
1444     {
1445 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1446 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1447     ecode = prev;
1448 ph10 197 flags = 0;
1449 nigel 91 goto TAIL_RECURSE;
1450 nigel 77 }
1451 nigel 91 else /* OP_KETRMAX */
1452     {
1453 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1454 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1455     ecode += 1 + LINK_SIZE;
1456 ph10 197 flags = 0;
1457 nigel 91 goto TAIL_RECURSE;
1458     }
1459     /* Control never gets here */
1460 nigel 77
1461     /* An alternation is the end of a branch; scan along to find the end of the
1462     bracketed group and go to there. */
1463    
1464     case OP_ALT:
1465     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1466     break;
1467    
1468 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1469     indicating that it may occur zero times. It may repeat infinitely, or not
1470     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1471     with fixed upper repeat limits are compiled as a number of copies, with the
1472     optional ones preceded by BRAZERO or BRAMINZERO. */
1473 nigel 77
1474     case OP_BRAZERO:
1475     {
1476     next = ecode+1;
1477 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1478 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1479     do next += GET(next,1); while (*next == OP_ALT);
1480 nigel 93 ecode = next + 1 + LINK_SIZE;
1481 nigel 77 }
1482     break;
1483    
1484     case OP_BRAMINZERO:
1485     {
1486     next = ecode+1;
1487 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1488 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1489 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1490     ecode++;
1491     }
1492     break;
1493    
1494 ph10 335 case OP_SKIPZERO:
1495     {
1496     next = ecode+1;
1497     do next += GET(next,1); while (*next == OP_ALT);
1498     ecode = next + 1 + LINK_SIZE;
1499     }
1500     break;
1501    
1502 nigel 93 /* End of a group, repeated or non-repeating. */
1503 nigel 77
1504     case OP_KET:
1505     case OP_KETRMIN:
1506     case OP_KETRMAX:
1507 nigel 91 prev = ecode - GET(ecode, 1);
1508 nigel 77
1509 nigel 93 /* If this was a group that remembered the subject start, in order to break
1510     infinite repeats of empty string matches, retrieve the subject start from
1511     the chain. Otherwise, set it NULL. */
1512 nigel 77
1513 nigel 93 if (*prev >= OP_SBRA)
1514     {
1515     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1516     eptrb = eptrb->epb_prev; /* Backup to previous group */
1517     }
1518     else saved_eptr = NULL;
1519 nigel 77
1520 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1521     matching and return MATCH_MATCH, but record the current high water mark for
1522     use by positive assertions. We also need to record the match start in case
1523     it was changed by \K. */
1524 nigel 93
1525 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1526     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1527     *prev == OP_ONCE)
1528     {
1529     md->end_match_ptr = eptr; /* For ONCE */
1530     md->end_offset_top = offset_top;
1531 ph10 500 md->start_match_ptr = mstart;
1532 ph10 510 MRRETURN(MATCH_MATCH);
1533 nigel 91 }
1534 nigel 77
1535 nigel 93 /* For capturing groups we have to check the group number back at the start
1536     and if necessary complete handling an extraction by setting the offsets and
1537     bumping the high water mark. Note that whole-pattern recursion is coded as
1538     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1539     when the OP_END is reached. Other recursion is handled here. */
1540 nigel 77
1541 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1542 nigel 91 {
1543 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1544 nigel 91 offset = number << 1;
1545 ph10 461
1546 ph10 475 #ifdef PCRE_DEBUG
1547 nigel 91 printf("end bracket %d", number);
1548     printf("\n");
1549 nigel 77 #endif
1550    
1551 nigel 93 md->capture_last = number;
1552     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1553 nigel 91 {
1554 nigel 93 md->offset_vector[offset] =
1555     md->offset_vector[md->offset_end - number];
1556     md->offset_vector[offset+1] = eptr - md->start_subject;
1557     if (offset_top <= offset) offset_top = offset + 2;
1558     }
1559 nigel 77
1560 nigel 93 /* Handle a recursively called group. Restore the offsets
1561     appropriately and continue from after the call. */
1562 nigel 77
1563 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1564     {
1565     recursion_info *rec = md->recursive;
1566     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1567     md->recursive = rec->prevrec;
1568     memcpy(md->offset_vector, rec->offset_save,
1569     rec->saved_max * sizeof(int));
1570 ph10 461 offset_top = rec->save_offset_top;
1571 nigel 93 ecode = rec->after_call;
1572     ims = original_ims;
1573     break;
1574 nigel 77 }
1575 nigel 91 }
1576 nigel 77
1577 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1578     flags, in case they got changed during the group. */
1579 nigel 77
1580 nigel 91 ims = original_ims;
1581     DPRINTF(("ims reset to %02lx\n", ims));
1582 nigel 77
1583 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1584     happens for a repeating ket if no characters were matched in the group.
1585     This is the forcible breaking of infinite loops as implemented in Perl
1586     5.005. If there is an options reset, it will get obeyed in the normal
1587     course of events. */
1588 nigel 77
1589 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1590     {
1591     ecode += 1 + LINK_SIZE;
1592     break;
1593     }
1594 nigel 77
1595 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1596     preceding bracket, in the appropriate order. In the second case, we can use
1597 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1598     unlimited repeat of a group that can match an empty string. */
1599 nigel 77
1600 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1601    
1602 nigel 91 if (*ecode == OP_KETRMIN)
1603     {
1604 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1605 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1606 ph10 197 if (flags != 0) /* Could match an empty string */
1607     {
1608     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1609     RRETURN(rrc);
1610     }
1611 nigel 91 ecode = prev;
1612     goto TAIL_RECURSE;
1613 nigel 77 }
1614 nigel 91 else /* OP_KETRMAX */
1615     {
1616 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1617 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1618     ecode += 1 + LINK_SIZE;
1619 ph10 197 flags = 0;
1620 nigel 91 goto TAIL_RECURSE;
1621     }
1622     /* Control never gets here */
1623 nigel 77
1624     /* Start of subject unless notbol, or after internal newline if multiline */
1625    
1626     case OP_CIRC:
1627 ph10 510 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1628 nigel 77 if ((ims & PCRE_MULTILINE) != 0)
1629     {
1630 nigel 91 if (eptr != md->start_subject &&
1631 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1632 ph10 510 MRRETURN(MATCH_NOMATCH);
1633 nigel 77 ecode++;
1634     break;
1635     }
1636     /* ... else fall through */
1637    
1638     /* Start of subject assertion */
1639    
1640     case OP_SOD:
1641 ph10 510 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1642 nigel 77 ecode++;
1643     break;
1644    
1645     /* Start of match assertion */
1646    
1647     case OP_SOM:
1648 ph10 510 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1649 nigel 77 ecode++;
1650     break;
1651 ph10 172
1652 ph10 168 /* Reset the start of match point */
1653 ph10 172
1654 ph10 168 case OP_SET_SOM:
1655     mstart = eptr;
1656 ph10 172 ecode++;
1657     break;
1658 nigel 77
1659     /* Assert before internal newline if multiline, or before a terminating
1660     newline unless endonly is set, else end of subject unless noteol is set. */
1661    
1662     case OP_DOLL:
1663     if ((ims & PCRE_MULTILINE) != 0)
1664     {
1665     if (eptr < md->end_subject)
1666 ph10 510 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1667 nigel 77 else
1668 ph10 510 { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1669 nigel 77 ecode++;
1670     break;
1671     }
1672     else
1673     {
1674 ph10 510 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1675 nigel 77 if (!md->endonly)
1676     {
1677 nigel 91 if (eptr != md->end_subject &&
1678 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1679 ph10 510 MRRETURN(MATCH_NOMATCH);
1680 nigel 77 ecode++;
1681     break;
1682     }
1683     }
1684 nigel 91 /* ... else fall through for endonly */
1685 nigel 77
1686     /* End of subject assertion (\z) */
1687    
1688     case OP_EOD:
1689 ph10 510 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1690 nigel 77 ecode++;
1691     break;
1692    
1693     /* End of subject or ending \n assertion (\Z) */
1694    
1695     case OP_EODN:
1696 nigel 91 if (eptr != md->end_subject &&
1697 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1698 ph10 510 MRRETURN(MATCH_NOMATCH);
1699 nigel 77 ecode++;
1700     break;
1701    
1702     /* Word boundary assertions */
1703    
1704     case OP_NOT_WORD_BOUNDARY:
1705     case OP_WORD_BOUNDARY:
1706     {
1707    
1708     /* Find out if the previous and current characters are "word" characters.
1709     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1710 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1711 ph10 435 partial matching. */
1712 nigel 77
1713     #ifdef SUPPORT_UTF8
1714     if (utf8)
1715     {
1716     if (eptr == md->start_subject) prev_is_word = FALSE; else
1717     {
1718 ph10 409 USPTR lastptr = eptr - 1;
1719 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1720 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1721 nigel 77 GETCHAR(c, lastptr);
1722     prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1723     }
1724 ph10 443 if (eptr >= md->end_subject)
1725 nigel 77 {
1726 ph10 443 SCHECK_PARTIAL();
1727     cur_is_word = FALSE;
1728 ph10 428 }
1729     else
1730     {
1731 nigel 77 GETCHAR(c, eptr);
1732     cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1733     }
1734     }
1735     else
1736     #endif
1737    
1738 ph10 428 /* Not in UTF-8 mode */
1739 nigel 77
1740     {
1741 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1742     {
1743 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1744 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1745     }
1746 ph10 443 if (eptr >= md->end_subject)
1747 ph10 428 {
1748 ph10 443 SCHECK_PARTIAL();
1749     cur_is_word = FALSE;
1750 ph10 428 }
1751     else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1752 nigel 77 }
1753    
1754     /* Now see if the situation is what we want */
1755    
1756     if ((*ecode++ == OP_WORD_BOUNDARY)?
1757     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1758 ph10 510 MRRETURN(MATCH_NOMATCH);
1759 nigel 77 }
1760     break;
1761    
1762     /* Match a single character type; inline for speed */
1763    
1764     case OP_ANY:
1765 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1766 ph10 345 /* Fall through */
1767    
1768 ph10 341 case OP_ALLANY:
1769 ph10 443 if (eptr++ >= md->end_subject)
1770 ph10 428 {
1771 ph10 443 SCHECK_PARTIAL();
1772 ph10 510 MRRETURN(MATCH_NOMATCH);
1773 ph10 443 }
1774 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1775 nigel 77 ecode++;
1776     break;
1777    
1778     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1779     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1780    
1781     case OP_ANYBYTE:
1782 ph10 443 if (eptr++ >= md->end_subject)
1783 ph10 428 {
1784 ph10 443 SCHECK_PARTIAL();
1785 ph10 510 MRRETURN(MATCH_NOMATCH);
1786 ph10 443 }
1787 nigel 77 ecode++;
1788     break;
1789    
1790     case OP_NOT_DIGIT:
1791 ph10 443 if (eptr >= md->end_subject)
1792 ph10 428 {
1793 ph10 443 SCHECK_PARTIAL();
1794 ph10 510 MRRETURN(MATCH_NOMATCH);
1795 ph10 443 }
1796 nigel 77 GETCHARINCTEST(c, eptr);
1797     if (
1798     #ifdef SUPPORT_UTF8
1799     c < 256 &&
1800     #endif
1801     (md->ctypes[c] & ctype_digit) != 0
1802     )
1803 ph10 510 MRRETURN(MATCH_NOMATCH);
1804 nigel 77 ecode++;
1805     break;
1806    
1807     case OP_DIGIT:
1808 ph10 443 if (eptr >= md->end_subject)
1809 ph10 428 {
1810 ph10 443 SCHECK_PARTIAL();
1811 ph10 510 MRRETURN(MATCH_NOMATCH);
1812 ph10 443 }
1813 nigel 77 GETCHARINCTEST(c, eptr);
1814     if (
1815     #ifdef SUPPORT_UTF8
1816     c >= 256 ||
1817     #endif
1818     (md->ctypes[c] & ctype_digit) == 0
1819     )
1820 ph10 510 MRRETURN(MATCH_NOMATCH);
1821 nigel 77 ecode++;
1822     break;
1823    
1824     case OP_NOT_WHITESPACE:
1825 ph10 443 if (eptr >= md->end_subject)
1826 ph10 428 {
1827 ph10 443 SCHECK_PARTIAL();
1828 ph10 510 MRRETURN(MATCH_NOMATCH);
1829 ph10 443 }
1830 nigel 77 GETCHARINCTEST(c, eptr);
1831     if (
1832     #ifdef SUPPORT_UTF8
1833     c < 256 &&
1834     #endif
1835     (md->ctypes[c] & ctype_space) != 0
1836     )
1837 ph10 510 MRRETURN(MATCH_NOMATCH);
1838 nigel 77 ecode++;
1839     break;
1840    
1841     case OP_WHITESPACE:
1842 ph10 443 if (eptr >= md->end_subject)
1843 ph10 428 {
1844 ph10 443 SCHECK_PARTIAL();
1845 ph10 510 MRRETURN(MATCH_NOMATCH);
1846 ph10 443 }
1847 nigel 77 GETCHARINCTEST(c, eptr);
1848     if (
1849     #ifdef SUPPORT_UTF8
1850     c >= 256 ||
1851     #endif
1852     (md->ctypes[c] & ctype_space) == 0
1853     )
1854 ph10 510 MRRETURN(MATCH_NOMATCH);
1855 nigel 77 ecode++;
1856     break;
1857    
1858     case OP_NOT_WORDCHAR:
1859 ph10 443 if (eptr >= md->end_subject)
1860 ph10 428 {
1861 ph10 443 SCHECK_PARTIAL();
1862 ph10 510 MRRETURN(MATCH_NOMATCH);
1863 ph10 443 }
1864 nigel 77 GETCHARINCTEST(c, eptr);
1865     if (
1866     #ifdef SUPPORT_UTF8
1867     c < 256 &&
1868     #endif
1869     (md->ctypes[c] & ctype_word) != 0
1870     )
1871 ph10 510 MRRETURN(MATCH_NOMATCH);
1872 nigel 77 ecode++;
1873     break;
1874    
1875     case OP_WORDCHAR:
1876 ph10 443 if (eptr >= md->end_subject)
1877 ph10 428 {
1878 ph10 443 SCHECK_PARTIAL();
1879 ph10 510 MRRETURN(MATCH_NOMATCH);
1880 ph10 443 }
1881 nigel 77 GETCHARINCTEST(c, eptr);
1882     if (
1883     #ifdef SUPPORT_UTF8
1884     c >= 256 ||
1885     #endif
1886     (md->ctypes[c] & ctype_word) == 0
1887     )
1888 ph10 510 MRRETURN(MATCH_NOMATCH);
1889 nigel 77 ecode++;
1890     break;
1891    
1892 nigel 93 case OP_ANYNL:
1893 ph10 443 if (eptr >= md->end_subject)
1894 ph10 428 {
1895 ph10 443 SCHECK_PARTIAL();
1896 ph10 510 MRRETURN(MATCH_NOMATCH);
1897 ph10 443 }
1898 nigel 93 GETCHARINCTEST(c, eptr);
1899     switch(c)
1900     {
1901 ph10 510 default: MRRETURN(MATCH_NOMATCH);
1902 nigel 93 case 0x000d:
1903     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1904     break;
1905 ph10 231
1906 nigel 93 case 0x000a:
1907 ph10 231 break;
1908    
1909 nigel 93 case 0x000b:
1910     case 0x000c:
1911     case 0x0085:
1912     case 0x2028:
1913     case 0x2029:
1914 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1915 nigel 93 break;
1916     }
1917     ecode++;
1918     break;
1919    
1920 ph10 178 case OP_NOT_HSPACE:
1921 ph10 443 if (eptr >= md->end_subject)
1922 ph10 428 {
1923 ph10 443 SCHECK_PARTIAL();
1924 ph10 510 MRRETURN(MATCH_NOMATCH);
1925 ph10 443 }
1926 ph10 178 GETCHARINCTEST(c, eptr);
1927     switch(c)
1928     {
1929     default: break;
1930     case 0x09: /* HT */
1931     case 0x20: /* SPACE */
1932     case 0xa0: /* NBSP */
1933     case 0x1680: /* OGHAM SPACE MARK */
1934     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1935     case 0x2000: /* EN QUAD */
1936     case 0x2001: /* EM QUAD */
1937     case 0x2002: /* EN SPACE */
1938     case 0x2003: /* EM SPACE */
1939     case 0x2004: /* THREE-PER-EM SPACE */
1940     case 0x2005: /* FOUR-PER-EM SPACE */
1941     case 0x2006: /* SIX-PER-EM SPACE */
1942     case 0x2007: /* FIGURE SPACE */
1943     case 0x2008: /* PUNCTUATION SPACE */
1944     case 0x2009: /* THIN SPACE */
1945     case 0x200A: /* HAIR SPACE */
1946     case 0x202f: /* NARROW NO-BREAK SPACE */
1947     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1948     case 0x3000: /* IDEOGRAPHIC SPACE */
1949 ph10 510 MRRETURN(MATCH_NOMATCH);
1950 ph10 178 }
1951     ecode++;
1952     break;
1953    
1954     case OP_HSPACE:
1955 ph10 443 if (eptr >= md->end_subject)
1956 ph10 428 {
1957 ph10 443 SCHECK_PARTIAL();
1958 ph10 510 MRRETURN(MATCH_NOMATCH);
1959 ph10 443 }
1960 ph10 178 GETCHARINCTEST(c, eptr);
1961     switch(c)
1962     {
1963 ph10 510 default: MRRETURN(MATCH_NOMATCH);
1964 ph10 178 case 0x09: /* HT */
1965     case 0x20: /* SPACE */
1966     case 0xa0: /* NBSP */
1967     case 0x1680: /* OGHAM SPACE MARK */
1968     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1969     case 0x2000: /* EN QUAD */
1970     case 0x2001: /* EM QUAD */
1971     case 0x2002: /* EN SPACE */
1972     case 0x2003: /* EM SPACE */
1973     case 0x2004: /* THREE-PER-EM SPACE */
1974     case 0x2005: /* FOUR-PER-EM SPACE */
1975     case 0x2006: /* SIX-PER-EM SPACE */
1976     case 0x2007: /* FIGURE SPACE */
1977     case 0x2008: /* PUNCTUATION SPACE */
1978     case 0x2009: /* THIN SPACE */
1979     case 0x200A: /* HAIR SPACE */
1980     case 0x202f: /* NARROW NO-BREAK SPACE */
1981     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1982     case 0x3000: /* IDEOGRAPHIC SPACE */
1983     break;
1984     }
1985     ecode++;
1986     break;
1987    
1988     case OP_NOT_VSPACE:
1989 ph10 443 if (eptr >= md->end_subject)
1990 ph10 428 {
1991 ph10 443 SCHECK_PARTIAL();
1992 ph10 510 MRRETURN(MATCH_NOMATCH);
1993 ph10 443 }
1994 ph10 178 GETCHARINCTEST(c, eptr);
1995     switch(c)
1996     {
1997     default: break;
1998     case 0x0a: /* LF */
1999     case 0x0b: /* VT */
2000     case 0x0c: /* FF */
2001     case 0x0d: /* CR */
2002     case 0x85: /* NEL */
2003     case 0x2028: /* LINE SEPARATOR */
2004     case 0x2029: /* PARAGRAPH SEPARATOR */
2005 ph10 510 MRRETURN(MATCH_NOMATCH);
2006 ph10 178 }
2007     ecode++;
2008     break;
2009    
2010     case OP_VSPACE:
2011 ph10 443 if (eptr >= md->end_subject)
2012 ph10 428 {
2013 ph10 443 SCHECK_PARTIAL();
2014 ph10 510 MRRETURN(MATCH_NOMATCH);
2015 ph10 443 }
2016 ph10 178 GETCHARINCTEST(c, eptr);
2017     switch(c)
2018     {
2019 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2020 ph10 178 case 0x0a: /* LF */
2021     case 0x0b: /* VT */
2022     case 0x0c: /* FF */
2023     case 0x0d: /* CR */
2024     case 0x85: /* NEL */
2025     case 0x2028: /* LINE SEPARATOR */
2026     case 0x2029: /* PARAGRAPH SEPARATOR */
2027     break;
2028     }
2029     ecode++;
2030     break;
2031    
2032 nigel 77 #ifdef SUPPORT_UCP
2033     /* Check the next character by Unicode property. We will get here only
2034     if the support is in the binary; otherwise a compile-time error occurs. */
2035    
2036     case OP_PROP:
2037     case OP_NOTPROP:
2038 ph10 443 if (eptr >= md->end_subject)
2039 ph10 428 {
2040 ph10 443 SCHECK_PARTIAL();
2041 ph10 510 MRRETURN(MATCH_NOMATCH);
2042 ph10 443 }
2043 nigel 77 GETCHARINCTEST(c, eptr);
2044     {
2045 ph10 384 const ucd_record *prop = GET_UCD(c);
2046 nigel 77
2047 nigel 87 switch(ecode[1])
2048     {
2049     case PT_ANY:
2050 ph10 510 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2051 nigel 87 break;
2052 nigel 77
2053 nigel 87 case PT_LAMP:
2054 ph10 349 if ((prop->chartype == ucp_Lu ||
2055     prop->chartype == ucp_Ll ||
2056     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2057 ph10 510 MRRETURN(MATCH_NOMATCH);
2058 nigel 87 break;
2059    
2060     case PT_GC:
2061 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2062 ph10 510 MRRETURN(MATCH_NOMATCH);
2063 nigel 87 break;
2064    
2065     case PT_PC:
2066 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2067 ph10 510 MRRETURN(MATCH_NOMATCH);
2068 nigel 87 break;
2069    
2070     case PT_SC:
2071 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2072 ph10 510 MRRETURN(MATCH_NOMATCH);
2073 nigel 87 break;
2074    
2075     default:
2076     RRETURN(PCRE_ERROR_INTERNAL);
2077 nigel 77 }
2078 nigel 87
2079     ecode += 3;
2080 nigel 77 }
2081     break;
2082    
2083     /* Match an extended Unicode sequence. We will get here only if the support
2084     is in the binary; otherwise a compile-time error occurs. */
2085    
2086     case OP_EXTUNI:
2087 ph10 443 if (eptr >= md->end_subject)
2088 ph10 428 {
2089 ph10 443 SCHECK_PARTIAL();
2090 ph10 510 MRRETURN(MATCH_NOMATCH);
2091 ph10 443 }
2092 nigel 77 GETCHARINCTEST(c, eptr);
2093     {
2094 ph10 349 int category = UCD_CATEGORY(c);
2095 ph10 510 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2096 nigel 77 while (eptr < md->end_subject)
2097     {
2098     int len = 1;
2099     if (!utf8) c = *eptr; else
2100     {
2101     GETCHARLEN(c, eptr, len);
2102     }
2103 ph10 349 category = UCD_CATEGORY(c);
2104 nigel 77 if (category != ucp_M) break;
2105     eptr += len;
2106     }
2107     }
2108     ecode++;
2109     break;
2110     #endif
2111    
2112    
2113     /* Match a back reference, possibly repeatedly. Look past the end of the
2114     item to see if there is repeat information following. The code is similar
2115     to that for character classes, but repeated for efficiency. Then obey
2116     similar code to character type repeats - written out again for speed.
2117     However, if the referenced string is the empty string, always treat
2118     it as matched, any number of times (otherwise there could be infinite
2119     loops). */
2120    
2121     case OP_REF:
2122     {
2123     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2124 ph10 345 ecode += 3;
2125    
2126 ph10 336 /* If the reference is unset, there are two possibilities:
2127 ph10 345
2128 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
2129     than the amount of subject left; this ensures that every attempt at a
2130     match fails. We can't just fail here, because of the possibility of
2131     quantifiers with zero minima.
2132 ph10 345
2133     (b) If the JavaScript compatibility flag is set, set the length to zero
2134     so that the back reference matches an empty string.
2135    
2136     Otherwise, set the length to the length of what was matched by the
2137 ph10 336 referenced subpattern. */
2138 ph10 345
2139 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
2140 ph10 345 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2141 ph10 336 else
2142     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2143 nigel 77
2144     /* Set up for repetition, or handle the non-repeated case */
2145    
2146     switch (*ecode)
2147     {
2148     case OP_CRSTAR:
2149     case OP_CRMINSTAR:
2150     case OP_CRPLUS:
2151     case OP_CRMINPLUS:
2152     case OP_CRQUERY:
2153     case OP_CRMINQUERY:
2154     c = *ecode++ - OP_CRSTAR;
2155     minimize = (c & 1) != 0;
2156     min = rep_min[c]; /* Pick up values from tables; */
2157     max = rep_max[c]; /* zero for max => infinity */
2158     if (max == 0) max = INT_MAX;
2159     break;
2160    
2161     case OP_CRRANGE:
2162     case OP_CRMINRANGE:
2163     minimize = (*ecode == OP_CRMINRANGE);
2164     min = GET2(ecode, 1);
2165     max = GET2(ecode, 3);
2166     if (max == 0) max = INT_MAX;
2167     ecode += 5;
2168     break;
2169    
2170     default: /* No repeat follows */
2171 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
2172 ph10 428 {
2173 ph10 443 CHECK_PARTIAL();
2174 ph10 510 MRRETURN(MATCH_NOMATCH);
2175 ph10 443 }
2176 nigel 77 eptr += length;
2177     continue; /* With the main loop */
2178     }
2179    
2180     /* If the length of the reference is zero, just continue with the
2181     main loop. */
2182 ph10 443
2183 nigel 77 if (length == 0) continue;
2184    
2185     /* First, ensure the minimum number of matches are present. We get back
2186     the length of the reference string explicitly rather than passing the
2187     address of eptr, so that eptr can be a register variable. */
2188    
2189     for (i = 1; i <= min; i++)
2190     {
2191 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
2192 ph10 426 {
2193 ph10 427 CHECK_PARTIAL();
2194 ph10 510 MRRETURN(MATCH_NOMATCH);
2195 ph10 427 }
2196 nigel 77 eptr += length;
2197     }
2198    
2199     /* If min = max, continue at the same level without recursion.
2200     They are not both allowed to be zero. */
2201    
2202     if (min == max) continue;
2203    
2204     /* If minimizing, keep trying and advancing the pointer */
2205    
2206     if (minimize)
2207     {
2208     for (fi = min;; fi++)
2209     {
2210 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2211 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2212 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2213 ph10 428 if (!match_ref(offset, eptr, length, md, ims))
2214 ph10 426 {
2215 ph10 427 CHECK_PARTIAL();
2216 ph10 510 MRRETURN(MATCH_NOMATCH);
2217 ph10 427 }
2218 nigel 77 eptr += length;
2219     }
2220     /* Control never gets here */
2221     }
2222    
2223     /* If maximizing, find the longest string and work backwards */
2224    
2225     else
2226     {
2227     pp = eptr;
2228     for (i = min; i < max; i++)
2229     {
2230 ph10 463 if (!match_ref(offset, eptr, length, md, ims))
2231 ph10 462 {
2232 ph10 463 CHECK_PARTIAL();
2233 ph10 462 break;
2234 ph10 463 }
2235 nigel 77 eptr += length;
2236     }
2237     while (eptr >= pp)
2238     {
2239 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2240 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2241     eptr -= length;
2242     }
2243 ph10 510 MRRETURN(MATCH_NOMATCH);
2244 nigel 77 }
2245     }
2246     /* Control never gets here */
2247    
2248     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2249     used when all the characters in the class have values in the range 0-255,
2250     and either the matching is caseful, or the characters are in the range
2251     0-127 when UTF-8 processing is enabled. The only difference between
2252     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2253     encountered.
2254    
2255     First, look past the end of the item to see if there is repeat information
2256     following. Then obey similar code to character type repeats - written out
2257     again for speed. */
2258    
2259     case OP_NCLASS:
2260     case OP_CLASS:
2261     {
2262     data = ecode + 1; /* Save for matching */
2263     ecode += 33; /* Advance past the item */
2264    
2265     switch (*ecode)
2266     {
2267     case OP_CRSTAR:
2268     case OP_CRMINSTAR:
2269     case OP_CRPLUS:
2270     case OP_CRMINPLUS:
2271     case OP_CRQUERY:
2272     case OP_CRMINQUERY:
2273     c = *ecode++ - OP_CRSTAR;
2274     minimize = (c & 1) != 0;
2275     min = rep_min[c]; /* Pick up values from tables; */
2276     max = rep_max[c]; /* zero for max => infinity */
2277     if (max == 0) max = INT_MAX;
2278     break;
2279    
2280     case OP_CRRANGE:
2281     case OP_CRMINRANGE:
2282     minimize = (*ecode == OP_CRMINRANGE);
2283     min = GET2(ecode, 1);
2284     max = GET2(ecode, 3);
2285     if (max == 0) max = INT_MAX;
2286     ecode += 5;
2287     break;
2288    
2289     default: /* No repeat follows */
2290     min = max = 1;
2291     break;
2292     }
2293    
2294     /* First, ensure the minimum number of matches are present. */
2295    
2296     #ifdef SUPPORT_UTF8
2297     /* UTF-8 mode */
2298     if (utf8)
2299     {
2300     for (i = 1; i <= min; i++)
2301     {
2302 ph10 427 if (eptr >= md->end_subject)
2303 ph10 426 {
2304 ph10 428 SCHECK_PARTIAL();
2305 ph10 510 MRRETURN(MATCH_NOMATCH);
2306 ph10 427 }
2307 nigel 77 GETCHARINC(c, eptr);
2308     if (c > 255)
2309     {
2310 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2311 nigel 77 }
2312     else
2313     {
2314 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2315 nigel 77 }
2316     }
2317     }
2318     else
2319     #endif
2320     /* Not UTF-8 mode */
2321     {
2322     for (i = 1; i <= min; i++)
2323     {
2324 ph10 427 if (eptr >= md->end_subject)
2325 ph10 426 {
2326 ph10 428 SCHECK_PARTIAL();
2327 ph10 510 MRRETURN(MATCH_NOMATCH);
2328 ph10 427 }
2329 nigel 77 c = *eptr++;
2330 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2331 nigel 77 }
2332     }
2333    
2334     /* If max == min we can continue with the main loop without the
2335     need to recurse. */
2336    
2337     if (min == max) continue;
2338    
2339     /* If minimizing, keep testing the rest of the expression and advancing
2340     the pointer while it matches the class. */
2341    
2342     if (minimize)
2343     {
2344     #ifdef SUPPORT_UTF8
2345     /* UTF-8 mode */
2346     if (utf8)
2347     {
2348     for (fi = min;; fi++)
2349     {
2350 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2351 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2352 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2353 ph10 427 if (eptr >= md->end_subject)
2354 ph10 426 {
2355 ph10 427 SCHECK_PARTIAL();
2356 ph10 510 MRRETURN(MATCH_NOMATCH);
2357 ph10 427 }
2358 nigel 77 GETCHARINC(c, eptr);
2359     if (c > 255)
2360     {
2361 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2362 nigel 77 }
2363     else
2364     {
2365 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2366 nigel 77 }
2367     }
2368     }
2369     else
2370     #endif
2371     /* Not UTF-8 mode */
2372     {
2373     for (fi = min;; fi++)
2374     {
2375 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2376 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2377 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2378 ph10 427 if (eptr >= md->end_subject)
2379 ph10 426 {
2380 ph10 427 SCHECK_PARTIAL();
2381 ph10 510 MRRETURN(MATCH_NOMATCH);
2382 ph10 427 }
2383 nigel 77 c = *eptr++;
2384 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2385 nigel 77 }
2386     }
2387     /* Control never gets here */
2388     }
2389    
2390     /* If maximizing, find the longest possible run, then work backwards. */
2391    
2392     else
2393     {
2394     pp = eptr;
2395    
2396     #ifdef SUPPORT_UTF8
2397     /* UTF-8 mode */
2398     if (utf8)
2399     {
2400     for (i = min; i < max; i++)
2401     {
2402     int len = 1;
2403 ph10 463 if (eptr >= md->end_subject)
2404 ph10 462 {
2405 ph10 463 SCHECK_PARTIAL();
2406 ph10 462 break;
2407 ph10 463 }
2408 nigel 77 GETCHARLEN(c, eptr, len);
2409     if (c > 255)
2410     {
2411     if (op == OP_CLASS) break;
2412     }
2413     else
2414     {
2415     if ((data[c/8] & (1 << (c&7))) == 0) break;
2416     }
2417     eptr += len;
2418     }
2419     for (;;)
2420     {
2421 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2422 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2423     if (eptr-- == pp) break; /* Stop if tried at original pos */
2424     BACKCHAR(eptr);
2425     }
2426     }
2427     else
2428     #endif
2429     /* Not UTF-8 mode */
2430     {
2431     for (i = min; i < max; i++)
2432     {
2433 ph10 463 if (eptr >= md->end_subject)
2434 ph10 462 {
2435 ph10 463 SCHECK_PARTIAL();
2436 ph10 462 break;
2437 ph10 463 }
2438 nigel 77 c = *eptr;
2439     if ((data[c/8] & (1 << (c&7))) == 0) break;
2440     eptr++;
2441     }
2442     while (eptr >= pp)
2443     {
2444 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2445 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2446 nigel 77 eptr--;
2447     }
2448     }
2449    
2450 ph10 510 MRRETURN(MATCH_NOMATCH);
2451 nigel 77 }
2452     }
2453     /* Control never gets here */
2454    
2455    
2456     /* Match an extended character class. This opcode is encountered only
2457 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2458     mode, because Unicode properties are supported in non-UTF-8 mode. */
2459 nigel 77
2460     #ifdef SUPPORT_UTF8
2461     case OP_XCLASS:
2462     {
2463     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2464     ecode += GET(ecode, 1); /* Advance past the item */
2465    
2466     switch (*ecode)
2467     {
2468     case OP_CRSTAR:
2469     case OP_CRMINSTAR:
2470     case OP_CRPLUS:
2471     case OP_CRMINPLUS:
2472     case OP_CRQUERY:
2473     case OP_CRMINQUERY:
2474     c = *ecode++ - OP_CRSTAR;
2475     minimize = (c & 1) != 0;
2476     min = rep_min[c]; /* Pick up values from tables; */
2477     max = rep_max[c]; /* zero for max => infinity */
2478     if (max == 0) max = INT_MAX;
2479     break;
2480    
2481     case OP_CRRANGE:
2482     case OP_CRMINRANGE:
2483     minimize = (*ecode == OP_CRMINRANGE);
2484     min = GET2(ecode, 1);
2485     max = GET2(ecode, 3);
2486     if (max == 0) max = INT_MAX;
2487     ecode += 5;
2488     break;
2489    
2490     default: /* No repeat follows */
2491     min = max = 1;
2492     break;
2493     }
2494    
2495     /* First, ensure the minimum number of matches are present. */
2496    
2497     for (i = 1; i <= min; i++)
2498     {
2499 ph10 427 if (eptr >= md->end_subject)
2500 ph10 426 {
2501     SCHECK_PARTIAL();
2502 ph10 510 MRRETURN(MATCH_NOMATCH);
2503 ph10 427 }
2504 ph10 384 GETCHARINCTEST(c, eptr);
2505 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2506 nigel 77 }
2507    
2508     /* If max == min we can continue with the main loop without the
2509     need to recurse. */
2510    
2511     if (min == max) continue;
2512    
2513     /* If minimizing, keep testing the rest of the expression and advancing
2514     the pointer while it matches the class. */
2515    
2516     if (minimize)
2517     {
2518     for (fi = min;; fi++)
2519     {
2520 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2521 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2522 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2523 ph10 427 if (eptr >= md->end_subject)
2524 ph10 426 {
2525 ph10 427 SCHECK_PARTIAL();
2526 ph10 510 MRRETURN(MATCH_NOMATCH);
2527 ph10 427 }
2528 ph10 384 GETCHARINCTEST(c, eptr);
2529 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2530 nigel 77 }
2531     /* Control never gets here */
2532     }
2533    
2534     /* If maximizing, find the longest possible run, then work backwards. */
2535    
2536     else
2537     {
2538     pp = eptr;
2539     for (i = min; i < max; i++)
2540     {
2541     int len = 1;
2542 ph10 463 if (eptr >= md->end_subject)
2543 ph10 462 {
2544 ph10 463 SCHECK_PARTIAL();
2545 ph10 462 break;
2546 ph10 463 }
2547 ph10 384 GETCHARLENTEST(c, eptr, len);
2548 nigel 77 if (!_pcre_xclass(c, data)) break;
2549     eptr += len;
2550     }
2551     for(;;)
2552     {
2553 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2554 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2555     if (eptr-- == pp) break; /* Stop if tried at original pos */
2556 ph10 214 if (utf8) BACKCHAR(eptr);
2557 nigel 77 }
2558 ph10 510 MRRETURN(MATCH_NOMATCH);
2559 nigel 77 }
2560    
2561     /* Control never gets here */
2562     }
2563     #endif /* End of XCLASS */
2564    
2565     /* Match a single character, casefully */
2566    
2567     case OP_CHAR:
2568     #ifdef SUPPORT_UTF8
2569     if (utf8)
2570     {
2571     length = 1;
2572     ecode++;
2573     GETCHARLEN(fc, ecode, length);
2574 ph10 443 if (length > md->end_subject - eptr)
2575 ph10 428 {
2576     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2577 ph10 510 MRRETURN(MATCH_NOMATCH);
2578 ph10 443 }
2579 ph10 510 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2580 nigel 77 }
2581     else
2582     #endif
2583    
2584     /* Non-UTF-8 mode */
2585     {
2586 ph10 443 if (md->end_subject - eptr < 1)
2587 ph10 428 {
2588     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2589 ph10 510 MRRETURN(MATCH_NOMATCH);
2590 ph10 443 }
2591 ph10 510 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2592 nigel 77 ecode += 2;
2593     }
2594     break;
2595    
2596     /* Match a single character, caselessly */
2597    
2598     case OP_CHARNC:
2599     #ifdef SUPPORT_UTF8
2600     if (utf8)
2601     {
2602     length = 1;
2603     ecode++;
2604     GETCHARLEN(fc, ecode, length);
2605    
2606 ph10 443 if (length > md->end_subject - eptr)
2607 ph10 428 {
2608     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2609 ph10 510 MRRETURN(MATCH_NOMATCH);
2610 ph10 443 }
2611 nigel 77
2612     /* If the pattern character's value is < 128, we have only one byte, and
2613     can use the fast lookup table. */
2614    
2615     if (fc < 128)
2616     {
2617 ph10 510 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2618 nigel 77 }
2619    
2620     /* Otherwise we must pick up the subject character */
2621    
2622     else
2623     {
2624 nigel 93 unsigned int dc;
2625 nigel 77 GETCHARINC(dc, eptr);
2626     ecode += length;
2627    
2628     /* If we have Unicode property support, we can use it to test the other
2629 nigel 87 case of the character, if there is one. */
2630 nigel 77
2631     if (fc != dc)
2632     {
2633     #ifdef SUPPORT_UCP
2634 ph10 349 if (dc != UCD_OTHERCASE(fc))
2635 nigel 77 #endif
2636 ph10 510 MRRETURN(MATCH_NOMATCH);
2637 nigel 77 }
2638     }
2639     }
2640     else
2641     #endif /* SUPPORT_UTF8 */
2642    
2643     /* Non-UTF-8 mode */
2644     {
2645 ph10 443 if (md->end_subject - eptr < 1)
2646 ph10 428 {
2647 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2648 ph10 510 MRRETURN(MATCH_NOMATCH);
2649 ph10 443 }
2650 ph10 510 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2651 nigel 77 ecode += 2;
2652     }
2653     break;
2654    
2655 nigel 93 /* Match a single character repeatedly. */
2656 nigel 77
2657     case OP_EXACT:
2658     min = max = GET2(ecode, 1);
2659     ecode += 3;
2660     goto REPEATCHAR;
2661    
2662 nigel 93 case OP_POSUPTO:
2663     possessive = TRUE;
2664     /* Fall through */
2665    
2666 nigel 77 case OP_UPTO:
2667     case OP_MINUPTO:
2668     min = 0;
2669     max = GET2(ecode, 1);
2670     minimize = *ecode == OP_MINUPTO;
2671     ecode += 3;
2672     goto REPEATCHAR;
2673    
2674 nigel 93 case OP_POSSTAR:
2675     possessive = TRUE;
2676     min = 0;
2677     max = INT_MAX;
2678     ecode++;
2679     goto REPEATCHAR;
2680    
2681     case OP_POSPLUS:
2682     possessive = TRUE;
2683     min = 1;
2684     max = INT_MAX;
2685     ecode++;
2686     goto REPEATCHAR;
2687    
2688     case OP_POSQUERY:
2689     possessive = TRUE;
2690     min = 0;
2691     max = 1;
2692     ecode++;
2693     goto REPEATCHAR;
2694    
2695 nigel 77 case OP_STAR:
2696     case OP_MINSTAR:
2697     case OP_PLUS:
2698     case OP_MINPLUS:
2699     case OP_QUERY:
2700     case OP_MINQUERY:
2701     c = *ecode++ - OP_STAR;
2702     minimize = (c & 1) != 0;
2703 ph10 443
2704 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2705     max = rep_max[c]; /* zero for max => infinity */
2706     if (max == 0) max = INT_MAX;
2707    
2708 ph10 426 /* Common code for all repeated single-character matches. */
2709 nigel 77
2710     REPEATCHAR:
2711     #ifdef SUPPORT_UTF8
2712     if (utf8)
2713     {
2714     length = 1;
2715     charptr = ecode;
2716     GETCHARLEN(fc, ecode, length);
2717     ecode += length;
2718    
2719     /* Handle multibyte character matching specially here. There is
2720     support for caseless matching if UCP support is present. */
2721    
2722     if (length > 1)
2723     {
2724     #ifdef SUPPORT_UCP
2725 nigel 93 unsigned int othercase;
2726 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2727 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2728 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2729 ph10 115 else oclength = 0;
2730 nigel 77 #endif /* SUPPORT_UCP */
2731    
2732     for (i = 1; i <= min; i++)
2733     {
2734 ph10 426 if (eptr <= md->end_subject - length &&
2735     memcmp(eptr, charptr, length) == 0) eptr += length;
2736 ph10 123 #ifdef SUPPORT_UCP
2737 ph10 426 else if (oclength > 0 &&
2738     eptr <= md->end_subject - oclength &&
2739     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2740     #endif /* SUPPORT_UCP */
2741 nigel 77 else
2742     {
2743 ph10 426 CHECK_PARTIAL();
2744 ph10 510 MRRETURN(MATCH_NOMATCH);
2745 nigel 77 }
2746     }
2747    
2748     if (min == max) continue;
2749    
2750     if (minimize)
2751     {
2752     for (fi = min;; fi++)
2753     {
2754 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2755 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2756 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2757 ph10 426 if (eptr <= md->end_subject - length &&
2758     memcmp(eptr, charptr, length) == 0) eptr += length;
2759 ph10 123 #ifdef SUPPORT_UCP
2760 ph10 426 else if (oclength > 0 &&
2761     eptr <= md->end_subject - oclength &&
2762     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2763     #endif /* SUPPORT_UCP */
2764 nigel 77 else
2765     {
2766 ph10 426 CHECK_PARTIAL();
2767 ph10 510 MRRETURN(MATCH_NOMATCH);
2768 nigel 77 }
2769     }
2770     /* Control never gets here */
2771     }
2772 nigel 93
2773     else /* Maximize */
2774 nigel 77 {
2775     pp = eptr;
2776     for (i = min; i < max; i++)
2777     {
2778 ph10 426 if (eptr <= md->end_subject - length &&
2779     memcmp(eptr, charptr, length) == 0) eptr += length;
2780 ph10 123 #ifdef SUPPORT_UCP
2781 ph10 426 else if (oclength > 0 &&
2782     eptr <= md->end_subject - oclength &&
2783     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2784     #endif /* SUPPORT_UCP */
2785 ph10 463 else
2786 ph10 462 {
2787 ph10 463 CHECK_PARTIAL();
2788 ph10 462 break;
2789 ph10 463 }
2790 nigel 77 }
2791 nigel 93
2792     if (possessive) continue;
2793 ph10 427
2794 ph10 120 for(;;)
2795 ph10 426 {
2796     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2797     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2798 ph10 510 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2799 ph10 115 #ifdef SUPPORT_UCP
2800 ph10 426 eptr--;
2801     BACKCHAR(eptr);
2802 ph10 123 #else /* without SUPPORT_UCP */
2803 ph10 426 eptr -= length;
2804 ph10 123 #endif /* SUPPORT_UCP */
2805 ph10 426 }
2806 nigel 77 }
2807     /* Control never gets here */
2808     }
2809    
2810     /* If the length of a UTF-8 character is 1, we fall through here, and
2811     obey the code as for non-UTF-8 characters below, though in this case the
2812     value of fc will always be < 128. */
2813     }
2814     else
2815     #endif /* SUPPORT_UTF8 */
2816    
2817     /* When not in UTF-8 mode, load a single-byte character. */
2818    
2819 ph10 426 fc = *ecode++;
2820 ph10 443
2821 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2822     may not be in UTF-8 mode. The code is duplicated for the caseless and
2823     caseful cases, for speed, since matching characters is likely to be quite
2824     common. First, ensure the minimum number of matches are present. If min =
2825     max, continue at the same level without recursing. Otherwise, if
2826     minimizing, keep trying the rest of the expression and advancing one
2827     matching character if failing, up to the maximum. Alternatively, if
2828     maximizing, find the maximum number of characters and work backwards. */
2829    
2830     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2831     max, eptr));
2832    
2833     if ((ims & PCRE_CASELESS) != 0)
2834     {
2835     fc = md->lcc[fc];
2836     for (i = 1; i <= min; i++)
2837 ph10 426 {
2838     if (eptr >= md->end_subject)
2839     {
2840     SCHECK_PARTIAL();
2841 ph10 510 MRRETURN(MATCH_NOMATCH);
2842 ph10 426 }
2843 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2844 ph10 426 }
2845 nigel 77 if (min == max) continue;
2846     if (minimize)
2847     {
2848     for (fi = min;; fi++)
2849     {
2850 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2851 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2852 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2853 ph10 426 if (eptr >= md->end_subject)
2854     {
2855 ph10 427 SCHECK_PARTIAL();
2856 ph10 510 MRRETURN(MATCH_NOMATCH);
2857 ph10 426 }
2858 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2859 nigel 77 }
2860     /* Control never gets here */
2861     }
2862 nigel 93 else /* Maximize */
2863 nigel 77 {
2864     pp = eptr;
2865     for (i = min; i < max; i++)
2866     {
2867 ph10 463 if (eptr >= md->end_subject)
2868 ph10 462 {
2869     SCHECK_PARTIAL();
2870     break;
2871 ph10 463 }
2872 ph10 462 if (fc != md->lcc[*eptr]) break;
2873 nigel 77 eptr++;
2874     }
2875 ph10 427
2876 nigel 93 if (possessive) continue;
2877 ph10 427
2878 nigel 77 while (eptr >= pp)
2879     {
2880 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2881 nigel 77 eptr--;
2882     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2883     }
2884 ph10 510 MRRETURN(MATCH_NOMATCH);
2885 nigel 77 }
2886     /* Control never gets here */
2887     }
2888    
2889     /* Caseful comparisons (includes all multi-byte characters) */
2890    
2891     else
2892     {
2893 ph10 427 for (i = 1; i <= min; i++)
2894 ph10 426 {
2895     if (eptr >= md->end_subject)
2896     {
2897     SCHECK_PARTIAL();
2898 ph10 510 MRRETURN(MATCH_NOMATCH);
2899 ph10 426 }
2900 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2901 ph10 427 }
2902 ph10 443
2903 nigel 77 if (min == max) continue;
2904 ph10 443
2905 nigel 77 if (minimize)
2906     {
2907     for (fi = min;; fi++)
2908     {
2909 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2910 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2911 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2912 ph10 426 if (eptr >= md->end_subject)
2913 ph10 427 {
2914 ph10 426 SCHECK_PARTIAL();
2915 ph10 510 MRRETURN(MATCH_NOMATCH);
2916 ph10 427 }
2917 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2918 nigel 77 }
2919     /* Control never gets here */
2920     }
2921 nigel 93 else /* Maximize */
2922 nigel 77 {
2923     pp = eptr;
2924     for (i = min; i < max; i++)
2925     {
2926 ph10 463 if (eptr >= md->end_subject)
2927 ph10 462 {
2928 ph10 463 SCHECK_PARTIAL();
2929 ph10 462 break;
2930 ph10 463 }
2931 ph10 462 if (fc != *eptr) break;
2932 nigel 77 eptr++;
2933     }
2934 nigel 93 if (possessive) continue;
2935 ph10 443
2936 nigel 77 while (eptr >= pp)
2937     {
2938 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2939 nigel 77 eptr--;
2940     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2941     }
2942 ph10 510 MRRETURN(MATCH_NOMATCH);
2943 nigel 77 }
2944     }
2945     /* Control never gets here */
2946    
2947     /* Match a negated single one-byte character. The character we are
2948     checking can be multibyte. */
2949    
2950     case OP_NOT:
2951 ph10 443 if (eptr >= md->end_subject)
2952 ph10 428 {
2953 ph10 443 SCHECK_PARTIAL();
2954 ph10 510 MRRETURN(MATCH_NOMATCH);
2955 ph10 443 }
2956 nigel 77 ecode++;
2957     GETCHARINCTEST(c, eptr);
2958     if ((ims & PCRE_CASELESS) != 0)
2959     {
2960     #ifdef SUPPORT_UTF8
2961     if (c < 256)
2962     #endif
2963     c = md->lcc[c];
2964 ph10 510 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
2965 nigel 77 }
2966     else
2967     {
2968 ph10 510 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
2969 nigel 77 }
2970     break;
2971    
2972     /* Match a negated single one-byte character repeatedly. This is almost a
2973     repeat of the code for a repeated single character, but I haven't found a
2974     nice way of commoning these up that doesn't require a test of the
2975     positive/negative option for each character match. Maybe that wouldn't add
2976     very much to the time taken, but character matching *is* what this is all
2977     about... */
2978    
2979     case OP_NOTEXACT:
2980     min = max = GET2(ecode, 1);
2981     ecode += 3;
2982     goto REPEATNOTCHAR;
2983    
2984     case OP_NOTUPTO:
2985     case OP_NOTMINUPTO:
2986     min = 0;
2987     max = GET2(ecode, 1);
2988     minimize = *ecode == OP_NOTMINUPTO;
2989     ecode += 3;
2990     goto REPEATNOTCHAR;
2991    
2992 nigel 93 case OP_NOTPOSSTAR:
2993     possessive = TRUE;
2994     min = 0;
2995     max = INT_MAX;
2996     ecode++;
2997     goto REPEATNOTCHAR;
2998    
2999     case OP_NOTPOSPLUS:
3000     possessive = TRUE;
3001     min = 1;
3002     max = INT_MAX;
3003     ecode++;
3004     goto REPEATNOTCHAR;
3005    
3006     case OP_NOTPOSQUERY:
3007     possessive = TRUE;
3008     min = 0;
3009     max = 1;
3010     ecode++;
3011     goto REPEATNOTCHAR;
3012    
3013     case OP_NOTPOSUPTO:
3014     possessive = TRUE;
3015     min = 0;
3016     max = GET2(ecode, 1);
3017     ecode += 3;
3018     goto REPEATNOTCHAR;
3019    
3020 nigel 77 case OP_NOTSTAR:
3021     case OP_NOTMINSTAR:
3022     case OP_NOTPLUS:
3023     case OP_NOTMINPLUS:
3024     case OP_NOTQUERY:
3025     case OP_NOTMINQUERY:
3026     c = *ecode++ - OP_NOTSTAR;
3027     minimize = (c & 1) != 0;
3028     min = rep_min[c]; /* Pick up values from tables; */
3029     max = rep_max[c]; /* zero for max => infinity */
3030     if (max == 0) max = INT_MAX;
3031    
3032 ph10 426 /* Common code for all repeated single-byte matches. */
3033 nigel 77
3034     REPEATNOTCHAR:
3035     fc = *ecode++;
3036    
3037     /* The code is duplicated for the caseless and caseful cases, for speed,
3038     since matching characters is likely to be quite common. First, ensure the
3039     minimum number of matches are present. If min = max, continue at the same
3040     level without recursing. Otherwise, if minimizing, keep trying the rest of
3041     the expression and advancing one matching character if failing, up to the
3042     maximum. Alternatively, if maximizing, find the maximum number of
3043     characters and work backwards. */
3044    
3045     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3046     max, eptr));
3047    
3048     if ((ims & PCRE_CASELESS) != 0)
3049     {
3050     fc = md->lcc[fc];
3051    
3052     #ifdef SUPPORT_UTF8
3053     /* UTF-8 mode */
3054     if (utf8)
3055     {
3056 nigel 93 register unsigned int d;
3057 nigel 77 for (i = 1; i <= min; i++)
3058     {
3059 ph10 426 if (eptr >= md->end_subject)
3060     {
3061     SCHECK_PARTIAL();
3062 ph10 510 MRRETURN(MATCH_NOMATCH);
3063 ph10 427 }
3064 nigel 77 GETCHARINC(d, eptr);
3065     if (d < 256) d = md->lcc[d];
3066 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3067 nigel 77 }
3068     }
3069     else
3070     #endif
3071    
3072     /* Not UTF-8 mode */
3073     {
3074     for (i = 1; i <= min; i++)
3075 ph10 426 {
3076     if (eptr >= md->end_subject)
3077     {
3078     SCHECK_PARTIAL();
3079 ph10 510 MRRETURN(MATCH_NOMATCH);
3080 ph10 427 }
3081 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3082 ph10 427 }
3083 nigel 77 }
3084    
3085     if (min == max) continue;
3086    
3087     if (minimize)
3088     {
3089     #ifdef SUPPORT_UTF8
3090     /* UTF-8 mode */
3091     if (utf8)
3092     {
3093 nigel 93 register unsigned int d;
3094 nigel 77 for (fi = min;; fi++)
3095     {
3096 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3097 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3098 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3099 ph10 427 if (eptr >= md->end_subject)
3100 ph10 426 {
3101 ph10 427 SCHECK_PARTIAL();
3102 ph10 510 MRRETURN(MATCH_NOMATCH);
3103 ph10 427 }
3104 nigel 77 GETCHARINC(d, eptr);
3105     if (d < 256) d = md->lcc[d];
3106 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3107 nigel 77 }
3108     }
3109     else
3110     #endif
3111     /* Not UTF-8 mode */
3112     {
3113     for (fi = min;; fi++)
3114     {
3115 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3116 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3117 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3118 ph10 426 if (eptr >= md->end_subject)
3119     {
3120     SCHECK_PARTIAL();
3121 ph10 510 MRRETURN(MATCH_NOMATCH);
3122 ph10 426 }
3123 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3124 nigel 77 }
3125     }
3126     /* Control never gets here */
3127     }
3128    
3129     /* Maximize case */
3130    
3131     else
3132     {
3133     pp = eptr;
3134    
3135     #ifdef SUPPORT_UTF8
3136     /* UTF-8 mode */
3137     if (utf8)
3138     {
3139 nigel 93 register unsigned int d;
3140 nigel 77 for (i = min; i < max; i++)
3141     {
3142     int len = 1;
3143 ph10 463 if (eptr >= md->end_subject)
3144 ph10 462 {
3145 ph10 463 SCHECK_PARTIAL();
3146 ph10 462 break;
3147 ph10 463 }
3148 nigel 77 GETCHARLEN(d, eptr, len);
3149     if (d < 256) d = md->lcc[d];
3150     if (fc == d) break;
3151     eptr += len;
3152     }
3153 nigel 93 if (possessive) continue;
3154     for(;;)
3155 nigel 77 {
3156 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3157 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3158     if (eptr-- == pp) break; /* Stop if tried at original pos */
3159     BACKCHAR(eptr);
3160     }
3161     }
3162     else
3163     #endif
3164     /* Not UTF-8 mode */
3165     {
3166     for (i = min; i < max; i++)
3167     {
3168 ph10 463 if (eptr >= md->end_subject)
3169 ph10 462 {
3170     SCHECK_PARTIAL();
3171     break;
3172 ph10 463 }
3173 ph10 462 if (fc == md->lcc[*eptr]) break;
3174 nigel 77 eptr++;
3175     }
3176 nigel 93 if (possessive) continue;
3177 nigel 77 while (eptr >= pp)
3178     {
3179 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3180 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3181     eptr--;
3182     }
3183     }
3184    
3185 ph10 510 MRRETURN(MATCH_NOMATCH);
3186 nigel 77 }
3187     /* Control never gets here */
3188     }
3189    
3190     /* Caseful comparisons */
3191    
3192     else
3193     {
3194     #ifdef SUPPORT_UTF8
3195     /* UTF-8 mode */
3196     if (utf8)
3197     {
3198 nigel 93 register unsigned int d;
3199 nigel 77 for (i = 1; i <= min; i++)
3200     {
3201 ph10 426 if (eptr >= md->end_subject)
3202     {
3203     SCHECK_PARTIAL();
3204 ph10 510 MRRETURN(MATCH_NOMATCH);
3205 ph10 427 }
3206 nigel 77 GETCHARINC(d, eptr);
3207 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3208 nigel 77 }
3209     }
3210     else
3211     #endif
3212     /* Not UTF-8 mode */
3213     {
3214     for (i = 1; i <= min; i++)
3215 ph10 426 {
3216     if (eptr >= md->end_subject)
3217     {
3218     SCHECK_PARTIAL();
3219 ph10 510 MRRETURN(MATCH_NOMATCH);
3220 ph10 427 }
3221 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3222 ph10 427 }
3223 nigel 77 }
3224    
3225     if (min == max) continue;
3226    
3227     if (minimize)
3228     {
3229     #ifdef SUPPORT_UTF8
3230     /* UTF-8 mode */
3231     if (utf8)
3232     {
3233 nigel 93 register unsigned int d;
3234 nigel 77 for (fi = min;; fi++)
3235     {
3236 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3237 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3238 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3239 ph10 427 if (eptr >= md->end_subject)
3240 ph10 426 {
3241 ph10 427 SCHECK_PARTIAL();
3242 ph10 510 MRRETURN(MATCH_NOMATCH);
3243 ph10 427 }
3244 nigel 77 GETCHARINC(d, eptr);
3245 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3246 nigel 77 }
3247     }
3248     else
3249     #endif
3250     /* Not UTF-8 mode */
3251     {
3252     for (fi = min;; fi++)
3253     {
3254 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3255 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3256 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3257 ph10 426 if (eptr >= md->end_subject)
3258     {
3259     SCHECK_PARTIAL();
3260 ph10 510 MRRETURN(MATCH_NOMATCH);
3261 ph10 427 }
3262 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3263 nigel 77 }
3264     }
3265     /* Control never gets here */
3266     }
3267    
3268     /* Maximize case */
3269    
3270     else
3271     {
3272     pp = eptr;
3273    
3274     #ifdef SUPPORT_UTF8
3275     /* UTF-8 mode */
3276     if (utf8)
3277     {
3278 nigel 93 register unsigned int d;
3279 nigel 77 for (i = min; i < max; i++)
3280     {
3281     int len = 1;
3282 ph10 463 if (eptr >= md->end_subject)
3283 ph10 462 {
3284 ph10 463 SCHECK_PARTIAL();
3285 ph10 462 break;
3286 ph10 463 }
3287 nigel 77 GETCHARLEN(d, eptr, len);
3288     if (fc == d) break;
3289     eptr += len;
3290     }
3291 nigel 93 if (possessive) continue;
3292 nigel 77 for(;;)
3293     {
3294 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3295 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3296     if (eptr-- == pp) break; /* Stop if tried at original pos */
3297     BACKCHAR(eptr);
3298     }
3299     }
3300     else
3301     #endif
3302     /* Not UTF-8 mode */
3303     {
3304     for (i = min; i < max; i++)
3305     {
3306 ph10 463 if (eptr >= md->end_subject)
3307 ph10 462 {
3308 ph10 463 SCHECK_PARTIAL();
3309 ph10 462 break;
3310 ph10 463 }
3311 ph10 462 if (fc == *eptr) break;
3312 nigel 77 eptr++;
3313     }
3314 nigel 93 if (possessive) continue;
3315 nigel 77 while (eptr >= pp)
3316     {
3317 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3318 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3319     eptr--;
3320     }
3321     }
3322    
3323 ph10 510 MRRETURN(MATCH_NOMATCH);
3324 nigel 77 }
3325     }
3326     /* Control never gets here */
3327    
3328     /* Match a single character type repeatedly; several different opcodes
3329     share code. This is very similar to the code for single characters, but we
3330     repeat it in the interests of efficiency. */
3331    
3332     case OP_TYPEEXACT:
3333     min = max = GET2(ecode, 1);
3334     minimize = TRUE;
3335     ecode += 3;
3336     goto REPEATTYPE;
3337    
3338     case OP_TYPEUPTO:
3339     case OP_TYPEMINUPTO:
3340     min = 0;
3341     max = GET2(ecode, 1);
3342     minimize = *ecode == OP_TYPEMINUPTO;
3343     ecode += 3;
3344     goto REPEATTYPE;
3345    
3346 nigel 93 case OP_TYPEPOSSTAR:
3347     possessive = TRUE;
3348     min = 0;
3349     max = INT_MAX;
3350     ecode++;
3351     goto REPEATTYPE;
3352    
3353     case OP_TYPEPOSPLUS:
3354     possessive = TRUE;
3355     min = 1;
3356     max = INT_MAX;
3357     ecode++;
3358     goto REPEATTYPE;
3359    
3360     case OP_TYPEPOSQUERY:
3361     possessive = TRUE;
3362     min = 0;
3363     max = 1;
3364     ecode++;
3365     goto REPEATTYPE;
3366    
3367     case OP_TYPEPOSUPTO:
3368     possessive = TRUE;
3369     min = 0;
3370     max = GET2(ecode, 1);
3371     ecode += 3;
3372     goto REPEATTYPE;
3373    
3374 nigel 77 case OP_TYPESTAR:
3375     case OP_TYPEMINSTAR:
3376     case OP_TYPEPLUS:
3377     case OP_TYPEMINPLUS:
3378     case OP_TYPEQUERY:
3379     case OP_TYPEMINQUERY:
3380     c = *ecode++ - OP_TYPESTAR;
3381     minimize = (c & 1) != 0;
3382     min = rep_min[c]; /* Pick up values from tables; */
3383     max = rep_max[c]; /* zero for max => infinity */
3384     if (max == 0) max = INT_MAX;
3385    
3386     /* Common code for all repeated single character type matches. Note that
3387     in UTF-8 mode, '.' matches a character of any length, but for the other
3388     character types, the valid characters are all one-byte long. */
3389    
3390     REPEATTYPE:
3391     ctype = *ecode++; /* Code for the character type */
3392    
3393     #ifdef SUPPORT_UCP
3394     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3395     {
3396     prop_fail_result = ctype == OP_NOTPROP;
3397     prop_type = *ecode++;
3398 nigel 87 prop_value = *ecode++;
3399 nigel 77 }
3400     else prop_type = -1;
3401     #endif
3402    
3403     /* First, ensure the minimum number of matches are present. Use inline
3404     code for maximizing the speed, and do the type test once at the start
3405 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3406 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3407     and single-bytes. */
3408    
3409     if (min > 0)
3410     {
3411     #ifdef SUPPORT_UCP
3412 nigel 87 if (prop_type >= 0)
3413 nigel 77 {
3414 nigel 87 switch(prop_type)
3415 nigel 77 {
3416 nigel 87 case PT_ANY:
3417 ph10 510 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3418 nigel 87 for (i = 1; i <= min; i++)
3419     {
3420 ph10 427 if (eptr >= md->end_subject)
3421 ph10 426 {
3422 ph10 427 SCHECK_PARTIAL();
3423 ph10 510 MRRETURN(MATCH_NOMATCH);
3424 ph10 427 }
3425 ph10 184 GETCHARINCTEST(c, eptr);
3426 nigel 87 }
3427     break;
3428    
3429     case PT_LAMP:
3430     for (i = 1; i <= min; i++)
3431     {
3432 ph10 427 if (eptr >= md->end_subject)
3433 ph10 426 {
3434 ph10 427 SCHECK_PARTIAL();
3435 ph10 510 MRRETURN(MATCH_NOMATCH);
3436 ph10 427 }
3437 ph10 184 GETCHARINCTEST(c, eptr);
3438 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3439 nigel 87 if ((prop_chartype == ucp_Lu ||
3440     prop_chartype == ucp_Ll ||
3441     prop_chartype == ucp_Lt) == prop_fail_result)
3442 ph10 510 MRRETURN(MATCH_NOMATCH);
3443 nigel 87 }
3444     break;
3445    
3446     case PT_GC:
3447     for (i = 1; i <= min; i++)
3448     {
3449 ph10 427 if (eptr >= md->end_subject)
3450 ph10 426 {
3451 ph10 427 SCHECK_PARTIAL();
3452 ph10 510 MRRETURN(MATCH_NOMATCH);
3453 ph10 427 }
3454 ph10 184 GETCHARINCTEST(c, eptr);
3455 ph10 349 prop_category = UCD_CATEGORY(c);
3456 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3457 ph10 510 MRRETURN(MATCH_NOMATCH);
3458 nigel 87 }
3459     break;
3460    
3461     case PT_PC:
3462     for (i = 1; i <= min; i++)
3463     {
3464 ph10 427 if (eptr >= md->end_subject)
3465 ph10 426 {
3466 ph10 427 SCHECK_PARTIAL();
3467 ph10 510 MRRETURN(MATCH_NOMATCH);
3468 ph10 427 }
3469 ph10 184 GETCHARINCTEST(c, eptr);
3470 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3471 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3472 ph10 510 MRRETURN(MATCH_NOMATCH);
3473 nigel 87 }
3474     break;
3475    
3476     case PT_SC:
3477     for (i = 1; i <= min; i++)
3478     {
3479 ph10 427 if (eptr >= md->end_subject)
3480 ph10 426 {
3481 ph10 427 SCHECK_PARTIAL();
3482 ph10 510 MRRETURN(MATCH_NOMATCH);
3483 ph10 427 }
3484 ph10 184 GETCHARINCTEST(c, eptr);
3485 ph10 349 prop_script = UCD_SCRIPT(c);
3486 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3487 ph10 510 MRRETURN(MATCH_NOMATCH);
3488 nigel 87 }
3489     break;
3490    
3491     default:
3492     RRETURN(PCRE_ERROR_INTERNAL);
3493 nigel 77 }
3494     }
3495    
3496     /* Match extended Unicode sequences. We will get here only if the
3497     support is in the binary; otherwise a compile-time error occurs. */
3498    
3499     else if (ctype == OP_EXTUNI)
3500     {
3501     for (i = 1; i <= min; i++)
3502     {
3503 ph10 427 if (eptr >= md->end_subject)
3504 ph10 426 {
3505 ph10 427 SCHECK_PARTIAL();
3506 ph10 510 MRRETURN(MATCH_NOMATCH);
3507 ph10 427 }
3508 nigel 77 GETCHARINCTEST(c, eptr);
3509 ph10 349 prop_category = UCD_CATEGORY(c);
3510 ph10 510 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3511 nigel 77 while (eptr < md->end_subject)
3512     {
3513     int len = 1;
3514 ph10 426 if (!utf8) c = *eptr;
3515     else { GETCHARLEN(c, eptr, len); }
3516 ph10 349 prop_category = UCD_CATEGORY(c);
3517 nigel 77 if (prop_category != ucp_M) break;
3518     eptr += len;
3519     }
3520     }
3521     }
3522    
3523     else
3524     #endif /* SUPPORT_UCP */
3525    
3526     /* Handle all other cases when the coding is UTF-8 */
3527    
3528     #ifdef SUPPORT_UTF8
3529     if (utf8) switch(ctype)
3530     {
3531     case OP_ANY:
3532     for (i = 1; i <= min; i++)
3533     {
3534 ph10 426 if (eptr >= md->end_subject)
3535     {
3536 ph10 427 SCHECK_PARTIAL();
3537 ph10 510 MRRETURN(MATCH_NOMATCH);
3538 ph10 427 }
3539 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3540 nigel 91 eptr++;
3541 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3542     }
3543     break;
3544    
3545 ph10 341 case OP_ALLANY:
3546     for (i = 1; i <= min; i++)
3547     {
3548 ph10 427 if (eptr >= md->end_subject)
3549 ph10 426 {
3550     SCHECK_PARTIAL();
3551 ph10 510 MRRETURN(MATCH_NOMATCH);
3552 ph10 427 }
3553 ph10 341 eptr++;
3554     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3555     }
3556     break;
3557    
3558 nigel 77 case OP_ANYBYTE:
3559 ph10 510 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3560 nigel 77 eptr += min;
3561     break;
3562    
3563 nigel 93 case OP_ANYNL:
3564     for (i = 1; i <= min; i++)
3565     {
3566 ph10 427 if (eptr >= md->end_subject)
3567 ph10 426 {
3568     SCHECK_PARTIAL();
3569 ph10 510 MRRETURN(MATCH_NOMATCH);
3570 ph10 427 }
3571 nigel 93 GETCHARINC(c, eptr);
3572     switch(c)
3573     {
3574 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3575 nigel 93 case 0x000d:
3576     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3577     break;
3578 ph10 231
3579 nigel 93 case 0x000a:
3580 ph10 231 break;
3581    
3582 nigel 93 case 0x000b:
3583     case 0x000c:
3584     case 0x0085:
3585     case 0x2028:
3586     case 0x2029:
3587 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3588 nigel 93 break;
3589     }
3590     }
3591     break;
3592    
3593 ph10 178 case OP_NOT_HSPACE:
3594     for (i = 1; i <= min; i++)
3595     {
3596 ph10 427 if (eptr >= md->end_subject)
3597 ph10 426 {
3598     SCHECK_PARTIAL();
3599 ph10 510 MRRETURN(MATCH_NOMATCH);
3600 ph10 427 }
3601 ph10 178 GETCHARINC(c, eptr);
3602     switch(c)
3603     {
3604     default: break;
3605     case 0x09: /* HT */
3606     case 0x20: /* SPACE */
3607     case 0xa0: /* NBSP */
3608     case 0x1680: /* OGHAM SPACE MARK */
3609     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3610     case 0x2000: /* EN QUAD */
3611     case 0x2001: /* EM QUAD */
3612     case 0x2002: /* EN SPACE */
3613     case 0x2003: /* EM SPACE */
3614     case 0x2004: /* THREE-PER-EM SPACE */
3615     case 0x2005: /* FOUR-PER-EM SPACE */
3616     case 0x2006: /* SIX-PER-EM SPACE */
3617     case 0x2007: /* FIGURE SPACE */
3618     case 0x2008: /* PUNCTUATION SPACE */
3619     case 0x2009: /* THIN SPACE */
3620     case 0x200A: /* HAIR SPACE */
3621     case 0x202f: /* NARROW NO-BREAK SPACE */
3622     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3623     case 0x3000: /* IDEOGRAPHIC SPACE */
3624 ph10 510 MRRETURN(MATCH_NOMATCH);
3625 ph10 178 }
3626     }
3627     break;
3628 ph10 182
3629 ph10 178 case OP_HSPACE:
3630     for (i = 1; i <= min; i++)
3631     {
3632 ph10 427 if (eptr >= md->end_subject)
3633 ph10 426 {
3634 ph10 427 SCHECK_PARTIAL();
3635 ph10 510 MRRETURN(MATCH_NOMATCH);
3636 ph10 427 }
3637 ph10 178 GETCHARINC(c, eptr);
3638     switch(c)
3639     {
3640 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3641 ph10 178 case 0x09: /* HT */
3642     case 0x20: /* SPACE */
3643     case 0xa0: /* NBSP */
3644     case 0x1680: /* OGHAM SPACE MARK */
3645     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3646     case 0x2000: /* EN QUAD */
3647     case 0x2001: /* EM QUAD */
3648     case 0x2002: /* EN SPACE */
3649     case 0x2003: /* EM SPACE */
3650     case 0x2004: /* THREE-PER-EM SPACE */
3651     case 0x2005: /* FOUR-PER-EM SPACE */
3652     case 0x2006: /* SIX-PER-EM SPACE */
3653     case 0x2007: /* FIGURE SPACE */
3654     case 0x2008: /* PUNCTUATION SPACE */
3655     case 0x2009: /* THIN SPACE */
3656     case 0x200A: /* HAIR SPACE */
3657     case 0x202f: /* NARROW NO-BREAK SPACE */
3658     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3659     case 0x3000: /* IDEOGRAPHIC SPACE */
3660     break;
3661     }
3662     }
3663     break;
3664 ph10 182
3665 ph10 178 case OP_NOT_VSPACE:
3666     for (i = 1; i <= min; i++)
3667     {
3668 ph10 427 if (eptr >= md->end_subject)
3669 ph10 426 {
3670 ph10 427 SCHECK_PARTIAL();
3671 ph10 510 MRRETURN(MATCH_NOMATCH);
3672 ph10 427 }
3673 ph10 178 GETCHARINC(c, eptr);
3674     switch(c)
3675     {
3676     default: break;
3677     case 0x0a: /* LF */
3678     case 0x0b: /* VT */
3679     case 0x0c: /* FF */
3680     case 0x0d: /* CR */
3681     case 0x85: /* NEL */
3682     case 0x2028: /* LINE SEPARATOR */
3683     case 0x2029: /* PARAGRAPH SEPARATOR */
3684 ph10 510 MRRETURN(MATCH_NOMATCH);
3685 ph10 178 }
3686     }
3687     break;
3688 ph10 182
3689 ph10 178 case OP_VSPACE:
3690     for (i = 1; i <= min; i++)
3691     {
3692 ph10 427 if (eptr >= md->end_subject)
3693 ph10 426 {
3694 ph10 427 SCHECK_PARTIAL();
3695 ph10 510 MRRETURN(MATCH_NOMATCH);
3696 ph10 427 }
3697 ph10 178 GETCHARINC(c, eptr);
3698     switch(c)
3699     {
3700 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3701 ph10 178 case 0x0a: /* LF */
3702     case 0x0b: /* VT */
3703     case 0x0c: /* FF */
3704     case 0x0d: /* CR */
3705     case 0x85: /* NEL */
3706     case 0x2028: /* LINE SEPARATOR */
3707     case 0x2029: /* PARAGRAPH SEPARATOR */
3708 ph10 182 break;
3709 ph10 178 }
3710     }
3711     break;
3712    
3713 nigel 77 case OP_NOT_DIGIT:
3714     for (i = 1; i <= min; i++)
3715     {
3716 ph10 427 if (eptr >= md->end_subject)
3717 ph10 426 {
3718 ph10 427 SCHECK_PARTIAL();
3719 ph10 510 MRRETURN(MATCH_NOMATCH);
3720 ph10 427 }
3721 nigel 77 GETCHARINC(c, eptr);
3722     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3723 ph10 510 MRRETURN(MATCH_NOMATCH);
3724 nigel 77 }
3725     break;
3726    
3727     case OP_DIGIT:
3728     for (i = 1; i <= min; i++)
3729     {
3730 ph10 427 if (eptr >= md->end_subject)
3731 ph10 426 {
3732 ph10 427 SCHECK_PARTIAL();
3733 ph10 510 MRRETURN(MATCH_NOMATCH);
3734 ph10 427 }
3735 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3736 ph10 510 MRRETURN(MATCH_NOMATCH);
3737 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3738     }
3739     break;
3740    
3741     case OP_NOT_WHITESPACE:
3742     for (i = 1; i <= min; i++)
3743     {
3744 ph10 427 if (eptr >= md->end_subject)
3745 ph10 426 {
3746 ph10 427 SCHECK_PARTIAL();
3747 ph10 510 MRRETURN(MATCH_NOMATCH);
3748 ph10 427 }
3749 ph10 426 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3750 ph10 510 MRRETURN(MATCH_NOMATCH);
3751 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3752 nigel 77 }
3753     break;
3754    
3755     case OP_WHITESPACE:
3756     for (i = 1; i <= min; i++)
3757     {
3758 ph10 427 if (eptr >= md->end_subject)
3759 ph10 426 {
3760 ph10 427 SCHECK_PARTIAL();
3761 ph10 510 MRRETURN(MATCH_NOMATCH);
3762 ph10 427 }
3763 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3764 ph10 510 MRRETURN(MATCH_NOMATCH);
3765 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3766     }
3767     break;
3768    
3769     case OP_NOT_WORDCHAR:
3770     for (i = 1; i <= min; i++)
3771     {
3772 ph10 482 if (eptr >= md->end_subject)
3773     {
3774     SCHECK_PARTIAL();
3775 ph10 510 MRRETURN(MATCH_NOMATCH);
3776 ph10 482 }
3777     if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3778 ph10 510 MRRETURN(MATCH_NOMATCH);
3779 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3780 nigel 77 }
3781     break;
3782    
3783     case OP_WORDCHAR:
3784     for (i = 1; i <= min; i++)
3785     {
3786 ph10 427 if (eptr >= md->end_subject)
3787 ph10 426 {
3788 ph10 427 SCHECK_PARTIAL();
3789 ph10 510 MRRETURN(MATCH_NOMATCH);
3790 ph10 427 }
3791 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3792 ph10 510 MRRETURN(MATCH_NOMATCH);
3793 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3794     }
3795     break;
3796    
3797     default:
3798     RRETURN(PCRE_ERROR_INTERNAL);
3799     } /* End switch(ctype) */
3800    
3801     else
3802     #endif /* SUPPORT_UTF8 */
3803    
3804     /* Code for the non-UTF-8 case for minimum matching of operators other
3805 ph10 426 than OP_PROP and OP_NOTPROP. */
3806 nigel 77
3807     switch(ctype)
3808     {
3809     case OP_ANY:
3810 ph10 342 for (i = 1; i <= min; i++)
3811 nigel 77 {
3812 ph10 427 if (eptr >= md->end_subject)
3813 ph10 426 {
3814 ph10 427 SCHECK_PARTIAL();
3815 ph10 510 MRRETURN(MATCH_NOMATCH);
3816 ph10 427 }
3817 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3818 ph10 342 eptr++;
3819 nigel 77 }
3820     break;
3821    
3822 ph10 341 case OP_ALLANY:
3823 ph10 443 if (eptr > md->end_subject - min)
3824 ph10 428 {
3825 ph10 443 SCHECK_PARTIAL();
3826 ph10 510 MRRETURN(MATCH_NOMATCH);
3827 ph10 443 }
3828 ph10 341 eptr += min;
3829     break;
3830    
3831 nigel 77 case OP_ANYBYTE:
3832 ph10 443 if (eptr > md->end_subject - min)
3833 ph10 428 {
3834 ph10 443 SCHECK_PARTIAL();
3835 ph10 510 MRRETURN(MATCH_NOMATCH);
3836 ph10 443 }
3837 nigel 77 eptr += min;
3838     break;
3839    
3840 nigel 93 case OP_ANYNL:
3841     for (i = 1; i <= min; i++)
3842     {
3843 ph10 427 if (eptr >= md->end_subject)
3844 ph10 426 {
3845 ph10 427 SCHECK_PARTIAL();
3846 ph10 510 MRRETURN(MATCH_NOMATCH);
3847 ph10 427 }
3848 nigel 93 switch(*eptr++)
3849     {
3850 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3851 nigel 93 case 0x000d:
3852     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3853     break;
3854     case 0x000a:
3855 ph10 231 break;
3856    
3857 nigel 93 case 0x000b:
3858     case 0x000c:
3859     case 0x0085:
3860 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3861 nigel 93 break;
3862     }
3863     }
3864     break;
3865    
3866 ph10 178 case OP_NOT_HSPACE:
3867     for (i = 1; i <= min; i++)
3868     {
3869 ph10 427 if (eptr >= md->end_subject)
3870 ph10 426 {
3871 ph10 427 SCHECK_PARTIAL();
3872 ph10 510 MRRETURN(MATCH_NOMATCH);
3873 ph10 427 }
3874 ph10 178 switch(*eptr++)
3875     {
3876     default: break;
3877     case 0x09: /* HT */
3878     case 0x20: /* SPACE */
3879     case 0xa0: /* NBSP */
3880 ph10 510 MRRETURN(MATCH_NOMATCH);
3881 ph10 178 }
3882     }
3883     break;
3884    
3885     case OP_HSPACE:
3886     for (i = 1; i <= min; i++)
3887     {
3888 ph10 427 if (eptr >= md->end_subject)
3889 ph10 426 {
3890 ph10 427 SCHECK_PARTIAL();
3891 ph10 510 MRRETURN(MATCH_NOMATCH);
3892 ph10 427 }
3893 ph10 178 switch(*eptr++)
3894     {
3895 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3896 ph10 178 case 0x09: /* HT */
3897     case 0x20: /* SPACE */
3898     case 0xa0: /* NBSP */
3899 ph10 182 break;
3900 ph10 178 }
3901     }
3902     break;
3903    
3904     case OP_NOT_VSPACE:
3905     for (i = 1; i <= min; i++)
3906     {
3907 ph10 427 if (eptr >= md->end_subject)
3908 ph10 426 {
3909 ph10 427 SCHECK_PARTIAL();
3910 ph10 510 MRRETURN(MATCH_NOMATCH);
3911 ph10 427 }
3912 ph10 178 switch(*eptr++)
3913     {
3914     default: break;
3915     case 0x0a: /* LF */
3916     case 0x0b: /* VT */
3917     case 0x0c: /* FF */
3918     case 0x0d: /* CR */
3919     case 0x85: /* NEL */
3920 ph10 510 MRRETURN(MATCH_NOMATCH);
3921 ph10 178 }
3922     }
3923     break;
3924    
3925     case OP_VSPACE:
3926     for (i = 1; i <= min; i++)
3927     {
3928 ph10 427 if (eptr >= md->end_subject)
3929 ph10 426 {
3930 ph10 427 SCHECK_PARTIAL();
3931 ph10 510 MRRETURN(MATCH_NOMATCH);
3932 ph10 427 }
3933 ph10