/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 510 - (hide annotations) (download)
Sat Mar 27 17:45:29 2010 UTC (3 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 174552 byte(s)
Add support for *MARK and names for *PRUNE, *SKIP, *THEN.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74     #define MATCH_COMMIT (-999)
75     #define MATCH_PRUNE (-998)
76     #define MATCH_SKIP (-997)
77 ph10 510 #define MATCH_SKIP_ARG (-996)
78     #define MATCH_THEN (-995)
79 ph10 210
80 ph10 510 /* This is a convenience macro for code that occurs many times. */
81    
82     #define MRRETURN(ra) \
83     { \
84     md->mark = markptr; \
85     RRETURN(ra); \
86     }
87    
88 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
89     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
90     because the offset vector is always a multiple of 3 long. */
91    
92     #define REC_STACK_SAVE_MAX 30
93    
94     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
95    
96     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
97     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
98    
99    
100    
101 ph10 475 #ifdef PCRE_DEBUG
102 nigel 77 /*************************************************
103     * Debugging function to print chars *
104     *************************************************/
105    
106     /* Print a sequence of chars in printable format, stopping at the end of the
107     subject if requested.
108    
109     Arguments:
110     p points to characters
111     length number to print
112     is_subject TRUE if printing from within md->start_subject
113     md pointer to matching data block, if is_subject is TRUE
114    
115     Returns: nothing
116     */
117    
118     static void
119     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
120     {
121 nigel 93 unsigned int c;
122 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
123     while (length-- > 0)
124     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
125     }
126     #endif
127    
128    
129    
130     /*************************************************
131     * Match a back-reference *
132     *************************************************/
133    
134     /* If a back reference hasn't been set, the length that is passed is greater
135     than the number of characters left in the string, so the match fails.
136    
137     Arguments:
138     offset index into the offset vector
139     eptr points into the subject
140     length length to be matched
141     md points to match data block
142     ims the ims flags
143    
144     Returns: TRUE if matched
145     */
146    
147     static BOOL
148 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
149 nigel 77 unsigned long int ims)
150     {
151 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
152 nigel 77
153 ph10 475 #ifdef PCRE_DEBUG
154 nigel 77 if (eptr >= md->end_subject)
155     printf("matching subject <null>");
156     else
157     {
158     printf("matching subject ");
159     pchars(eptr, length, TRUE, md);
160     }
161     printf(" against backref ");
162     pchars(p, length, FALSE, md);
163     printf("\n");
164     #endif
165    
166     /* Always fail if not enough characters left */
167    
168     if (length > md->end_subject - eptr) return FALSE;
169    
170 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
171     properly if Unicode properties are supported. Otherwise, we can check only
172     ASCII characters. */
173 nigel 77
174     if ((ims & PCRE_CASELESS) != 0)
175     {
176 ph10 354 #ifdef SUPPORT_UTF8
177     #ifdef SUPPORT_UCP
178     if (md->utf8)
179     {
180 ph10 358 USPTR endptr = eptr + length;
181 ph10 354 while (eptr < endptr)
182     {
183 ph10 358 int c, d;
184 ph10 354 GETCHARINC(c, eptr);
185     GETCHARINC(d, p);
186     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
187 ph10 358 }
188     }
189 ph10 354 else
190     #endif
191     #endif
192    
193     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
194     is no UCP support. */
195 ph10 358
196 nigel 77 while (length-- > 0)
197 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
198 nigel 77 }
199 ph10 358
200 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
201     are in UTF-8 mode. */
202 ph10 358
203 nigel 77 else
204     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
205    
206     return TRUE;
207     }
208    
209    
210    
211     /***************************************************************************
212     ****************************************************************************
213     RECURSION IN THE match() FUNCTION
214    
215 nigel 87 The match() function is highly recursive, though not every recursive call
216     increases the recursive depth. Nevertheless, some regular expressions can cause
217     it to recurse to a great depth. I was writing for Unix, so I just let it call
218     itself recursively. This uses the stack for saving everything that has to be
219     saved for a recursive call. On Unix, the stack can be large, and this works
220     fine.
221 nigel 77
222 nigel 87 It turns out that on some non-Unix-like systems there are problems with
223     programs that use a lot of stack. (This despite the fact that every last chip
224     has oodles of memory these days, and techniques for extending the stack have
225     been known for decades.) So....
226 nigel 77
227     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
228     calls by keeping local variables that need to be preserved in blocks of memory
229 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
230 nigel 77 achieve this so that the actual code doesn't look very different to what it
231     always used to.
232 ph10 164
233 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
234 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
235     Switzer, the use of longjmp() has been abolished, at the cost of having to
236     provide a unique number for each call to RMATCH. There is no way of generating
237     a sequence of numbers at compile time in C. I have given them names, to make
238     them stand out more clearly.
239    
240     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
241     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
242 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
243     don't have indeterminate values; this has meant that the frame size can be
244 ph10 164 reduced because the result can be "passed back" by straight setting of the
245     variable instead of being passed in the frame.
246 nigel 77 ****************************************************************************
247     ***************************************************************************/
248    
249 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
250     below must be updated in sync. */
251 nigel 77
252 ph10 164 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
253     RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
254     RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
255     RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
256 ph10 210 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
257 ph10 212 RM51, RM52, RM53, RM54 };
258 ph10 164
259 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
260 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
261 ph10 501 actually used in this definition. */
262 nigel 77
263     #ifndef NO_RECURSE
264     #define REGISTER register
265 ph10 164
266 ph10 475 #ifdef PCRE_DEBUG
267 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
268 nigel 87 { \
269     printf("match() called in line %d\n", __LINE__); \
270 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
271 nigel 87 printf("to line %d\n", __LINE__); \
272     }
273     #define RRETURN(ra) \
274     { \
275     printf("match() returned %d from line %d ", ra, __LINE__); \
276     return ra; \
277     }
278     #else
279 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
280 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
281 nigel 77 #define RRETURN(ra) return ra
282 nigel 87 #endif
283    
284 nigel 77 #else
285    
286    
287 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
288     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
289     argument of match(), which never changes. */
290 nigel 77
291     #define REGISTER
292    
/* Heap-based replacement for a recursive call to match(). The resume point
number (rw) is stored in the current frame, a new frame is obtained from
pcre_stack_malloc and filled with the "call" arguments, the new frame becomes
the current one, and control jumps to HEAP_RECURSE. The L_<rw> label marks the
place where execution continues once the nested "call" has finished.

If the new frame cannot be allocated, the current "call" gives up with
PCRE_ERROR_NOMEMORY by way of RRETURN, rather than writing through a NULL
pointer. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xmarkptr = markptr;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
313    
314     #define RRETURN(ra)\
315     {\
316     heapframe *newframe = frame;\
317     frame = newframe->Xprevframe;\
318     (pcre_stack_free)(newframe);\
319     if (frame != NULL)\
320     {\
321 ph10 164 rrc = ra;\
322     goto HEAP_RETURN;\
323 nigel 77 }\
324     return ra;\
325     }
326    
327    
328     /* Structure for remembering the local variables in a private frame */
329    
330     typedef struct heapframe {
331     struct heapframe *Xprevframe;
332    
333     /* Function arguments that may change */
334    
335 ph10 409 USPTR Xeptr;
336 nigel 77 const uschar *Xecode;
337 ph10 409 USPTR Xmstart;
338 ph10 501 USPTR Xmarkptr;
339 nigel 77 int Xoffset_top;
340     long int Xims;
341     eptrblock *Xeptrb;
342     int Xflags;
343 nigel 91 unsigned int Xrdepth;
344 nigel 77
345     /* Function local variables */
346    
347 ph10 409 USPTR Xcallpat;
348 ph10 406 #ifdef SUPPORT_UTF8
349 ph10 409 USPTR Xcharptr;
350 ph10 406 #endif
351 ph10 409 USPTR Xdata;
352     USPTR Xnext;
353     USPTR Xpp;
354     USPTR Xprev;
355     USPTR Xsaved_eptr;
356 nigel 77
357     recursion_info Xnew_recursive;
358    
359     BOOL Xcur_is_word;
360     BOOL Xcondition;
361     BOOL Xprev_is_word;
362    
363     unsigned long int Xoriginal_ims;
364    
365     #ifdef SUPPORT_UCP
366     int Xprop_type;
367 nigel 87 int Xprop_value;
368 nigel 77 int Xprop_fail_result;
369     int Xprop_category;
370     int Xprop_chartype;
371 nigel 87 int Xprop_script;
372 ph10 123 int Xoclength;
373     uschar Xocchars[8];
374 nigel 77 #endif
375    
376 ph10 403 int Xcodelink;
377 nigel 77 int Xctype;
378 nigel 93 unsigned int Xfc;
379 nigel 77 int Xfi;
380     int Xlength;
381     int Xmax;
382     int Xmin;
383     int Xnumber;
384     int Xoffset;
385     int Xop;
386     int Xsave_capture_last;
387     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
388     int Xstacksave[REC_STACK_SAVE_MAX];
389    
390     eptrblock Xnewptrb;
391    
392 ph10 164 /* Where to jump back to */
393 nigel 77
394 ph10 164 int Xwhere;
395 ph10 165
396 nigel 77 } heapframe;
397    
398     #endif
399    
400    
401     /***************************************************************************
402     ***************************************************************************/
403    
404    
405    
406     /*************************************************
407     * Match from current position *
408     *************************************************/
409    
410 nigel 93 /* This function is called recursively in many circumstances. Whenever it
411 nigel 77 returns a negative (error) response, the outer incarnation must also return the
412 ph10 426 same response. */
413 nigel 77
414 ph10 426 /* These macros pack up tests that are used for partial matching, and which
415     appear several times in the code. We set the "hit end" flag if the pointer is
416     at the end of the subject and also past the start of the subject (i.e.
417 ph10 427 something has been matched). For hard partial matching, we then return
418     immediately. The second one is used when we already know we are past the end of
419     the subject. */
420 ph10 426
421     #define CHECK_PARTIAL()\
422 ph10 435 if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
423 ph10 427 {\
424     md->hitend = TRUE;\
425 ph10 510 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
426 ph10 427 }
427 ph10 426
428     #define SCHECK_PARTIAL()\
429 ph10 462 if (md->partial != 0 && eptr > mstart)\
430 ph10 427 {\
431     md->hitend = TRUE;\
432 ph10 510 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
433 ph10 427 }
434 ph10 426
435 ph10 427
436 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
437     the md structure (e.g. utf8, end_subject) into individual variables to improve
438 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
439     made performance worse.
440    
441     Arguments:
442 nigel 93 eptr pointer to current character in subject
443     ecode pointer to current position in compiled code
444 ph10 168 mstart pointer to the current match start position (can be modified
445 ph10 172 by encountering \K)
446 ph10 501 markptr pointer to the most recent MARK name, or NULL
447 nigel 77 offset_top current top pointer
448     md pointer to "static" info for the match
449     ims current /i, /m, and /s options
450     eptrb pointer to chain of blocks containing eptr at start of
451     brackets - for testing for empty matches
452     flags can contain
453     match_condassert - this is an assertion condition
454 nigel 93 match_cbegroup - this is the start of an unlimited repeat
455     group that can match an empty string
456 nigel 87 rdepth the recursion depth
457 nigel 77
458     Returns: MATCH_MATCH if matched ) these values are >= 0
459     MATCH_NOMATCH if failed to match )
460 ph10 510 a negative MATCH_xxx value for PRUNE, SKIP, etc
461 nigel 77 a negative PCRE_ERROR_xxx value if aborted by an error condition
462 nigel 87 (e.g. stopped by repeated call or recursion limit)
463 nigel 77 */
464    
465     static int
466 ph10 510 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
467     const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
468 ph10 501 eptrblock *eptrb, int flags, unsigned int rdepth)
469 nigel 77 {
470     /* These variables do not need to be preserved over recursion in this function,
471 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
472     "register" because they are used a lot in loops. */
473 nigel 77
474 nigel 91 register int rrc; /* Returns from recursive calls */
475     register int i; /* Used for loops not involving calls to RMATCH() */
476 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
477 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
478 nigel 77
479 nigel 93 BOOL minimize, possessive; /* Quantifier options */
480 ph10 403 int condcode;
481 nigel 93
482 nigel 77 /* When recursion is not being used, all "local" variables that have to be
483     preserved over calls to RMATCH() are part of a "frame" which is obtained from
484     heap storage. Set up the top-level frame here; others are obtained from the
485     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
486    
487     #ifdef NO_RECURSE
488     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
489     frame->Xprevframe = NULL; /* Marks the top level */
490    
491     /* Copy in the original argument variables */
492    
493     frame->Xeptr = eptr;
494     frame->Xecode = ecode;
495 ph10 168 frame->Xmstart = mstart;
496 ph10 501 frame->Xmarkptr = markptr;
497 nigel 77 frame->Xoffset_top = offset_top;
498     frame->Xims = ims;
499     frame->Xeptrb = eptrb;
500     frame->Xflags = flags;
501 nigel 87 frame->Xrdepth = rdepth;
502 nigel 77
503     /* This is where control jumps back to to effect "recursion" */
504    
505     HEAP_RECURSE:
506    
507     /* Macros make the argument variables come from the current frame */
508    
509     #define eptr frame->Xeptr
510     #define ecode frame->Xecode
511 ph10 168 #define mstart frame->Xmstart
512 ph10 501 #define markptr frame->Xmarkptr
513 nigel 77 #define offset_top frame->Xoffset_top
514     #define ims frame->Xims
515     #define eptrb frame->Xeptrb
516     #define flags frame->Xflags
517 nigel 87 #define rdepth frame->Xrdepth
518 nigel 77
519     /* Ditto for the local variables */
520    
521     #ifdef SUPPORT_UTF8
522     #define charptr frame->Xcharptr
523     #endif
524     #define callpat frame->Xcallpat
525 ph10 403 #define codelink frame->Xcodelink
526 nigel 77 #define data frame->Xdata
527     #define next frame->Xnext
528     #define pp frame->Xpp
529     #define prev frame->Xprev
530     #define saved_eptr frame->Xsaved_eptr
531    
532     #define new_recursive frame->Xnew_recursive
533    
534     #define cur_is_word frame->Xcur_is_word
535     #define condition frame->Xcondition
536     #define prev_is_word frame->Xprev_is_word
537    
538     #define original_ims frame->Xoriginal_ims
539    
540     #ifdef SUPPORT_UCP
541     #define prop_type frame->Xprop_type
542 nigel 87 #define prop_value frame->Xprop_value
543 nigel 77 #define prop_fail_result frame->Xprop_fail_result
544     #define prop_category frame->Xprop_category
545     #define prop_chartype frame->Xprop_chartype
546 nigel 87 #define prop_script frame->Xprop_script
547 ph10 115 #define oclength frame->Xoclength
548     #define occhars frame->Xocchars
549 nigel 77 #endif
550    
551     #define ctype frame->Xctype
552     #define fc frame->Xfc
553     #define fi frame->Xfi
554     #define length frame->Xlength
555     #define max frame->Xmax
556     #define min frame->Xmin
557     #define number frame->Xnumber
558     #define offset frame->Xoffset
559     #define op frame->Xop
560     #define save_capture_last frame->Xsave_capture_last
561     #define save_offset1 frame->Xsave_offset1
562     #define save_offset2 frame->Xsave_offset2
563     #define save_offset3 frame->Xsave_offset3
564     #define stacksave frame->Xstacksave
565    
566     #define newptrb frame->Xnewptrb
567    
568     /* When recursion is being used, local variables are allocated on the stack and
569     get preserved during recursion in the normal way. In this environment, fi and
570     i, and fc and c, can be the same variables. */
571    
572 nigel 93 #else /* NO_RECURSE not defined */
573 nigel 77 #define fi i
574     #define fc c
575    
576    
577 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
578     const uschar *charptr; /* in small blocks of the code. My normal */
579     #endif /* style of coding would have declared */
580     const uschar *callpat; /* them within each of those blocks. */
581     const uschar *data; /* However, in order to accommodate the */
582     const uschar *next; /* version of this code that uses an */
583     USPTR pp; /* external "stack" implemented on the */
584     const uschar *prev; /* heap, it is easier to declare them all */
585     USPTR saved_eptr; /* here, so the declarations can be cut */
586     /* out in a block. The only declarations */
587     recursion_info new_recursive; /* within blocks below are for variables */
588     /* that do not have to be preserved over */
589     BOOL cur_is_word; /* a recursive call to RMATCH(). */
590     BOOL condition;
591 nigel 77 BOOL prev_is_word;
592    
593     unsigned long int original_ims;
594    
595     #ifdef SUPPORT_UCP
596     int prop_type;
597 nigel 87 int prop_value;
598 nigel 77 int prop_fail_result;
599     int prop_category;
600     int prop_chartype;
601 nigel 87 int prop_script;
602 ph10 115 int oclength;
603     uschar occhars[8];
604 nigel 77 #endif
605    
606 ph10 399 int codelink;
607 nigel 77 int ctype;
608     int length;
609     int max;
610     int min;
611     int number;
612     int offset;
613     int op;
614     int save_capture_last;
615     int save_offset1, save_offset2, save_offset3;
616     int stacksave[REC_STACK_SAVE_MAX];
617    
618     eptrblock newptrb;
619 nigel 93 #endif /* NO_RECURSE */
620 nigel 77
621     /* These statements are here to stop the compiler complaining about uninitialized
622     variables. */
623    
624     #ifdef SUPPORT_UCP
625 nigel 87 prop_value = 0;
626 nigel 77 prop_fail_result = 0;
627     #endif
628    
629 nigel 93
630 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
631     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
632     used. Thanks to Ian Taylor for noticing this possibility and sending the
633     original patch. */
634    
635     TAIL_RECURSE:
636    
637 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
638     are specified by the macro RMATCH and RRETURN is used to return. When
639     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
640 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
641 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
642     complicated macro. It has to be used in one particular way. This shouldn't,
643     however, impact performance when true recursion is being used. */
644 nigel 77
645 ph10 164 #ifdef SUPPORT_UTF8
646     utf8 = md->utf8; /* Local copy of the flag */
647     #else
648     utf8 = FALSE;
649     #endif
650    
651 nigel 87 /* First check that we haven't called match() too many times, or that we
652     haven't exceeded the recursive call limit. */
653    
654 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
655 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
656 nigel 77
657     original_ims = ims; /* Save for resetting on ')' */
658 nigel 91
659 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
660     string, the match_cbegroup flag is set. When this is the case, add the current
661     subject pointer to the chain of such remembered pointers, to be checked when we
662     hit the closing ket, in order to break infinite loops that match no characters.
663 ph10 197 When match() is called in other circumstances, don't add to the chain. The
664     match_cbegroup flag must NOT be used with tail recursion, because the memory
665     block that is used is on the stack, so a new one may be required for each
666     match(). */
667 nigel 77
668 nigel 93 if ((flags & match_cbegroup) != 0)
669 nigel 77 {
670 ph10 197 newptrb.epb_saved_eptr = eptr;
671     newptrb.epb_prev = eptrb;
672     eptrb = &newptrb;
673 nigel 77 }
674    
675 nigel 93 /* Now start processing the opcodes. */
676 nigel 77
677     for (;;)
678     {
679 nigel 93 minimize = possessive = FALSE;
680 nigel 77 op = *ecode;
681 ph10 443
682 nigel 93 switch(op)
683     {
684 ph10 510 case OP_MARK:
685     markptr = ecode + 2;
686     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
687     ims, eptrb, flags, RM51);
688    
689     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
690     argument, and we must check whether that argument matches this MARK's
691     argument. It is passed back in md->start_match_ptr (an overloading of that
692     variable). If it does match, we reset that variable to the current subject
693     position and return MATCH_SKIP. Otherwise, pass back the return code
694     unaltered. */
695    
696     if (rrc == MATCH_SKIP_ARG &&
697     strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
698     {
699     md->start_match_ptr = eptr;
700     RRETURN(MATCH_SKIP);
701     }
702    
703     if (md->mark == NULL) md->mark = markptr;
704     RRETURN(rrc);
705    
706 ph10 210 case OP_FAIL:
707 ph10 510 MRRETURN(MATCH_NOMATCH);
708 ph10 211
709 ph10 510 case OP_COMMIT:
710     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
711     ims, eptrb, flags, RM52);
712     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
713     MRRETURN(MATCH_COMMIT);
714    
715 ph10 210 case OP_PRUNE:
716     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
717     ims, eptrb, flags, RM51);
718     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
719 ph10 510 MRRETURN(MATCH_PRUNE);
720 ph10 211
721 ph10 510 case OP_PRUNE_ARG:
722     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
723     ims, eptrb, flags, RM51);
724 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
725 ph10 510 md->mark = ecode + 2;
726     RRETURN(MATCH_PRUNE);
727 ph10 211
728 ph10 210 case OP_SKIP:
729     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
730     ims, eptrb, flags, RM53);
731     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
732 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
733 ph10 510 MRRETURN(MATCH_SKIP);
734 ph10 211
735 ph10 510 case OP_SKIP_ARG:
736     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
737     ims, eptrb, flags, RM53);
738     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
739    
740     /* Pass back the current skip name by overloading md->start_match_ptr and
741     returning the special MATCH_SKIP_ARG return code. This will either be
742     caught by a matching MARK, or get to the top, where it is treated the same
743     as PRUNE. */
744    
745     md->start_match_ptr = ecode + 2;
746     RRETURN(MATCH_SKIP_ARG);
747    
748 ph10 210 case OP_THEN:
749     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
750 ph10 212 ims, eptrb, flags, RM54);
751 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
752 ph10 510 MRRETURN(MATCH_THEN);
753    
754     case OP_THEN_ARG:
755     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
756     ims, eptrb, flags, RM54);
757     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
758     md->mark = ecode + 2;
759 ph10 212 RRETURN(MATCH_THEN);
760 ph10 211
761 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
762     the current subject position in the working slot at the top of the vector.
763     We mustn't change the current values of the data slot, because they may be
764     set from a previous iteration of this group, and be referred to by a
765     reference inside the group.
766 nigel 77
767 nigel 93 If the bracket fails to match, we need to restore this value and also the
768     values of the final offsets, in case they were set by a previous iteration
769     of the same bracket.
770 nigel 77
771 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
772     a non-capturing bracket. Don't worry about setting the flag for the error
773     case here; that is handled in the code for KET. */
774 nigel 77
775 nigel 93 case OP_CBRA:
776     case OP_SCBRA:
777     number = GET2(ecode, 1+LINK_SIZE);
778 nigel 77 offset = number << 1;
779    
780 ph10 475 #ifdef PCRE_DEBUG
781 nigel 93 printf("start bracket %d\n", number);
782     printf("subject=");
783 nigel 77 pchars(eptr, 16, TRUE, md);
784     printf("\n");
785     #endif
786    
787     if (offset < md->offset_max)
788     {
789     save_offset1 = md->offset_vector[offset];
790     save_offset2 = md->offset_vector[offset+1];
791     save_offset3 = md->offset_vector[md->offset_end - number];
792     save_capture_last = md->capture_last;
793    
794     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
795     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
796    
797 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
798 nigel 77 do
799     {
800 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
801     ims, eptrb, flags, RM1);
802 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
803 nigel 77 md->capture_last = save_capture_last;
804     ecode += GET(ecode, 1);
805     }
806     while (*ecode == OP_ALT);
807    
808     DPRINTF(("bracket %d failed\n", number));
809    
810     md->offset_vector[offset] = save_offset1;
811     md->offset_vector[offset+1] = save_offset2;
812     md->offset_vector[md->offset_end - number] = save_offset3;
813    
814 ph10 510 if (rrc != MATCH_THEN) md->mark = markptr;
815 nigel 77 RRETURN(MATCH_NOMATCH);
816     }
817    
818 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
819     as a non-capturing bracket. */
820 nigel 77
821 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
822     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
823    
824 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
825 nigel 77
826 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
827     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828    
829 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
830     final alternative within the brackets, we would return the result of a
831     recursive call to match() whatever happened. We can reduce stack usage by
832 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
833     is set.*/
834 nigel 77
835 nigel 93 case OP_BRA:
836     case OP_SBRA:
837     DPRINTF(("start non-capturing bracket\n"));
838     flags = (op >= OP_SBRA)? match_cbegroup : 0;
839 nigel 91 for (;;)
840 nigel 77 {
841 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
842 nigel 93 {
843 ph10 197 if (flags == 0) /* Not a possibly empty group */
844     {
845     ecode += _pcre_OP_lengths[*ecode];
846     DPRINTF(("bracket 0 tail recursion\n"));
847     goto TAIL_RECURSE;
848     }
849    
850     /* Possibly empty group; can't use tail recursion. */
851    
852     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
853     eptrb, flags, RM48);
854 ph10 510 if (rrc == MATCH_NOMATCH) md->mark = markptr;
855     RRETURN(rrc);
856 nigel 93 }
857 nigel 91
858     /* For non-final alternatives, continue the loop for a NOMATCH result;
859     otherwise return. */
860    
861 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
862     eptrb, flags, RM2);
863 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
864 nigel 77 ecode += GET(ecode, 1);
865     }
866 nigel 91 /* Control never reaches here. */
867 nigel 77
868     /* Conditional group: compilation checked that there are no more than
869     two branches. If the condition is false, skipping the first branch takes us
870     past the end if there is only one branch, but that's OK because that is
871 nigel 91 exactly what going to the ket would do. As there is only one branch to be
872     obeyed, we can use tail recursion to avoid using another stack frame. */
873 nigel 77
874     case OP_COND:
875 nigel 93 case OP_SCOND:
876 ph10 399 codelink= GET(ecode, 1);
877 ph10 406
878 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
879     inserted between OP_COND and an assertion condition. */
880 ph10 392
881 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
882     {
883     if (pcre_callout != NULL)
884     {
885     pcre_callout_block cb;
886     cb.version = 1; /* Version 1 of the callout block */
887     cb.callout_number = ecode[LINK_SIZE+2];
888     cb.offset_vector = md->offset_vector;
889     cb.subject = (PCRE_SPTR)md->start_subject;
890     cb.subject_length = md->end_subject - md->start_subject;
891     cb.start_match = mstart - md->start_subject;
892     cb.current_position = eptr - md->start_subject;
893     cb.pattern_position = GET(ecode, LINK_SIZE + 3);
894     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
895     cb.capture_top = offset_top/2;
896     cb.capture_last = md->capture_last;
897     cb.callout_data = md->callout_data;
898 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
899 ph10 381 if (rrc < 0) RRETURN(rrc);
900     }
901     ecode += _pcre_OP_lengths[OP_CALLOUT];
902     }
903 ph10 392
904 ph10 399 condcode = ecode[LINK_SIZE+1];
905 ph10 406
906 ph10 381 /* Now see what the actual condition is */
907 ph10 392
908 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
909 nigel 77 {
910 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
911     {
912 ph10 461 condition = FALSE;
913     ecode += GET(ecode, 1);
914     }
915 ph10 459 else
916 ph10 461 {
917 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
918     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
919 ph10 461
920 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
921     false, but the test was set up by name, scan the table to see if the
922     name refers to any other numbers, and test them. The condition is true
923     if any one is set. */
924 ph10 461
925 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
926     {
927     uschar *slotA = md->name_table;
928     for (i = 0; i < md->name_count; i++)
929 ph10 461 {
930     if (GET2(slotA, 0) == recno) break;
931 ph10 459 slotA += md->name_entry_size;
932     }
933 ph10 461
934 ph10 459 /* Found a name for the number - there can be only one; duplicate
935     names for different numbers are allowed, but not vice versa. First
936     scan down for duplicates. */
937 ph10 461
938 ph10 459 if (i < md->name_count)
939 ph10 461 {
940 ph10 459 uschar *slotB = slotA;
941     while (slotB > md->name_table)
942     {
943     slotB -= md->name_entry_size;
944     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
945     {
946     condition = GET2(slotB, 0) == md->recursive->group_num;
947 ph10 461 if (condition) break;
948     }
949 ph10 459 else break;
950 ph10 461 }
951    
952 ph10 459 /* Scan up for duplicates */
953 ph10 461
954 ph10 459 if (!condition)
955 ph10 461 {
956 ph10 459 slotB = slotA;
957     for (i++; i < md->name_count; i++)
958     {
959     slotB += md->name_entry_size;
960     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
961     {
962     condition = GET2(slotB, 0) == md->recursive->group_num;
963     if (condition) break;
964 ph10 461 }
965 ph10 459 else break;
966 ph10 461 }
967     }
968 ph10 459 }
969 ph10 461 }
970    
971 ph10 459 /* Choose the branch according to the condition */
972 ph10 461
973 ph10 459 ecode += condition? 3 : GET(ecode, 1);
974     }
975 ph10 461 }
976 nigel 93
977 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
978 nigel 93 {
979 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
980 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
981 ph10 461
982 ph10 459 /* If the numbered capture is unset, but the reference was by name,
983 ph10 461 scan the table to see if the name refers to any other numbers, and test
984     them. The condition is true if any one is set. This is tediously similar
985     to the code above, but not close enough to try to amalgamate. */
986    
987 ph10 459 if (!condition && condcode == OP_NCREF)
988     {
989 ph10 461 int refno = offset >> 1;
990 ph10 459 uschar *slotA = md->name_table;
991 ph10 461
992 ph10 459 for (i = 0; i < md->name_count; i++)
993 ph10 461 {
994     if (GET2(slotA, 0) == refno) break;
995 ph10 459 slotA += md->name_entry_size;
996     }
997 ph10 461
998     /* Found a name for the number - there can be only one; duplicate names
999     for different numbers are allowed, but not vice versa. First scan down
1000 ph10 459 for duplicates. */
1001 ph10 461
1002 ph10 459 if (i < md->name_count)
1003 ph10 461 {
1004 ph10 459 uschar *slotB = slotA;
1005     while (slotB > md->name_table)
1006     {
1007     slotB -= md->name_entry_size;
1008     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1009     {
1010     offset = GET2(slotB, 0) << 1;
1011 ph10 461 condition = offset < offset_top &&
1012 ph10 459 md->offset_vector[offset] >= 0;
1013 ph10 461 if (condition) break;
1014     }
1015 ph10 459 else break;
1016 ph10 461 }
1017    
1018 ph10 459 /* Scan up for duplicates */
1019 ph10 461
1020 ph10 459 if (!condition)
1021 ph10 461 {
1022 ph10 459 slotB = slotA;
1023     for (i++; i < md->name_count; i++)
1024     {
1025     slotB += md->name_entry_size;
1026     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1027     {
1028     offset = GET2(slotB, 0) << 1;
1029 ph10 461 condition = offset < offset_top &&
1030 ph10 459 md->offset_vector[offset] >= 0;
1031 ph10 461 if (condition) break;
1032     }
1033 ph10 459 else break;
1034 ph10 461 }
1035     }
1036 ph10 459 }
1037 ph10 461 }
1038    
1039 ph10 459 /* Choose the branch according to the condition */
1040    
1041 nigel 93 ecode += condition? 3 : GET(ecode, 1);
1042 nigel 77 }
1043    
1044 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
1045 nigel 93 {
1046     condition = FALSE;
1047     ecode += GET(ecode, 1);
1048     }
1049    
1050 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
1051 nigel 93 the final argument match_condassert causes it to stop at the end of an
1052     assertion. */
1053 nigel 77
1054     else
1055     {
1056 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1057     match_condassert, RM3);
1058 nigel 77 if (rrc == MATCH_MATCH)
1059     {
1060 nigel 93 condition = TRUE;
1061     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1062 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1063     }
1064 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1065 nigel 77 {
1066     RRETURN(rrc); /* Need braces because of following else */
1067     }
1068 nigel 93 else
1069     {
1070     condition = FALSE;
1071 ph10 399 ecode += codelink;
1072 nigel 93 }
1073     }
1074 nigel 91
1075 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1076 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1077     match_cbegroup is required for an unlimited repeat of a possibly empty
1078     group. If the second alternative doesn't exist, we can just plough on. */
1079 nigel 91
1080 nigel 93 if (condition || *ecode == OP_ALT)
1081     {
1082 nigel 91 ecode += 1 + LINK_SIZE;
1083 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1084     {
1085     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1086     RRETURN(rrc);
1087     }
1088     else /* Group must match something */
1089     {
1090     flags = 0;
1091     goto TAIL_RECURSE;
1092     }
1093 nigel 77 }
1094 ph10 395 else /* Condition false & no alternative */
1095 nigel 93 {
1096     ecode += 1 + LINK_SIZE;
1097     }
1098     break;
1099 nigel 77
1100 ph10 461
1101 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1102     to close any currently open capturing brackets. */
1103 ph10 461
1104 ph10 447 case OP_CLOSE:
1105 ph10 461 number = GET2(ecode, 1);
1106 ph10 447 offset = number << 1;
1107 ph10 461
1108 ph10 475 #ifdef PCRE_DEBUG
1109 ph10 447 printf("end bracket %d at *ACCEPT", number);
1110     printf("\n");
1111     #endif
1112 nigel 77
1113 ph10 447 md->capture_last = number;
1114     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1115     {
1116     md->offset_vector[offset] =
1117     md->offset_vector[md->offset_end - number];
1118     md->offset_vector[offset+1] = eptr - md->start_subject;
1119     if (offset_top <= offset) offset_top = offset + 2;
1120     }
1121     ecode += 3;
1122 ph10 461 break;
1123 ph10 447
1124    
1125 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1126     recursion, we should restore the offsets appropriately and continue from
1127     after the call. */
1128 nigel 77
1129 ph10 210 case OP_ACCEPT:
1130 nigel 77 case OP_END:
1131     if (md->recursive != NULL && md->recursive->group_num == 0)
1132     {
1133     recursion_info *rec = md->recursive;
1134 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1135 nigel 77 md->recursive = rec->prevrec;
1136     memmove(md->offset_vector, rec->offset_save,
1137     rec->saved_max * sizeof(int));
1138 ph10 461 offset_top = rec->save_offset_top;
1139 nigel 77 ims = original_ims;
1140     ecode = rec->after_call;
1141     break;
1142     }
1143    
1144 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1145     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1146     the subject. In both cases, backtracking will then try other alternatives,
1147     if any. */
1148 ph10 443
1149 ph10 442 if (eptr == mstart &&
1150     (md->notempty ||
1151 ph10 443 (md->notempty_atstart &&
1152 ph10 442 mstart == md->start_subject + md->start_offset)))
1153 ph10 510 MRRETURN(MATCH_NOMATCH);
1154 ph10 443
1155 ph10 442 /* Otherwise, we have a match. */
1156 nigel 77
1157 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1158     md->end_offset_top = offset_top; /* and how many extracts were taken */
1159 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1160 ph10 510 MRRETURN(MATCH_MATCH);
1161 nigel 77
1162     /* Change option settings */
1163    
1164     case OP_OPT:
1165     ims = ecode[1];
1166     ecode += 2;
1167     DPRINTF(("ims set to %02lx\n", ims));
1168     break;
1169    
1170     /* Assertion brackets. Check the alternative branches in turn - the
1171     matching won't pass the KET for an assertion. If any one branch matches,
1172     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1173     start of each branch to move the current point backwards, so the code at
1174     this level is identical to the lookahead case. */
1175    
1176     case OP_ASSERT:
1177     case OP_ASSERTBACK:
1178     do
1179     {
1180 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1181     RM4);
1182 ph10 501 if (rrc == MATCH_MATCH)
1183 ph10 500 {
1184     mstart = md->start_match_ptr; /* In case \K reset it */
1185     break;
1186 ph10 501 }
1187 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1188 nigel 77 ecode += GET(ecode, 1);
1189     }
1190     while (*ecode == OP_ALT);
1191 ph10 510 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1192 nigel 77
1193     /* If checking an assertion for a condition, return MATCH_MATCH. */
1194    
1195     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1196    
1197     /* Continue from after the assertion, updating the offsets high water
1198     mark, since extracts may have been taken during the assertion. */
1199    
1200     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1201     ecode += 1 + LINK_SIZE;
1202     offset_top = md->end_offset_top;
1203     continue;
1204    
1205 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1206 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1207 ph10 473 branches. */
1208 nigel 77
1209     case OP_ASSERT_NOT:
1210     case OP_ASSERTBACK_NOT:
1211     do
1212     {
1213 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1214     RM5);
1215 ph10 510 if (rrc == MATCH_MATCH) MRRETURN(MATCH_NOMATCH);
1216 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1217     {
1218     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1219 ph10 482 break;
1220     }
1221 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1222 nigel 77 ecode += GET(ecode,1);
1223     }
1224     while (*ecode == OP_ALT);
1225    
1226     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1227    
1228     ecode += 1 + LINK_SIZE;
1229     continue;
1230    
1231     /* Move the subject pointer back. This occurs only at the start of
1232     each branch of a lookbehind assertion. If we are too close to the start to
1233     move back, this match function fails. When working with UTF-8 we move
1234     back a number of characters, not bytes. */
1235    
1236     case OP_REVERSE:
1237     #ifdef SUPPORT_UTF8
1238     if (utf8)
1239     {
1240 nigel 93 i = GET(ecode, 1);
1241     while (i-- > 0)
1242 nigel 77 {
1243     eptr--;
1244 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1245 ph10 207 BACKCHAR(eptr);
1246 nigel 77 }
1247     }
1248     else
1249     #endif
1250    
1251     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1252    
1253     {
1254 nigel 93 eptr -= GET(ecode, 1);
1255 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1256 nigel 77 }
1257    
1258 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1259 nigel 77
1260 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1261 nigel 77 ecode += 1 + LINK_SIZE;
1262     break;
1263    
1264     /* The callout item calls an external function, if one is provided, passing
1265     details of the match so far. This is mainly for debugging, though the
1266     function is able to force a failure. */
1267    
1268     case OP_CALLOUT:
1269     if (pcre_callout != NULL)
1270     {
1271     pcre_callout_block cb;
1272     cb.version = 1; /* Version 1 of the callout block */
1273     cb.callout_number = ecode[1];
1274     cb.offset_vector = md->offset_vector;
1275 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1276 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
1277 ph10 168 cb.start_match = mstart - md->start_subject;
1278 nigel 77 cb.current_position = eptr - md->start_subject;
1279     cb.pattern_position = GET(ecode, 2);
1280     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1281     cb.capture_top = offset_top/2;
1282     cb.capture_last = md->capture_last;
1283     cb.callout_data = md->callout_data;
1284 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1285 nigel 77 if (rrc < 0) RRETURN(rrc);
1286     }
1287     ecode += 2 + 2*LINK_SIZE;
1288     break;
1289    
1290     /* Recursion either matches the current regex, or some subexpression. The
1291     offset data is the offset to the starting bracket from the start of the
1292     whole pattern. (This is so that it works from duplicated subpatterns.)
1293    
1294     If there are any capturing brackets started but not finished, we have to
1295     save their starting points and reinstate them after the recursion. However,
1296     we don't know how many such there are (offset_top records the completed
1297     total) so we just have to save all the potential data. There may be up to
1298     65535 such values, which is too large to put on the stack, but using malloc
1299     for small numbers seems expensive. As a compromise, the stack is used when
1300     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1301     is used. If the malloc fails, the offsets cannot be saved, so the match
1302     is abandoned: PCRE_ERROR_NOMEMORY is returned, and like any other error
1303     code it is passed straight back up through the enclosing match() calls.
1304    
1305     There are also other values that have to be saved. We use a chained
1306     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1307     for the original version of this logic. */
1308    
1309     case OP_RECURSE:
1310     {
1311     callpat = md->start_code + GET(ecode, 1);
1312 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1313     GET2(callpat, 1 + LINK_SIZE);
1314 nigel 77
1315     /* Add to "recursing stack" */
1316    
1317     new_recursive.prevrec = md->recursive;
1318     md->recursive = &new_recursive;
1319    
1320     /* Find where to continue from afterwards */
1321    
1322     ecode += 1 + LINK_SIZE;
1323     new_recursive.after_call = ecode;
1324    
1325     /* Now save the offset data. */
1326    
1327     new_recursive.saved_max = md->offset_end;
1328     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1329     new_recursive.offset_save = stacksave;
1330     else
1331     {
1332     new_recursive.offset_save =
1333     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1334     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1335     }
1336    
1337     memcpy(new_recursive.offset_save, md->offset_vector,
1338     new_recursive.saved_max * sizeof(int));
1339 ph10 461 new_recursive.save_offset_top = offset_top;
1340 nigel 77
1341     /* OK, now we can do the recursion. For each top-level alternative we
1342     restore the offset and recursion data. */
1343    
1344     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1345 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1346 nigel 77 do
1347     {
1348 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1349     md, ims, eptrb, flags, RM6);
1350 nigel 77 if (rrc == MATCH_MATCH)
1351     {
1352 nigel 87 DPRINTF(("Recursion matched\n"));
1353 nigel 77 md->recursive = new_recursive.prevrec;
1354     if (new_recursive.offset_save != stacksave)
1355     (pcre_free)(new_recursive.offset_save);
1356 ph10 510 MRRETURN(MATCH_MATCH);
1357 nigel 77 }
1358 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1359 nigel 87 {
1360     DPRINTF(("Recursion gave error %d\n", rrc));
1361 ph10 400 if (new_recursive.offset_save != stacksave)
1362     (pcre_free)(new_recursive.offset_save);
1363 nigel 87 RRETURN(rrc);
1364     }
1365 nigel 77
1366     md->recursive = &new_recursive;
1367     memcpy(md->offset_vector, new_recursive.offset_save,
1368     new_recursive.saved_max * sizeof(int));
1369     callpat += GET(callpat, 1);
1370     }
1371     while (*callpat == OP_ALT);
1372    
1373     DPRINTF(("Recursion didn't match\n"));
1374     md->recursive = new_recursive.prevrec;
1375     if (new_recursive.offset_save != stacksave)
1376     (pcre_free)(new_recursive.offset_save);
1377 ph10 510 MRRETURN(MATCH_NOMATCH);
1378 nigel 77 }
1379     /* Control never reaches here */
1380    
1381     /* "Once" brackets are like assertion brackets except that after a match,
1382     the point in the subject string is not moved back. Thus there can never be
1383     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1384     Check the alternative branches in turn - the matching won't pass the KET
1385     for this kind of subpattern. If any one branch matches, we carry on as at
1386 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1387     the start-of-match value in case it was changed by \K. */
1388 nigel 77
1389     case OP_ONCE:
1390 nigel 91 prev = ecode;
1391     saved_eptr = eptr;
1392    
1393     do
1394 nigel 77 {
1395 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1396 ph10 501 if (rrc == MATCH_MATCH)
1397 ph10 500 {
1398     mstart = md->start_match_ptr;
1399     break;
1400 ph10 501 }
1401 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1402 nigel 91 ecode += GET(ecode,1);
1403     }
1404     while (*ecode == OP_ALT);
1405 nigel 77
1406 nigel 91 /* If we hit the end of the group (which could be repeated), fail */
1407 nigel 77
1408 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1409 nigel 77
1410 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1411     mark, since extracts may have been taken. */
1412 nigel 77
1413 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1414 nigel 77
1415 nigel 91 offset_top = md->end_offset_top;
1416     eptr = md->end_match_ptr;
1417 nigel 77
1418 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1419     happens for a repeating ket if no characters were matched in the group.
1420     This is the forcible breaking of infinite loops as implemented in Perl
1421     5.005. If there is an options reset, it will get obeyed in the normal
1422     course of events. */
1423 nigel 77
1424 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1425     {
1426     ecode += 1+LINK_SIZE;
1427     break;
1428     }
1429 nigel 77
1430 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1431     preceding bracket, in the appropriate order. The second "call" of match()
1432     uses tail recursion, to avoid using another stack frame. We need to reset
1433     any options that changed within the bracket before re-running it, so
1434     check the next opcode. */
1435 nigel 77
1436 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1437     {
1438     ims = (ims & ~PCRE_IMS) | ecode[4];
1439     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1440     }
1441 nigel 77
1442 nigel 91 if (*ecode == OP_KETRMIN)
1443     {
1444 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1445 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1446     ecode = prev;
1447 ph10 197 flags = 0;
1448 nigel 91 goto TAIL_RECURSE;
1449 nigel 77 }
1450 nigel 91 else /* OP_KETRMAX */
1451     {
1452 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1453 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1454     ecode += 1 + LINK_SIZE;
1455 ph10 197 flags = 0;
1456 nigel 91 goto TAIL_RECURSE;
1457     }
1458     /* Control never gets here */
1459 nigel 77
1460     /* An alternation is the end of a branch; scan along to find the end of the
1461     bracketed group and go to there. */
1462    
1463     case OP_ALT:
1464     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1465     break;
1466    
1467 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1468     indicating that it may occur zero times. It may repeat infinitely, or not
1469     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1470     with fixed upper repeat limits are compiled as a number of copies, with the
1471     optional ones preceded by BRAZERO or BRAMINZERO. */
1472 nigel 77
1473     case OP_BRAZERO:
1474     {
1475     next = ecode+1;
1476 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1477 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1478     do next += GET(next,1); while (*next == OP_ALT);
1479 nigel 93 ecode = next + 1 + LINK_SIZE;
1480 nigel 77 }
1481     break;
1482    
1483     case OP_BRAMINZERO:
1484     {
1485     next = ecode+1;
1486 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1487 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1488 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1489     ecode++;
1490     }
1491     break;
1492    
1493 ph10 335 case OP_SKIPZERO:
1494     {
1495     next = ecode+1;
1496     do next += GET(next,1); while (*next == OP_ALT);
1497     ecode = next + 1 + LINK_SIZE;
1498     }
1499     break;
1500    
1501 nigel 93 /* End of a group, repeated or non-repeating. */
1502 nigel 77
1503     case OP_KET:
1504     case OP_KETRMIN:
1505     case OP_KETRMAX:
1506 nigel 91 prev = ecode - GET(ecode, 1);
1507 nigel 77
1508 nigel 93 /* If this was a group that remembered the subject start, in order to break
1509     infinite repeats of empty string matches, retrieve the subject start from
1510     the chain. Otherwise, set it NULL. */
1511 nigel 77
1512 nigel 93 if (*prev >= OP_SBRA)
1513     {
1514     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1515     eptrb = eptrb->epb_prev; /* Backup to previous group */
1516     }
1517     else saved_eptr = NULL;
1518 nigel 77
1519 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1520     matching and return MATCH_MATCH, but record the current high water mark for
1521     use by positive assertions. We also need to record the match start in case
1522     it was changed by \K. */
1523 nigel 93
1524 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1525     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1526     *prev == OP_ONCE)
1527     {
1528     md->end_match_ptr = eptr; /* For ONCE */
1529     md->end_offset_top = offset_top;
1530 ph10 500 md->start_match_ptr = mstart;
1531 ph10 510 MRRETURN(MATCH_MATCH);
1532 nigel 91 }
1533 nigel 77
1534 nigel 93 /* For capturing groups we have to check the group number back at the start
1535     and if necessary complete handling an extraction by setting the offsets and
1536     bumping the high water mark. Note that whole-pattern recursion is coded as
1537     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1538     when the OP_END is reached. Other recursion is handled here. */
1539 nigel 77
1540 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1541 nigel 91 {
1542 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1543 nigel 91 offset = number << 1;
1544 ph10 461
1545 ph10 475 #ifdef PCRE_DEBUG
1546 nigel 91 printf("end bracket %d", number);
1547     printf("\n");
1548 nigel 77 #endif
1549    
1550 nigel 93 md->capture_last = number;
1551     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1552 nigel 91 {
1553 nigel 93 md->offset_vector[offset] =
1554     md->offset_vector[md->offset_end - number];
1555     md->offset_vector[offset+1] = eptr - md->start_subject;
1556     if (offset_top <= offset) offset_top = offset + 2;
1557     }
1558 nigel 77
1559 nigel 93 /* Handle a recursively called group. Restore the offsets
1560     appropriately and continue from after the call. */
1561 nigel 77
1562 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1563     {
1564     recursion_info *rec = md->recursive;
1565     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1566     md->recursive = rec->prevrec;
1567     memcpy(md->offset_vector, rec->offset_save,
1568     rec->saved_max * sizeof(int));
1569 ph10 461 offset_top = rec->save_offset_top;
1570 nigel 93 ecode = rec->after_call;
1571     ims = original_ims;
1572     break;
1573 nigel 77 }
1574 nigel 91 }
1575 nigel 77
1576 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1577     flags, in case they got changed during the group. */
1578 nigel 77
1579 nigel 91 ims = original_ims;
1580     DPRINTF(("ims reset to %02lx\n", ims));
1581 nigel 77
1582 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1583     happens for a repeating ket if no characters were matched in the group.
1584     This is the forcible breaking of infinite loops as implemented in Perl
1585     5.005. If there is an options reset, it will get obeyed in the normal
1586     course of events. */
1587 nigel 77
1588 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1589     {
1590     ecode += 1 + LINK_SIZE;
1591     break;
1592     }
1593 nigel 77
1594 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1595     preceding bracket, in the appropriate order. In the second case, we can use
1596 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1597     unlimited repeat of a group that can match an empty string. */
1598 nigel 77
1599 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1600    
1601 nigel 91 if (*ecode == OP_KETRMIN)
1602     {
1603 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1604 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1605 ph10 197 if (flags != 0) /* Could match an empty string */
1606     {
1607     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1608     RRETURN(rrc);
1609     }
1610 nigel 91 ecode = prev;
1611     goto TAIL_RECURSE;
1612 nigel 77 }
1613 nigel 91 else /* OP_KETRMAX */
1614     {
1615 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1616 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1617     ecode += 1 + LINK_SIZE;
1618 ph10 197 flags = 0;
1619 nigel 91 goto TAIL_RECURSE;
1620     }
1621     /* Control never gets here */
1622 nigel 77
1623     /* Start of subject unless notbol, or after internal newline if multiline */
1624    
1625     case OP_CIRC:
1626 ph10 510 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1627 nigel 77 if ((ims & PCRE_MULTILINE) != 0)
1628     {
1629 nigel 91 if (eptr != md->start_subject &&
1630 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1631 ph10 510 MRRETURN(MATCH_NOMATCH);
1632 nigel 77 ecode++;
1633     break;
1634     }
1635     /* ... else fall through */
1636    
1637     /* Start of subject assertion */
1638    
1639     case OP_SOD:
1640 ph10 510 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1641 nigel 77 ecode++;
1642     break;
1643    
1644     /* Start of match assertion */
1645    
1646     case OP_SOM:
1647 ph10 510 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1648 nigel 77 ecode++;
1649     break;
1650 ph10 172
1651 ph10 168 /* Reset the start of match point */
1652 ph10 172
1653 ph10 168 case OP_SET_SOM:
1654     mstart = eptr;
1655 ph10 172 ecode++;
1656     break;
1657 nigel 77
1658     /* Assert before internal newline if multiline, or before a terminating
1659     newline unless endonly is set, else end of subject unless noteol is set. */
1660    
1661     case OP_DOLL:
1662     if ((ims & PCRE_MULTILINE) != 0)
1663     {
1664     if (eptr < md->end_subject)
1665 ph10 510 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1666 nigel 77 else
1667 ph10 510 { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1668 nigel 77 ecode++;
1669     break;
1670     }
1671     else
1672     {
1673 ph10 510 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1674 nigel 77 if (!md->endonly)
1675     {
1676 nigel 91 if (eptr != md->end_subject &&
1677 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1678 ph10 510 MRRETURN(MATCH_NOMATCH);
1679 nigel 77 ecode++;
1680     break;
1681     }
1682     }
1683 nigel 91 /* ... else fall through for endonly */
1684 nigel 77
1685     /* End of subject assertion (\z) */
1686    
1687     case OP_EOD:
1688 ph10 510 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1689 nigel 77 ecode++;
1690     break;
1691    
1692     /* End of subject or ending \n assertion (\Z) */
1693    
1694     case OP_EODN:
1695 nigel 91 if (eptr != md->end_subject &&
1696 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1697 ph10 510 MRRETURN(MATCH_NOMATCH);
1698 nigel 77 ecode++;
1699     break;
1700    
1701     /* Word boundary assertions */
1702    
1703     case OP_NOT_WORD_BOUNDARY:
1704     case OP_WORD_BOUNDARY:
1705     {
1706    
1707     /* Find out if the previous and current characters are "word" characters.
1708     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1709 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1710 ph10 435 partial matching. */
1711 nigel 77
1712     #ifdef SUPPORT_UTF8
1713     if (utf8)
1714     {
1715     if (eptr == md->start_subject) prev_is_word = FALSE; else
1716     {
1717 ph10 409 USPTR lastptr = eptr - 1;
1718 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1719 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1720 nigel 77 GETCHAR(c, lastptr);
1721     prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1722     }
1723 ph10 443 if (eptr >= md->end_subject)
1724 nigel 77 {
1725 ph10 443 SCHECK_PARTIAL();
1726     cur_is_word = FALSE;
1727 ph10 428 }
1728     else
1729     {
1730 nigel 77 GETCHAR(c, eptr);
1731     cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1732     }
1733     }
1734     else
1735     #endif
1736    
1737 ph10 428 /* Not in UTF-8 mode */
1738 nigel 77
1739     {
1740 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1741     {
1742 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1743 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1744     }
1745 ph10 443 if (eptr >= md->end_subject)
1746 ph10 428 {
1747 ph10 443 SCHECK_PARTIAL();
1748     cur_is_word = FALSE;
1749 ph10 428 }
1750     else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1751 nigel 77 }
1752    
1753     /* Now see if the situation is what we want */
1754    
1755     if ((*ecode++ == OP_WORD_BOUNDARY)?
1756     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1757 ph10 510 MRRETURN(MATCH_NOMATCH);
1758 nigel 77 }
1759     break;
1760    
1761     /* Match a single character type; inline for speed */
1762    
1763     case OP_ANY:
1764 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1765 ph10 345 /* Fall through */
1766    
1767 ph10 341 case OP_ALLANY:
1768 ph10 443 if (eptr++ >= md->end_subject)
1769 ph10 428 {
1770 ph10 443 SCHECK_PARTIAL();
1771 ph10 510 MRRETURN(MATCH_NOMATCH);
1772 ph10 443 }
1773 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1774 nigel 77 ecode++;
1775     break;
1776    
1777     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1778     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1779    
1780     case OP_ANYBYTE:
1781 ph10 443 if (eptr++ >= md->end_subject)
1782 ph10 428 {
1783 ph10 443 SCHECK_PARTIAL();
1784 ph10 510 MRRETURN(MATCH_NOMATCH);
1785 ph10 443 }
1786 nigel 77 ecode++;
1787     break;
1788    
1789     case OP_NOT_DIGIT:
1790 ph10 443 if (eptr >= md->end_subject)
1791 ph10 428 {
1792 ph10 443 SCHECK_PARTIAL();
1793 ph10 510 MRRETURN(MATCH_NOMATCH);
1794 ph10 443 }
1795 nigel 77 GETCHARINCTEST(c, eptr);
1796     if (
1797     #ifdef SUPPORT_UTF8
1798     c < 256 &&
1799     #endif
1800     (md->ctypes[c] & ctype_digit) != 0
1801     )
1802 ph10 510 MRRETURN(MATCH_NOMATCH);
1803 nigel 77 ecode++;
1804     break;
1805    
1806     case OP_DIGIT:
1807 ph10 443 if (eptr >= md->end_subject)
1808 ph10 428 {
1809 ph10 443 SCHECK_PARTIAL();
1810 ph10 510 MRRETURN(MATCH_NOMATCH);
1811 ph10 443 }
1812 nigel 77 GETCHARINCTEST(c, eptr);
1813     if (
1814     #ifdef SUPPORT_UTF8
1815     c >= 256 ||
1816     #endif
1817     (md->ctypes[c] & ctype_digit) == 0
1818     )
1819 ph10 510 MRRETURN(MATCH_NOMATCH);
1820 nigel 77 ecode++;
1821     break;
1822    
1823     case OP_NOT_WHITESPACE:
1824 ph10 443 if (eptr >= md->end_subject)
1825 ph10 428 {
1826 ph10 443 SCHECK_PARTIAL();
1827 ph10 510 MRRETURN(MATCH_NOMATCH);
1828 ph10 443 }
1829 nigel 77 GETCHARINCTEST(c, eptr);
1830     if (
1831     #ifdef SUPPORT_UTF8
1832     c < 256 &&
1833     #endif
1834     (md->ctypes[c] & ctype_space) != 0
1835     )
1836 ph10 510 MRRETURN(MATCH_NOMATCH);
1837 nigel 77 ecode++;
1838     break;
1839    
1840     case OP_WHITESPACE:
1841 ph10 443 if (eptr >= md->end_subject)
1842 ph10 428 {
1843 ph10 443 SCHECK_PARTIAL();
1844 ph10 510 MRRETURN(MATCH_NOMATCH);
1845 ph10 443 }
1846 nigel 77 GETCHARINCTEST(c, eptr);
1847     if (
1848     #ifdef SUPPORT_UTF8
1849     c >= 256 ||
1850     #endif
1851     (md->ctypes[c] & ctype_space) == 0
1852     )
1853 ph10 510 MRRETURN(MATCH_NOMATCH);
1854 nigel 77 ecode++;
1855     break;
1856    
1857     case OP_NOT_WORDCHAR:
1858 ph10 443 if (eptr >= md->end_subject)
1859 ph10 428 {
1860 ph10 443 SCHECK_PARTIAL();
1861 ph10 510 MRRETURN(MATCH_NOMATCH);
1862 ph10 443 }
1863 nigel 77 GETCHARINCTEST(c, eptr);
1864     if (
1865     #ifdef SUPPORT_UTF8
1866     c < 256 &&
1867     #endif
1868     (md->ctypes[c] & ctype_word) != 0
1869     )
1870 ph10 510 MRRETURN(MATCH_NOMATCH);
1871 nigel 77 ecode++;
1872     break;
1873    
1874     case OP_WORDCHAR:
1875 ph10 443 if (eptr >= md->end_subject)
1876 ph10 428 {
1877 ph10 443 SCHECK_PARTIAL();
1878 ph10 510 MRRETURN(MATCH_NOMATCH);
1879 ph10 443 }
1880 nigel 77 GETCHARINCTEST(c, eptr);
1881     if (
1882     #ifdef SUPPORT_UTF8
1883     c >= 256 ||
1884     #endif
1885     (md->ctypes[c] & ctype_word) == 0
1886     )
1887 ph10 510 MRRETURN(MATCH_NOMATCH);
1888 nigel 77 ecode++;
1889     break;
1890    
1891 nigel 93 case OP_ANYNL:
1892 ph10 443 if (eptr >= md->end_subject)
1893 ph10 428 {
1894 ph10 443 SCHECK_PARTIAL();
1895 ph10 510 MRRETURN(MATCH_NOMATCH);
1896 ph10 443 }
1897 nigel 93 GETCHARINCTEST(c, eptr);
1898     switch(c)
1899     {
1900 ph10 510 default: MRRETURN(MATCH_NOMATCH);
1901 nigel 93 case 0x000d:
1902     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1903     break;
1904 ph10 231
1905 nigel 93 case 0x000a:
1906 ph10 231 break;
1907    
1908 nigel 93 case 0x000b:
1909     case 0x000c:
1910     case 0x0085:
1911     case 0x2028:
1912     case 0x2029:
1913 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1914 nigel 93 break;
1915     }
1916     ecode++;
1917     break;
1918    
1919 ph10 178 case OP_NOT_HSPACE:
1920 ph10 443 if (eptr >= md->end_subject)
1921 ph10 428 {
1922 ph10 443 SCHECK_PARTIAL();
1923 ph10 510 MRRETURN(MATCH_NOMATCH);
1924 ph10 443 }
1925 ph10 178 GETCHARINCTEST(c, eptr);
1926     switch(c)
1927     {
1928     default: break;
1929     case 0x09: /* HT */
1930     case 0x20: /* SPACE */
1931     case 0xa0: /* NBSP */
1932     case 0x1680: /* OGHAM SPACE MARK */
1933     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1934     case 0x2000: /* EN QUAD */
1935     case 0x2001: /* EM QUAD */
1936     case 0x2002: /* EN SPACE */
1937     case 0x2003: /* EM SPACE */
1938     case 0x2004: /* THREE-PER-EM SPACE */
1939     case 0x2005: /* FOUR-PER-EM SPACE */
1940     case 0x2006: /* SIX-PER-EM SPACE */
1941     case 0x2007: /* FIGURE SPACE */
1942     case 0x2008: /* PUNCTUATION SPACE */
1943     case 0x2009: /* THIN SPACE */
1944     case 0x200A: /* HAIR SPACE */
1945     case 0x202f: /* NARROW NO-BREAK SPACE */
1946     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1947     case 0x3000: /* IDEOGRAPHIC SPACE */
1948 ph10 510 MRRETURN(MATCH_NOMATCH);
1949 ph10 178 }
1950     ecode++;
1951     break;
1952    
1953     case OP_HSPACE:
1954 ph10 443 if (eptr >= md->end_subject)
1955 ph10 428 {
1956 ph10 443 SCHECK_PARTIAL();
1957 ph10 510 MRRETURN(MATCH_NOMATCH);
1958 ph10 443 }
1959 ph10 178 GETCHARINCTEST(c, eptr);
1960     switch(c)
1961     {
1962 ph10 510 default: MRRETURN(MATCH_NOMATCH);
1963 ph10 178 case 0x09: /* HT */
1964     case 0x20: /* SPACE */
1965     case 0xa0: /* NBSP */
1966     case 0x1680: /* OGHAM SPACE MARK */
1967     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1968     case 0x2000: /* EN QUAD */
1969     case 0x2001: /* EM QUAD */
1970     case 0x2002: /* EN SPACE */
1971     case 0x2003: /* EM SPACE */
1972     case 0x2004: /* THREE-PER-EM SPACE */
1973     case 0x2005: /* FOUR-PER-EM SPACE */
1974     case 0x2006: /* SIX-PER-EM SPACE */
1975     case 0x2007: /* FIGURE SPACE */
1976     case 0x2008: /* PUNCTUATION SPACE */
1977     case 0x2009: /* THIN SPACE */
1978     case 0x200A: /* HAIR SPACE */
1979     case 0x202f: /* NARROW NO-BREAK SPACE */
1980     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1981     case 0x3000: /* IDEOGRAPHIC SPACE */
1982     break;
1983     }
1984     ecode++;
1985     break;
1986    
1987     case OP_NOT_VSPACE:
1988 ph10 443 if (eptr >= md->end_subject)
1989 ph10 428 {
1990 ph10 443 SCHECK_PARTIAL();
1991 ph10 510 MRRETURN(MATCH_NOMATCH);
1992 ph10 443 }
1993 ph10 178 GETCHARINCTEST(c, eptr);
1994     switch(c)
1995     {
1996     default: break;
1997     case 0x0a: /* LF */
1998     case 0x0b: /* VT */
1999     case 0x0c: /* FF */
2000     case 0x0d: /* CR */
2001     case 0x85: /* NEL */
2002     case 0x2028: /* LINE SEPARATOR */
2003     case 0x2029: /* PARAGRAPH SEPARATOR */
2004 ph10 510 MRRETURN(MATCH_NOMATCH);
2005 ph10 178 }
2006     ecode++;
2007     break;
2008    
2009     case OP_VSPACE:
2010 ph10 443 if (eptr >= md->end_subject)
2011 ph10 428 {
2012 ph10 443 SCHECK_PARTIAL();
2013 ph10 510 MRRETURN(MATCH_NOMATCH);
2014 ph10 443 }
2015 ph10 178 GETCHARINCTEST(c, eptr);
2016     switch(c)
2017     {
2018 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2019 ph10 178 case 0x0a: /* LF */
2020     case 0x0b: /* VT */
2021     case 0x0c: /* FF */
2022     case 0x0d: /* CR */
2023     case 0x85: /* NEL */
2024     case 0x2028: /* LINE SEPARATOR */
2025     case 0x2029: /* PARAGRAPH SEPARATOR */
2026     break;
2027     }
2028     ecode++;
2029     break;
2030    
2031 nigel 77 #ifdef SUPPORT_UCP
2032     /* Check the next character by Unicode property. We will get here only
2033     if the support is in the binary; otherwise a compile-time error occurs. */
2034    
2035     case OP_PROP:
2036     case OP_NOTPROP:
2037 ph10 443 if (eptr >= md->end_subject)
2038 ph10 428 {
2039 ph10 443 SCHECK_PARTIAL();
2040 ph10 510 MRRETURN(MATCH_NOMATCH);
2041 ph10 443 }
2042 nigel 77 GETCHARINCTEST(c, eptr);
2043     {
2044 ph10 384 const ucd_record *prop = GET_UCD(c);
2045 nigel 77
2046 nigel 87 switch(ecode[1])
2047     {
2048     case PT_ANY:
2049 ph10 510 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2050 nigel 87 break;
2051 nigel 77
2052 nigel 87 case PT_LAMP:
2053 ph10 349 if ((prop->chartype == ucp_Lu ||
2054     prop->chartype == ucp_Ll ||
2055     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2056 ph10 510 MRRETURN(MATCH_NOMATCH);
2057 nigel 87 break;
2058    
2059     case PT_GC:
2060 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2061 ph10 510 MRRETURN(MATCH_NOMATCH);
2062 nigel 87 break;
2063    
2064     case PT_PC:
2065 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2066 ph10 510 MRRETURN(MATCH_NOMATCH);
2067 nigel 87 break;
2068    
2069     case PT_SC:
2070 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2071 ph10 510 MRRETURN(MATCH_NOMATCH);
2072 nigel 87 break;
2073    
2074     default:
2075     RRETURN(PCRE_ERROR_INTERNAL);
2076 nigel 77 }
2077 nigel 87
2078     ecode += 3;
2079 nigel 77 }
2080     break;
2081    
2082     /* Match an extended Unicode sequence. We will get here only if the support
2083     is in the binary; otherwise a compile-time error occurs. */
2084    
2085     case OP_EXTUNI:
2086 ph10 443 if (eptr >= md->end_subject)
2087 ph10 428 {
2088 ph10 443 SCHECK_PARTIAL();
2089 ph10 510 MRRETURN(MATCH_NOMATCH);
2090 ph10 443 }
2091 nigel 77 GETCHARINCTEST(c, eptr);
2092     {
2093 ph10 349 int category = UCD_CATEGORY(c);
2094 ph10 510 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2095 nigel 77 while (eptr < md->end_subject)
2096     {
2097     int len = 1;
2098     if (!utf8) c = *eptr; else
2099     {
2100     GETCHARLEN(c, eptr, len);
2101     }
2102 ph10 349 category = UCD_CATEGORY(c);
2103 nigel 77 if (category != ucp_M) break;
2104     eptr += len;
2105     }
2106     }
2107     ecode++;
2108     break;
2109     #endif
2110    
2111    
2112     /* Match a back reference, possibly repeatedly. Look past the end of the
2113     item to see if there is repeat information following. The code is similar
2114     to that for character classes, but repeated for efficiency. Then obey
2115     similar code to character type repeats - written out again for speed.
2116     However, if the referenced string is the empty string, always treat
2117     it as matched, any number of times (otherwise there could be infinite
2118     loops). */
2119    
2120     case OP_REF:
2121     {
2122     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2123 ph10 345 ecode += 3;
2124    
2125 ph10 336 /* If the reference is unset, there are two possibilities:
2126 ph10 345
2127 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
2128     than the amount of subject left; this ensures that every attempt at a
2129     match fails. We can't just fail here, because of the possibility of
2130     quantifiers with zero minima.
2131 ph10 345
2132     (b) If the JavaScript compatibility flag is set, set the length to zero
2133     so that the back reference matches an empty string.
2134    
2135     Otherwise, set the length to the length of what was matched by the
2136 ph10 336 referenced subpattern. */
2137 ph10 345
2138 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
2139 ph10 345 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2140 ph10 336 else
2141     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2142 nigel 77
2143     /* Set up for repetition, or handle the non-repeated case */
2144    
2145     switch (*ecode)
2146     {
2147     case OP_CRSTAR:
2148     case OP_CRMINSTAR:
2149     case OP_CRPLUS:
2150     case OP_CRMINPLUS:
2151     case OP_CRQUERY:
2152     case OP_CRMINQUERY:
2153     c = *ecode++ - OP_CRSTAR;
2154     minimize = (c & 1) != 0;
2155     min = rep_min[c]; /* Pick up values from tables; */
2156     max = rep_max[c]; /* zero for max => infinity */
2157     if (max == 0) max = INT_MAX;
2158     break;
2159    
2160     case OP_CRRANGE:
2161     case OP_CRMINRANGE:
2162     minimize = (*ecode == OP_CRMINRANGE);
2163     min = GET2(ecode, 1);
2164     max = GET2(ecode, 3);
2165     if (max == 0) max = INT_MAX;
2166     ecode += 5;
2167     break;
2168    
2169     default: /* No repeat follows */
2170 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
2171 ph10 428 {
2172 ph10 443 CHECK_PARTIAL();
2173 ph10 510 MRRETURN(MATCH_NOMATCH);
2174 ph10 443 }
2175 nigel 77 eptr += length;
2176     continue; /* With the main loop */
2177     }
2178    
2179     /* If the length of the reference is zero, just continue with the
2180     main loop. */
2181 ph10 443
2182 nigel 77 if (length == 0) continue;
2183    
2184     /* First, ensure the minimum number of matches are present. We get back
2185     the length of the reference string explicitly rather than passing the
2186     address of eptr, so that eptr can be a register variable. */
2187    
2188     for (i = 1; i <= min; i++)
2189     {
2190 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
2191 ph10 426 {
2192 ph10 427 CHECK_PARTIAL();
2193 ph10 510 MRRETURN(MATCH_NOMATCH);
2194 ph10 427 }
2195 nigel 77 eptr += length;
2196     }
2197    
2198     /* If min = max, continue at the same level without recursion.
2199     They are not both allowed to be zero. */
2200    
2201     if (min == max) continue;
2202    
2203     /* If minimizing, keep trying and advancing the pointer */
2204    
2205     if (minimize)
2206     {
2207     for (fi = min;; fi++)
2208     {
2209 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2210 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2211 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2212 ph10 428 if (!match_ref(offset, eptr, length, md, ims))
2213 ph10 426 {
2214 ph10 427 CHECK_PARTIAL();
2215 ph10 510 MRRETURN(MATCH_NOMATCH);
2216 ph10 427 }
2217 nigel 77 eptr += length;
2218     }
2219     /* Control never gets here */
2220     }
2221    
2222     /* If maximizing, find the longest string and work backwards */
2223    
2224     else
2225     {
2226     pp = eptr;
2227     for (i = min; i < max; i++)
2228     {
2229 ph10 463 if (!match_ref(offset, eptr, length, md, ims))
2230 ph10 462 {
2231 ph10 463 CHECK_PARTIAL();
2232 ph10 462 break;
2233 ph10 463 }
2234 nigel 77 eptr += length;
2235     }
2236     while (eptr >= pp)
2237     {
2238 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2239 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2240     eptr -= length;
2241     }
2242 ph10 510 MRRETURN(MATCH_NOMATCH);
2243 nigel 77 }
2244     }
2245     /* Control never gets here */
2246    
2247     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2248     used when all the characters in the class have values in the range 0-255,
2249     and either the matching is caseful, or the characters are in the range
2250     0-127 when UTF-8 processing is enabled. The only difference between
2251     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2252     encountered.
2253    
2254     First, look past the end of the item to see if there is repeat information
2255     following. Then obey similar code to character type repeats - written out
2256     again for speed. */
2257    
2258     case OP_NCLASS:
2259     case OP_CLASS:
2260     {
2261     data = ecode + 1; /* Save for matching */
2262     ecode += 33; /* Advance past the item */
2263    
2264     switch (*ecode)
2265     {
2266     case OP_CRSTAR:
2267     case OP_CRMINSTAR:
2268     case OP_CRPLUS:
2269     case OP_CRMINPLUS:
2270     case OP_CRQUERY:
2271     case OP_CRMINQUERY:
2272     c = *ecode++ - OP_CRSTAR;
2273     minimize = (c & 1) != 0;
2274     min = rep_min[c]; /* Pick up values from tables; */
2275     max = rep_max[c]; /* zero for max => infinity */
2276     if (max == 0) max = INT_MAX;
2277     break;
2278    
2279     case OP_CRRANGE:
2280     case OP_CRMINRANGE:
2281     minimize = (*ecode == OP_CRMINRANGE);
2282     min = GET2(ecode, 1);
2283     max = GET2(ecode, 3);
2284     if (max == 0) max = INT_MAX;
2285     ecode += 5;
2286     break;
2287    
2288     default: /* No repeat follows */
2289     min = max = 1;
2290     break;
2291     }
2292    
2293     /* First, ensure the minimum number of matches are present. */
2294    
2295     #ifdef SUPPORT_UTF8
2296     /* UTF-8 mode */
2297     if (utf8)
2298     {
2299     for (i = 1; i <= min; i++)
2300     {
2301 ph10 427 if (eptr >= md->end_subject)
2302 ph10 426 {
2303 ph10 428 SCHECK_PARTIAL();
2304 ph10 510 MRRETURN(MATCH_NOMATCH);
2305 ph10 427 }
2306 nigel 77 GETCHARINC(c, eptr);
2307     if (c > 255)
2308     {
2309 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2310 nigel 77 }
2311     else
2312     {
2313 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2314 nigel 77 }
2315     }
2316     }
2317     else
2318     #endif
2319     /* Not UTF-8 mode */
2320     {
2321     for (i = 1; i <= min; i++)
2322     {
2323 ph10 427 if (eptr >= md->end_subject)
2324 ph10 426 {
2325 ph10 428 SCHECK_PARTIAL();
2326 ph10 510 MRRETURN(MATCH_NOMATCH);
2327 ph10 427 }
2328 nigel 77 c = *eptr++;
2329 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2330 nigel 77 }
2331     }
2332    
2333     /* If max == min we can continue with the main loop without the
2334     need to recurse. */
2335    
2336     if (min == max) continue;
2337    
2338     /* If minimizing, keep testing the rest of the expression and advancing
2339     the pointer while it matches the class. */
2340    
2341     if (minimize)
2342     {
2343     #ifdef SUPPORT_UTF8
2344     /* UTF-8 mode */
2345     if (utf8)
2346     {
2347     for (fi = min;; fi++)
2348     {
2349 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2350 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2351 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2352 ph10 427 if (eptr >= md->end_subject)
2353 ph10 426 {
2354 ph10 427 SCHECK_PARTIAL();
2355 ph10 510 MRRETURN(MATCH_NOMATCH);
2356 ph10 427 }
2357 nigel 77 GETCHARINC(c, eptr);
2358     if (c > 255)
2359     {
2360 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2361 nigel 77 }
2362     else
2363     {
2364 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2365 nigel 77 }
2366     }
2367     }
2368     else
2369     #endif
2370     /* Not UTF-8 mode */
2371     {
2372     for (fi = min;; fi++)
2373     {
2374 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2375 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2376 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2377 ph10 427 if (eptr >= md->end_subject)
2378 ph10 426 {
2379 ph10 427 SCHECK_PARTIAL();
2380 ph10 510 MRRETURN(MATCH_NOMATCH);
2381 ph10 427 }
2382 nigel 77 c = *eptr++;
2383 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2384 nigel 77 }
2385     }
2386     /* Control never gets here */
2387     }
2388    
2389     /* If maximizing, find the longest possible run, then work backwards. */
2390    
2391     else
2392     {
2393     pp = eptr;
2394    
2395     #ifdef SUPPORT_UTF8
2396     /* UTF-8 mode */
2397     if (utf8)
2398     {
2399     for (i = min; i < max; i++)
2400     {
2401     int len = 1;
2402 ph10 463 if (eptr >= md->end_subject)
2403 ph10 462 {
2404 ph10 463 SCHECK_PARTIAL();
2405 ph10 462 break;
2406 ph10 463 }
2407 nigel 77 GETCHARLEN(c, eptr, len);
2408     if (c > 255)
2409     {
2410     if (op == OP_CLASS) break;
2411     }
2412     else
2413     {
2414     if ((data[c/8] & (1 << (c&7))) == 0) break;
2415     }
2416     eptr += len;
2417     }
2418     for (;;)
2419     {
2420 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2421 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2422     if (eptr-- == pp) break; /* Stop if tried at original pos */
2423     BACKCHAR(eptr);
2424     }
2425     }
2426     else
2427     #endif
2428     /* Not UTF-8 mode */
2429     {
2430     for (i = min; i < max; i++)
2431     {
2432 ph10 463 if (eptr >= md->end_subject)
2433 ph10 462 {
2434 ph10 463 SCHECK_PARTIAL();
2435 ph10 462 break;
2436 ph10 463 }
2437 nigel 77 c = *eptr;
2438     if ((data[c/8] & (1 << (c&7))) == 0) break;
2439     eptr++;
2440     }
2441     while (eptr >= pp)
2442     {
2443 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2444 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2445 nigel 77 eptr--;
2446     }
2447     }
2448    
2449 ph10 510 MRRETURN(MATCH_NOMATCH);
2450 nigel 77 }
2451     }
2452     /* Control never gets here */
2453    
2454    
2455     /* Match an extended character class. This opcode is encountered only
2456 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2457     mode, because Unicode properties are supported in non-UTF-8 mode. */
2458 nigel 77
2459     #ifdef SUPPORT_UTF8
2460     case OP_XCLASS:
2461     {
2462     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2463     ecode += GET(ecode, 1); /* Advance past the item */
2464    
2465     switch (*ecode)
2466     {
2467     case OP_CRSTAR:
2468     case OP_CRMINSTAR:
2469     case OP_CRPLUS:
2470     case OP_CRMINPLUS:
2471     case OP_CRQUERY:
2472     case OP_CRMINQUERY:
2473     c = *ecode++ - OP_CRSTAR;
2474     minimize = (c & 1) != 0;
2475     min = rep_min[c]; /* Pick up values from tables; */
2476     max = rep_max[c]; /* zero for max => infinity */
2477     if (max == 0) max = INT_MAX;
2478     break;
2479    
2480     case OP_CRRANGE:
2481     case OP_CRMINRANGE:
2482     minimize = (*ecode == OP_CRMINRANGE);
2483     min = GET2(ecode, 1);
2484     max = GET2(ecode, 3);
2485     if (max == 0) max = INT_MAX;
2486     ecode += 5;
2487     break;
2488    
2489     default: /* No repeat follows */
2490     min = max = 1;
2491     break;
2492     }
2493    
2494     /* First, ensure the minimum number of matches are present. */
2495    
2496     for (i = 1; i <= min; i++)
2497     {
2498 ph10 427 if (eptr >= md->end_subject)
2499 ph10 426 {
2500     SCHECK_PARTIAL();
2501 ph10 510 MRRETURN(MATCH_NOMATCH);
2502 ph10 427 }
2503 ph10 384 GETCHARINCTEST(c, eptr);
2504 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2505 nigel 77 }
2506    
2507     /* If max == min we can continue with the main loop without the
2508     need to recurse. */
2509    
2510     if (min == max) continue;
2511    
2512     /* If minimizing, keep testing the rest of the expression and advancing
2513     the pointer while it matches the class. */
2514    
2515     if (minimize)
2516     {
2517     for (fi = min;; fi++)
2518     {
2519 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2520 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2521 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2522 ph10 427 if (eptr >= md->end_subject)
2523 ph10 426 {
2524 ph10 427 SCHECK_PARTIAL();
2525 ph10 510 MRRETURN(MATCH_NOMATCH);
2526 ph10 427 }
2527 ph10 384 GETCHARINCTEST(c, eptr);
2528 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2529 nigel 77 }
2530     /* Control never gets here */
2531     }
2532    
2533     /* If maximizing, find the longest possible run, then work backwards. */
2534    
2535     else
2536     {
2537     pp = eptr;
2538     for (i = min; i < max; i++)
2539     {
2540     int len = 1;
2541 ph10 463 if (eptr >= md->end_subject)
2542 ph10 462 {
2543 ph10 463 SCHECK_PARTIAL();
2544 ph10 462 break;
2545 ph10 463 }
2546 ph10 384 GETCHARLENTEST(c, eptr, len);
2547 nigel 77 if (!_pcre_xclass(c, data)) break;
2548     eptr += len;
2549     }
2550     for(;;)
2551     {
2552 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2553 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2554     if (eptr-- == pp) break; /* Stop if tried at original pos */
2555 ph10 214 if (utf8) BACKCHAR(eptr);
2556 nigel 77 }
2557 ph10 510 MRRETURN(MATCH_NOMATCH);
2558 nigel 77 }
2559    
2560     /* Control never gets here */
2561     }
2562     #endif /* End of XCLASS */
2563    
2564     /* Match a single character, casefully */
2565    
2566     case OP_CHAR:
2567     #ifdef SUPPORT_UTF8
2568     if (utf8)
2569     {
2570     length = 1;
2571     ecode++;
2572     GETCHARLEN(fc, ecode, length);
2573 ph10 443 if (length > md->end_subject - eptr)
2574 ph10 428 {
2575     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2576 ph10 510 MRRETURN(MATCH_NOMATCH);
2577 ph10 443 }
2578 ph10 510 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2579 nigel 77 }
2580     else
2581     #endif
2582    
2583     /* Non-UTF-8 mode */
2584     {
2585 ph10 443 if (md->end_subject - eptr < 1)
2586 ph10 428 {
2587     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2588 ph10 510 MRRETURN(MATCH_NOMATCH);
2589 ph10 443 }
2590 ph10 510 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2591 nigel 77 ecode += 2;
2592     }
2593     break;
2594    
2595     /* Match a single character, caselessly */
2596    
2597     case OP_CHARNC:
2598     #ifdef SUPPORT_UTF8
2599     if (utf8)
2600     {
2601     length = 1;
2602     ecode++;
2603     GETCHARLEN(fc, ecode, length);
2604    
2605 ph10 443 if (length > md->end_subject - eptr)
2606 ph10 428 {
2607     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2608 ph10 510 MRRETURN(MATCH_NOMATCH);
2609 ph10 443 }
2610 nigel 77
2611     /* If the pattern character's value is < 128, we have only one byte, and
2612     can use the fast lookup table. */
2613    
2614     if (fc < 128)
2615     {
2616 ph10 510 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2617 nigel 77 }
2618    
2619     /* Otherwise we must pick up the subject character */
2620    
2621     else
2622     {
2623 nigel 93 unsigned int dc;
2624 nigel 77 GETCHARINC(dc, eptr);
2625     ecode += length;
2626    
2627     /* If we have Unicode property support, we can use it to test the other
2628 nigel 87 case of the character, if there is one. */
2629 nigel 77
2630     if (fc != dc)
2631     {
2632     #ifdef SUPPORT_UCP
2633 ph10 349 if (dc != UCD_OTHERCASE(fc))
2634 nigel 77 #endif
2635 ph10 510 MRRETURN(MATCH_NOMATCH);
2636 nigel 77 }
2637     }
2638     }
2639     else
2640     #endif /* SUPPORT_UTF8 */
2641    
2642     /* Non-UTF-8 mode */
2643     {
2644 ph10 443 if (md->end_subject - eptr < 1)
2645 ph10 428 {
2646 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2647 ph10 510 MRRETURN(MATCH_NOMATCH);
2648 ph10 443 }
2649 ph10 510 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2650 nigel 77 ecode += 2;
2651     }
2652     break;
2653    
2654 nigel 93 /* Match a single character repeatedly. */
2655 nigel 77
2656     case OP_EXACT:
2657     min = max = GET2(ecode, 1);
2658     ecode += 3;
2659     goto REPEATCHAR;
2660    
2661 nigel 93 case OP_POSUPTO:
2662     possessive = TRUE;
2663     /* Fall through */
2664    
2665 nigel 77 case OP_UPTO:
2666     case OP_MINUPTO:
2667     min = 0;
2668     max = GET2(ecode, 1);
2669     minimize = *ecode == OP_MINUPTO;
2670     ecode += 3;
2671     goto REPEATCHAR;
2672    
2673 nigel 93 case OP_POSSTAR:
2674     possessive = TRUE;
2675     min = 0;
2676     max = INT_MAX;
2677     ecode++;
2678     goto REPEATCHAR;
2679    
2680     case OP_POSPLUS:
2681     possessive = TRUE;
2682     min = 1;
2683     max = INT_MAX;
2684     ecode++;
2685     goto REPEATCHAR;
2686    
2687     case OP_POSQUERY:
2688     possessive = TRUE;
2689     min = 0;
2690     max = 1;
2691     ecode++;
2692     goto REPEATCHAR;
2693    
2694 nigel 77 case OP_STAR:
2695     case OP_MINSTAR:
2696     case OP_PLUS:
2697     case OP_MINPLUS:
2698     case OP_QUERY:
2699     case OP_MINQUERY:
2700     c = *ecode++ - OP_STAR;
2701     minimize = (c & 1) != 0;
2702 ph10 443
2703 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2704     max = rep_max[c]; /* zero for max => infinity */
2705     if (max == 0) max = INT_MAX;
2706    
2707 ph10 426 /* Common code for all repeated single-character matches. */
2708 nigel 77
2709     REPEATCHAR:
2710     #ifdef SUPPORT_UTF8
2711     if (utf8)
2712     {
2713     length = 1;
2714     charptr = ecode;
2715     GETCHARLEN(fc, ecode, length);
2716     ecode += length;
2717    
2718     /* Handle multibyte character matching specially here. There is
2719     support for caseless matching if UCP support is present. */
2720    
2721     if (length > 1)
2722     {
2723     #ifdef SUPPORT_UCP
2724 nigel 93 unsigned int othercase;
2725 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2726 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2727 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2728 ph10 115 else oclength = 0;
2729 nigel 77 #endif /* SUPPORT_UCP */
2730    
2731     for (i = 1; i <= min; i++)
2732     {
2733 ph10 426 if (eptr <= md->end_subject - length &&
2734     memcmp(eptr, charptr, length) == 0) eptr += length;
2735 ph10 123 #ifdef SUPPORT_UCP
2736 ph10 426 else if (oclength > 0 &&
2737     eptr <= md->end_subject - oclength &&
2738     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2739     #endif /* SUPPORT_UCP */
2740 nigel 77 else
2741     {
2742 ph10 426 CHECK_PARTIAL();
2743 ph10 510 MRRETURN(MATCH_NOMATCH);
2744 nigel 77 }
2745     }
2746    
2747     if (min == max) continue;
2748    
2749     if (minimize)
2750     {
2751     for (fi = min;; fi++)
2752     {
2753 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2754 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2755 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2756 ph10 426 if (eptr <= md->end_subject - length &&
2757     memcmp(eptr, charptr, length) == 0) eptr += length;
2758 ph10 123 #ifdef SUPPORT_UCP
2759 ph10 426 else if (oclength > 0 &&
2760     eptr <= md->end_subject - oclength &&
2761     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2762     #endif /* SUPPORT_UCP */
2763 nigel 77 else
2764     {
2765 ph10 426 CHECK_PARTIAL();
2766 ph10 510 MRRETURN(MATCH_NOMATCH);
2767 nigel 77 }
2768     }
2769     /* Control never gets here */
2770     }
2771 nigel 93
2772     else /* Maximize */
2773 nigel 77 {
2774     pp = eptr;
2775     for (i = min; i < max; i++)
2776     {
2777 ph10 426 if (eptr <= md->end_subject - length &&
2778     memcmp(eptr, charptr, length) == 0) eptr += length;
2779 ph10 123 #ifdef SUPPORT_UCP
2780 ph10 426 else if (oclength > 0 &&
2781     eptr <= md->end_subject - oclength &&
2782     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2783     #endif /* SUPPORT_UCP */
2784 ph10 463 else
2785 ph10 462 {
2786 ph10 463 CHECK_PARTIAL();
2787 ph10 462 break;
2788 ph10 463 }
2789 nigel 77 }
2790 nigel 93
2791     if (possessive) continue;
2792 ph10 427
2793 ph10 120 for(;;)
2794 ph10 426 {
2795     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2796     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2797 ph10 510 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2798 ph10 115 #ifdef SUPPORT_UCP
2799 ph10 426 eptr--;
2800     BACKCHAR(eptr);
2801 ph10 123 #else /* without SUPPORT_UCP */
2802 ph10 426 eptr -= length;
2803 ph10 123 #endif /* SUPPORT_UCP */
2804 ph10 426 }
2805 nigel 77 }
2806     /* Control never gets here */
2807     }
2808    
2809     /* If the length of a UTF-8 character is 1, we fall through here, and
2810     obey the code as for non-UTF-8 characters below, though in this case the
2811     value of fc will always be < 128. */
2812     }
2813     else
2814     #endif /* SUPPORT_UTF8 */
2815    
2816     /* When not in UTF-8 mode, load a single-byte character. */
2817    
2818 ph10 426 fc = *ecode++;
2819 ph10 443
2820 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2821     may not be in UTF-8 mode. The code is duplicated for the caseless and
2822     caseful cases, for speed, since matching characters is likely to be quite
2823     common. First, ensure the minimum number of matches are present. If min =
2824     max, continue at the same level without recursing. Otherwise, if
2825     minimizing, keep trying the rest of the expression and advancing one
2826     matching character if failing, up to the maximum. Alternatively, if
2827     maximizing, find the maximum number of characters and work backwards. */
2828    
2829     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2830     max, eptr));
2831    
2832     if ((ims & PCRE_CASELESS) != 0)
2833     {
2834     fc = md->lcc[fc];
2835     for (i = 1; i <= min; i++)
2836 ph10 426 {
2837     if (eptr >= md->end_subject)
2838     {
2839     SCHECK_PARTIAL();
2840 ph10 510 MRRETURN(MATCH_NOMATCH);
2841 ph10 426 }
2842 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2843 ph10 426 }
2844 nigel 77 if (min == max) continue;
2845     if (minimize)
2846     {
2847     for (fi = min;; fi++)
2848     {
2849 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2850 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2851 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2852 ph10 426 if (eptr >= md->end_subject)
2853     {
2854 ph10 427 SCHECK_PARTIAL();
2855 ph10 510 MRRETURN(MATCH_NOMATCH);
2856 ph10 426 }
2857 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2858 nigel 77 }
2859     /* Control never gets here */
2860     }
2861 nigel 93 else /* Maximize */
2862 nigel 77 {
2863     pp = eptr;
2864     for (i = min; i < max; i++)
2865     {
2866 ph10 463 if (eptr >= md->end_subject)
2867 ph10 462 {
2868     SCHECK_PARTIAL();
2869     break;
2870 ph10 463 }
2871 ph10 462 if (fc != md->lcc[*eptr]) break;
2872 nigel 77 eptr++;
2873     }
2874 ph10 427
2875 nigel 93 if (possessive) continue;
2876 ph10 427
2877 nigel 77 while (eptr >= pp)
2878     {
2879 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2880 nigel 77 eptr--;
2881     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2882     }
2883 ph10 510 MRRETURN(MATCH_NOMATCH);
2884 nigel 77 }
2885     /* Control never gets here */
2886     }
2887    
2888     /* Caseful comparisons (single-byte only; multibyte characters were handled above) */
2889    
2890     else
2891     {
2892 ph10 427 for (i = 1; i <= min; i++)
2893 ph10 426 {
2894     if (eptr >= md->end_subject)
2895     {
2896     SCHECK_PARTIAL();
2897 ph10 510 MRRETURN(MATCH_NOMATCH);
2898 ph10 426 }
2899 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2900 ph10 427 }
2901 ph10 443
2902 nigel 77 if (min == max) continue;
2903 ph10 443
2904 nigel 77 if (minimize)
2905     {
2906     for (fi = min;; fi++)
2907     {
2908 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2909 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2910 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2911 ph10 426 if (eptr >= md->end_subject)
2912 ph10 427 {
2913 ph10 426 SCHECK_PARTIAL();
2914 ph10 510 MRRETURN(MATCH_NOMATCH);
2915 ph10 427 }
2916 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2917 nigel 77 }
2918     /* Control never gets here */
2919     }
2920 nigel 93 else /* Maximize */
2921 nigel 77 {
2922     pp = eptr;
2923     for (i = min; i < max; i++)
2924     {
2925 ph10 463 if (eptr >= md->end_subject)
2926 ph10 462 {
2927 ph10 463 SCHECK_PARTIAL();
2928 ph10 462 break;
2929 ph10 463 }
2930 ph10 462 if (fc != *eptr) break;
2931 nigel 77 eptr++;
2932     }
2933 nigel 93 if (possessive) continue;
2934 ph10 443
2935 nigel 77 while (eptr >= pp)
2936     {
2937 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2938 nigel 77 eptr--;
2939     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2940     }
2941 ph10 510 MRRETURN(MATCH_NOMATCH);
2942 nigel 77 }
2943     }
2944     /* Control never gets here */
2945    
2946     /* Match a negated single one-byte character. The character we are
2947     checking can be multibyte. */
2948    
2949     case OP_NOT:
2950 ph10 443 if (eptr >= md->end_subject)
2951 ph10 428 {
2952 ph10 443 SCHECK_PARTIAL();
2953 ph10 510 MRRETURN(MATCH_NOMATCH);
2954 ph10 443 }
2955 nigel 77 ecode++;
2956     GETCHARINCTEST(c, eptr);
2957     if ((ims & PCRE_CASELESS) != 0)
2958     {
2959     #ifdef SUPPORT_UTF8
2960     if (c < 256)
2961     #endif
2962     c = md->lcc[c];
2963 ph10 510 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
2964 nigel 77 }
2965     else
2966     {
2967 ph10 510 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
2968 nigel 77 }
2969     break;
2970    
2971     /* Match a negated single one-byte character repeatedly. This is almost a
2972     repeat of the code for a repeated single character, but I haven't found a
2973     nice way of commoning these up that doesn't require a test of the
2974     positive/negative option for each character match. Maybe that wouldn't add
2975     very much to the time taken, but character matching *is* what this is all
2976     about... */
2977    
2978     case OP_NOTEXACT:
2979     min = max = GET2(ecode, 1);
2980     ecode += 3;
2981     goto REPEATNOTCHAR;
2982    
2983     case OP_NOTUPTO:
2984     case OP_NOTMINUPTO:
2985     min = 0;
2986     max = GET2(ecode, 1);
2987     minimize = *ecode == OP_NOTMINUPTO;
2988     ecode += 3;
2989     goto REPEATNOTCHAR;
2990    
2991 nigel 93 case OP_NOTPOSSTAR:
2992     possessive = TRUE;
2993     min = 0;
2994     max = INT_MAX;
2995     ecode++;
2996     goto REPEATNOTCHAR;
2997    
2998     case OP_NOTPOSPLUS:
2999     possessive = TRUE;
3000     min = 1;
3001     max = INT_MAX;
3002     ecode++;
3003     goto REPEATNOTCHAR;
3004    
3005     case OP_NOTPOSQUERY:
3006     possessive = TRUE;
3007     min = 0;
3008     max = 1;
3009     ecode++;
3010     goto REPEATNOTCHAR;
3011    
3012     case OP_NOTPOSUPTO:
3013     possessive = TRUE;
3014     min = 0;
3015     max = GET2(ecode, 1);
3016     ecode += 3;
3017     goto REPEATNOTCHAR;
3018    
3019 nigel 77 case OP_NOTSTAR:
3020     case OP_NOTMINSTAR:
3021     case OP_NOTPLUS:
3022     case OP_NOTMINPLUS:
3023     case OP_NOTQUERY:
3024     case OP_NOTMINQUERY:
3025     c = *ecode++ - OP_NOTSTAR;
3026     minimize = (c & 1) != 0;
3027     min = rep_min[c]; /* Pick up values from tables; */
3028     max = rep_max[c]; /* zero for max => infinity */
3029     if (max == 0) max = INT_MAX;
3030    
3031 ph10 426 /* Common code for all repeated single-byte matches. */
3032 nigel 77
3033     REPEATNOTCHAR:
3034     fc = *ecode++;
3035    
3036     /* The code is duplicated for the caseless and caseful cases, for speed,
3037     since matching characters is likely to be quite common. First, ensure the
3038     minimum number of matches are present. If min = max, continue at the same
3039     level without recursing. Otherwise, if minimizing, keep trying the rest of
3040     the expression and advancing one matching character if failing, up to the
3041     maximum. Alternatively, if maximizing, find the maximum number of
3042     characters and work backwards. */
3043    
3044     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3045     max, eptr));
3046    
3047     if ((ims & PCRE_CASELESS) != 0)
3048     {
3049     fc = md->lcc[fc];
3050    
3051     #ifdef SUPPORT_UTF8
3052     /* UTF-8 mode */
3053     if (utf8)
3054     {
3055 nigel 93 register unsigned int d;
3056 nigel 77 for (i = 1; i <= min; i++)
3057     {
3058 ph10 426 if (eptr >= md->end_subject)
3059     {
3060     SCHECK_PARTIAL();
3061 ph10 510 MRRETURN(MATCH_NOMATCH);
3062 ph10 427 }
3063 nigel 77 GETCHARINC(d, eptr);
3064     if (d < 256) d = md->lcc[d];
3065 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3066 nigel 77 }
3067     }
3068     else
3069     #endif
3070    
3071     /* Not UTF-8 mode */
3072     {
3073     for (i = 1; i <= min; i++)
3074 ph10 426 {
3075     if (eptr >= md->end_subject)
3076     {
3077     SCHECK_PARTIAL();
3078 ph10 510 MRRETURN(MATCH_NOMATCH);
3079 ph10 427 }
3080 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3081 ph10 427 }
3082 nigel 77 }
3083    
3084     if (min == max) continue;
3085    
3086     if (minimize)
3087     {
3088     #ifdef SUPPORT_UTF8
3089     /* UTF-8 mode */
3090     if (utf8)
3091     {
3092 nigel 93 register unsigned int d;
3093 nigel 77 for (fi = min;; fi++)
3094     {
3095 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3096 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3097 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3098 ph10 427 if (eptr >= md->end_subject)
3099 ph10 426 {
3100 ph10 427 SCHECK_PARTIAL();
3101 ph10 510 MRRETURN(MATCH_NOMATCH);
3102 ph10 427 }
3103 nigel 77 GETCHARINC(d, eptr);
3104     if (d < 256) d = md->lcc[d];
3105 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3106 nigel 77 }
3107     }
3108     else
3109     #endif
3110     /* Not UTF-8 mode */
3111     {
3112     for (fi = min;; fi++)
3113     {
3114 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3115 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3116 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3117 ph10 426 if (eptr >= md->end_subject)
3118     {
3119     SCHECK_PARTIAL();
3120 ph10 510 MRRETURN(MATCH_NOMATCH);
3121 ph10 426 }
3122 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3123 nigel 77 }
3124     }
3125     /* Control never gets here */
3126     }
3127    
3128     /* Maximize case */
3129    
3130     else
3131     {
3132     pp = eptr;
3133    
3134     #ifdef SUPPORT_UTF8
3135     /* UTF-8 mode */
3136     if (utf8)
3137     {
3138 nigel 93 register unsigned int d;
3139 nigel 77 for (i = min; i < max; i++)
3140     {
3141     int len = 1;
3142 ph10 463 if (eptr >= md->end_subject)
3143 ph10 462 {
3144 ph10 463 SCHECK_PARTIAL();
3145 ph10 462 break;
3146 ph10 463 }
3147 nigel 77 GETCHARLEN(d, eptr, len);
3148     if (d < 256) d = md->lcc[d];
3149     if (fc == d) break;
3150     eptr += len;
3151     }
3152 nigel 93 if (possessive) continue;
3153     for(;;)
3154 nigel 77 {
3155 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3156 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3157     if (eptr-- == pp) break; /* Stop if tried at original pos */
3158     BACKCHAR(eptr);
3159     }
3160     }
3161     else
3162     #endif
3163     /* Not UTF-8 mode */
3164     {
3165     for (i = min; i < max; i++)
3166     {
3167 ph10 463 if (eptr >= md->end_subject)
3168 ph10 462 {
3169     SCHECK_PARTIAL();
3170     break;
3171 ph10 463 }
3172 ph10 462 if (fc == md->lcc[*eptr]) break;
3173 nigel 77 eptr++;
3174     }
3175 nigel 93 if (possessive) continue;
3176 nigel 77 while (eptr >= pp)
3177     {
3178 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3179 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3180     eptr--;
3181     }
3182     }
3183    
3184 ph10 510 MRRETURN(MATCH_NOMATCH);
3185 nigel 77 }
3186     /* Control never gets here */
3187     }
3188    
3189     /* Caseful comparisons */
3190    
3191     else
3192     {
3193     #ifdef SUPPORT_UTF8
3194     /* UTF-8 mode */
3195     if (utf8)
3196     {
3197 nigel 93 register unsigned int d;
3198 nigel 77 for (i = 1; i <= min; i++)
3199     {
3200 ph10 426 if (eptr >= md->end_subject)
3201     {
3202     SCHECK_PARTIAL();
3203 ph10 510 MRRETURN(MATCH_NOMATCH);
3204 ph10 427 }
3205 nigel 77 GETCHARINC(d, eptr);
3206 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3207 nigel 77 }
3208     }
3209     else
3210     #endif
3211     /* Not UTF-8 mode */
3212     {
3213     for (i = 1; i <= min; i++)
3214 ph10 426 {
3215     if (eptr >= md->end_subject)
3216     {
3217     SCHECK_PARTIAL();
3218 ph10 510 MRRETURN(MATCH_NOMATCH);
3219 ph10 427 }
3220 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3221 ph10 427 }
3222 nigel 77 }
3223    
3224     if (min == max) continue;
3225    
3226     if (minimize)
3227     {
3228     #ifdef SUPPORT_UTF8
3229     /* UTF-8 mode */
3230     if (utf8)
3231     {
3232 nigel 93 register unsigned int d;
3233 nigel 77 for (fi = min;; fi++)
3234     {
3235 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3236 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3237 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3238 ph10 427 if (eptr >= md->end_subject)
3239 ph10 426 {
3240 ph10 427 SCHECK_PARTIAL();
3241 ph10 510 MRRETURN(MATCH_NOMATCH);
3242 ph10 427 }
3243 nigel 77 GETCHARINC(d, eptr);
3244 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3245 nigel 77 }
3246     }
3247     else
3248     #endif
3249     /* Not UTF-8 mode */
3250     {
3251     for (fi = min;; fi++)
3252     {
3253 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3254 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3255 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3256 ph10 426 if (eptr >= md->end_subject)
3257     {
3258     SCHECK_PARTIAL();
3259 ph10 510 MRRETURN(MATCH_NOMATCH);
3260 ph10 427 }
3261 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3262 nigel 77 }
3263     }
3264     /* Control never gets here */
3265     }
3266    
3267     /* Maximize case */
3268    
3269     else
3270     {
3271     pp = eptr;
3272    
3273     #ifdef SUPPORT_UTF8
3274     /* UTF-8 mode */
3275     if (utf8)
3276     {
3277 nigel 93 register unsigned int d;
3278 nigel 77 for (i = min; i < max; i++)
3279     {
3280     int len = 1;
3281 ph10 463 if (eptr >= md->end_subject)
3282 ph10 462 {
3283 ph10 463 SCHECK_PARTIAL();
3284 ph10 462 break;
3285 ph10 463 }
3286 nigel 77 GETCHARLEN(d, eptr, len);
3287     if (fc == d) break;
3288     eptr += len;
3289     }
3290 nigel 93 if (possessive) continue;
3291 nigel 77 for(;;)
3292     {
3293 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3294 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3295     if (eptr-- == pp) break; /* Stop if tried at original pos */
3296     BACKCHAR(eptr);
3297     }
3298     }
3299     else
3300     #endif
3301     /* Not UTF-8 mode */
3302     {
3303     for (i = min; i < max; i++)
3304     {
3305 ph10 463 if (eptr >= md->end_subject)
3306 ph10 462 {
3307 ph10 463 SCHECK_PARTIAL();
3308 ph10 462 break;
3309 ph10 463 }
3310 ph10 462 if (fc == *eptr) break;
3311 nigel 77 eptr++;
3312     }
3313 nigel 93 if (possessive) continue;
3314 nigel 77 while (eptr >= pp)
3315     {
3316 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3317 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3318     eptr--;
3319     }
3320     }
3321    
3322 ph10 510 MRRETURN(MATCH_NOMATCH);
3323 nigel 77 }
3324     }
3325     /* Control never gets here */
3326    
3327     /* Match a single character type repeatedly; several different opcodes
3328     share code. This is very similar to the code for single characters, but we
3329     repeat it in the interests of efficiency. */
3330    
3331     case OP_TYPEEXACT:
3332     min = max = GET2(ecode, 1);
3333     minimize = TRUE;
3334     ecode += 3;
3335     goto REPEATTYPE;
3336    
3337     case OP_TYPEUPTO:
3338     case OP_TYPEMINUPTO:
3339     min = 0;
3340     max = GET2(ecode, 1);
3341     minimize = *ecode == OP_TYPEMINUPTO;
3342     ecode += 3;
3343     goto REPEATTYPE;
3344    
3345 nigel 93 case OP_TYPEPOSSTAR:
3346     possessive = TRUE;
3347     min = 0;
3348     max = INT_MAX;
3349     ecode++;
3350     goto REPEATTYPE;
3351    
3352     case OP_TYPEPOSPLUS:
3353     possessive = TRUE;
3354     min = 1;
3355     max = INT_MAX;
3356     ecode++;
3357     goto REPEATTYPE;
3358    
3359     case OP_TYPEPOSQUERY:
3360     possessive = TRUE;
3361     min = 0;
3362     max = 1;
3363     ecode++;
3364     goto REPEATTYPE;
3365    
3366     case OP_TYPEPOSUPTO:
3367     possessive = TRUE;
3368     min = 0;
3369     max = GET2(ecode, 1);
3370     ecode += 3;
3371     goto REPEATTYPE;
3372    
3373 nigel 77 case OP_TYPESTAR:
3374     case OP_TYPEMINSTAR:
3375     case OP_TYPEPLUS:
3376     case OP_TYPEMINPLUS:
3377     case OP_TYPEQUERY:
3378     case OP_TYPEMINQUERY:
3379     c = *ecode++ - OP_TYPESTAR;
3380     minimize = (c & 1) != 0;
3381     min = rep_min[c]; /* Pick up values from tables; */
3382     max = rep_max[c]; /* zero for max => infinity */
3383     if (max == 0) max = INT_MAX;
3384    
3385     /* Common code for all repeated single character type matches. Note that
3386     in UTF-8 mode, \d, \s and \w match only one-byte characters and \C matches
3387     a single byte; '.' and the remaining types can match multibyte characters. */
3388    
3389     REPEATTYPE:
3390     ctype = *ecode++; /* Code for the character type */
3391    
3392     #ifdef SUPPORT_UCP
3393     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3394     {
3395     prop_fail_result = ctype == OP_NOTPROP;
3396     prop_type = *ecode++;
3397 nigel 87 prop_value = *ecode++;
3398 nigel 77 }
3399     else prop_type = -1;
3400     #endif
3401    
3402     /* First, ensure the minimum number of matches are present. Use inline
3403     code for maximizing the speed, and do the type test once at the start
3404 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3405 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3406     and single-bytes. */
3407    
3408     if (min > 0)
3409     {
3410     #ifdef SUPPORT_UCP
3411 nigel 87 if (prop_type >= 0)
3412 nigel 77 {
3413 nigel 87 switch(prop_type)
3414 nigel 77 {
3415 nigel 87 case PT_ANY:
3416 ph10 510 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3417 nigel 87 for (i = 1; i <= min; i++)
3418     {
3419 ph10 427 if (eptr >= md->end_subject)
3420 ph10 426 {
3421 ph10 427 SCHECK_PARTIAL();
3422 ph10 510 MRRETURN(MATCH_NOMATCH);
3423 ph10 427 }
3424 ph10 184 GETCHARINCTEST(c, eptr);
3425 nigel 87 }
3426     break;
3427    
3428     case PT_LAMP:
3429     for (i = 1; i <= min; i++)
3430     {
3431 ph10 427 if (eptr >= md->end_subject)
3432 ph10 426 {
3433 ph10 427 SCHECK_PARTIAL();
3434 ph10 510 MRRETURN(MATCH_NOMATCH);
3435 ph10 427 }
3436 ph10 184 GETCHARINCTEST(c, eptr);
3437 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3438 nigel 87 if ((prop_chartype == ucp_Lu ||
3439     prop_chartype == ucp_Ll ||
3440     prop_chartype == ucp_Lt) == prop_fail_result)
3441 ph10 510 MRRETURN(MATCH_NOMATCH);
3442 nigel 87 }
3443     break;
3444    
3445     case PT_GC:
3446     for (i = 1; i <= min; i++)
3447     {
3448 ph10 427 if (eptr >= md->end_subject)
3449 ph10 426 {
3450 ph10 427 SCHECK_PARTIAL();
3451 ph10 510 MRRETURN(MATCH_NOMATCH);
3452 ph10 427 }
3453 ph10 184 GETCHARINCTEST(c, eptr);
3454 ph10 349 prop_category = UCD_CATEGORY(c);
3455 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3456 ph10 510 MRRETURN(MATCH_NOMATCH);
3457 nigel 87 }
3458     break;
3459    
3460     case PT_PC:
3461     for (i = 1; i <= min; i++)
3462     {
3463 ph10 427 if (eptr >= md->end_subject)
3464 ph10 426 {
3465 ph10 427 SCHECK_PARTIAL();
3466 ph10 510 MRRETURN(MATCH_NOMATCH);
3467 ph10 427 }
3468 ph10 184 GETCHARINCTEST(c, eptr);
3469 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3470 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3471 ph10 510 MRRETURN(MATCH_NOMATCH);
3472 nigel 87 }
3473     break;
3474    
3475     case PT_SC:
3476     for (i = 1; i <= min; i++)
3477     {
3478 ph10 427 if (eptr >= md->end_subject)
3479 ph10 426 {
3480 ph10 427 SCHECK_PARTIAL();
3481 ph10 510 MRRETURN(MATCH_NOMATCH);
3482 ph10 427 }
3483 ph10 184 GETCHARINCTEST(c, eptr);
3484 ph10 349 prop_script = UCD_SCRIPT(c);
3485 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3486 ph10 510 MRRETURN(MATCH_NOMATCH);
3487 nigel 87 }
3488     break;
3489    
3490     default:
3491     RRETURN(PCRE_ERROR_INTERNAL);
3492 nigel 77 }
3493     }
3494    
3495     /* Match extended Unicode sequences. We will get here only if the
3496     support is in the binary; otherwise a compile-time error occurs. */
3497    
3498     else if (ctype == OP_EXTUNI)
3499     {
3500     for (i = 1; i <= min; i++)
3501     {
3502 ph10 427 if (eptr >= md->end_subject)
3503 ph10 426 {
3504 ph10 427 SCHECK_PARTIAL();
3505 ph10 510 MRRETURN(MATCH_NOMATCH);
3506 ph10 427 }
3507 nigel 77 GETCHARINCTEST(c, eptr);
3508 ph10 349 prop_category = UCD_CATEGORY(c);
3509 ph10 510 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3510 nigel 77 while (eptr < md->end_subject)
3511     {
3512     int len = 1;
3513 ph10 426 if (!utf8) c = *eptr;
3514     else { GETCHARLEN(c, eptr, len); }
3515 ph10 349 prop_category = UCD_CATEGORY(c);
3516 nigel 77 if (prop_category != ucp_M) break;
3517     eptr += len;
3518     }
3519     }
3520     }
3521    
3522     else
3523     #endif /* SUPPORT_UCP */
3524    
3525     /* Handle all other cases when the coding is UTF-8 */
3526    
3527     #ifdef SUPPORT_UTF8
3528     if (utf8) switch(ctype)
3529     {
3530     case OP_ANY:
3531     for (i = 1; i <= min; i++)
3532     {
3533 ph10 426 if (eptr >= md->end_subject)
3534     {
3535 ph10 427 SCHECK_PARTIAL();
3536 ph10 510 MRRETURN(MATCH_NOMATCH);
3537 ph10 427 }
3538 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3539 nigel 91 eptr++;
3540 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3541     }
3542     break;
3543    
3544 ph10 341 case OP_ALLANY:
3545     for (i = 1; i <= min; i++)
3546     {
3547 ph10 427 if (eptr >= md->end_subject)
3548 ph10 426 {
3549     SCHECK_PARTIAL();
3550 ph10 510 MRRETURN(MATCH_NOMATCH);
3551 ph10 427 }
3552 ph10 341 eptr++;
3553     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3554     }
3555     break;
3556    
3557 nigel 77 case OP_ANYBYTE:
3558 ph10 510 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3559 nigel 77 eptr += min;
3560     break;
3561    
3562 nigel 93 case OP_ANYNL:
3563     for (i = 1; i <= min; i++)
3564     {
3565 ph10 427 if (eptr >= md->end_subject)
3566 ph10 426 {
3567     SCHECK_PARTIAL();
3568 ph10 510 MRRETURN(MATCH_NOMATCH);
3569 ph10 427 }
3570 nigel 93 GETCHARINC(c, eptr);
3571     switch(c)
3572     {
3573 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3574 nigel 93 case 0x000d:
3575     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3576     break;
3577 ph10 231
3578 nigel 93 case 0x000a:
3579 ph10 231 break;
3580    
3581 nigel 93 case 0x000b:
3582     case 0x000c:
3583     case 0x0085:
3584     case 0x2028:
3585     case 0x2029:
3586 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3587 nigel 93 break;
3588     }
3589     }
3590     break;
3591    
3592 ph10 178 case OP_NOT_HSPACE:
3593     for (i = 1; i <= min; i++)
3594     {
3595 ph10 427 if (eptr >= md->end_subject)
3596 ph10 426 {
3597     SCHECK_PARTIAL();
3598 ph10 510 MRRETURN(MATCH_NOMATCH);
3599 ph10 427 }
3600 ph10 178 GETCHARINC(c, eptr);
3601     switch(c)
3602     {
3603     default: break;
3604     case 0x09: /* HT */
3605     case 0x20: /* SPACE */
3606     case 0xa0: /* NBSP */
3607     case 0x1680: /* OGHAM SPACE MARK */
3608     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3609     case 0x2000: /* EN QUAD */
3610     case 0x2001: /* EM QUAD */
3611     case 0x2002: /* EN SPACE */
3612     case 0x2003: /* EM SPACE */
3613     case 0x2004: /* THREE-PER-EM SPACE */
3614     case 0x2005: /* FOUR-PER-EM SPACE */
3615     case 0x2006: /* SIX-PER-EM SPACE */
3616     case 0x2007: /* FIGURE SPACE */
3617     case 0x2008: /* PUNCTUATION SPACE */
3618     case 0x2009: /* THIN SPACE */
3619     case 0x200A: /* HAIR SPACE */
3620     case 0x202f: /* NARROW NO-BREAK SPACE */
3621     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3622     case 0x3000: /* IDEOGRAPHIC SPACE */
3623 ph10 510 MRRETURN(MATCH_NOMATCH);
3624 ph10 178 }
3625     }
3626     break;
3627 ph10 182
3628 ph10 178 case OP_HSPACE:
3629     for (i = 1; i <= min; i++)
3630     {
3631 ph10 427 if (eptr >= md->end_subject)
3632 ph10 426 {
3633 ph10 427 SCHECK_PARTIAL();
3634 ph10 510 MRRETURN(MATCH_NOMATCH);
3635 ph10 427 }
3636 ph10 178 GETCHARINC(c, eptr);
3637     switch(c)
3638     {
3639 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3640 ph10 178 case 0x09: /* HT */
3641     case 0x20: /* SPACE */
3642     case 0xa0: /* NBSP */
3643     case 0x1680: /* OGHAM SPACE MARK */
3644     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3645     case 0x2000: /* EN QUAD */
3646     case 0x2001: /* EM QUAD */
3647     case 0x2002: /* EN SPACE */
3648     case 0x2003: /* EM SPACE */
3649     case 0x2004: /* THREE-PER-EM SPACE */
3650     case 0x2005: /* FOUR-PER-EM SPACE */
3651     case 0x2006: /* SIX-PER-EM SPACE */
3652     case 0x2007: /* FIGURE SPACE */
3653     case 0x2008: /* PUNCTUATION SPACE */
3654     case 0x2009: /* THIN SPACE */
3655     case 0x200A: /* HAIR SPACE */
3656     case 0x202f: /* NARROW NO-BREAK SPACE */
3657     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3658     case 0x3000: /* IDEOGRAPHIC SPACE */
3659     break;
3660     }
3661     }
3662     break;
3663 ph10 182
3664 ph10 178 case OP_NOT_VSPACE:
3665     for (i = 1; i <= min; i++)
3666     {
3667 ph10 427 if (eptr >= md->end_subject)
3668 ph10 426 {
3669 ph10 427 SCHECK_PARTIAL();
3670 ph10 510 MRRETURN(MATCH_NOMATCH);
3671 ph10 427 }
3672 ph10 178 GETCHARINC(c, eptr);
3673     switch(c)
3674     {
3675     default: break;
3676     case 0x0a: /* LF */
3677     case 0x0b: /* VT */
3678     case 0x0c: /* FF */
3679     case 0x0d: /* CR */
3680     case 0x85: /* NEL */
3681     case 0x2028: /* LINE SEPARATOR */
3682     case 0x2029: /* PARAGRAPH SEPARATOR */
3683 ph10 510 MRRETURN(MATCH_NOMATCH);
3684 ph10 178 }
3685     }
3686     break;
3687 ph10 182
3688 ph10 178 case OP_VSPACE:
3689     for (i = 1; i <= min; i++)
3690     {
3691 ph10 427 if (eptr >= md->end_subject)
3692 ph10 426 {
3693 ph10 427 SCHECK_PARTIAL();
3694 ph10 510 MRRETURN(MATCH_NOMATCH);
3695 ph10 427 }
3696 ph10 178 GETCHARINC(c, eptr);
3697     switch(c)
3698     {
3699 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3700 ph10 178 case 0x0a: /* LF */
3701     case 0x0b: /* VT */
3702     case 0x0c: /* FF */
3703     case 0x0d: /* CR */
3704     case 0x85: /* NEL */
3705     case 0x2028: /* LINE SEPARATOR */
3706     case 0x2029: /* PARAGRAPH SEPARATOR */
3707 ph10 182 break;
3708 ph10 178 }
3709     }
3710     break;
3711    
3712 nigel 77 case OP_NOT_DIGIT:
3713     for (i = 1; i <= min; i++)
3714     {
3715 ph10 427 if (eptr >= md->end_subject)
3716 ph10 426 {
3717 ph10 427 SCHECK_PARTIAL();
3718 ph10 510 MRRETURN(MATCH_NOMATCH);
3719 ph10 427 }
3720 nigel 77 GETCHARINC(c, eptr);
3721     if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3722 ph10 510 MRRETURN(MATCH_NOMATCH);
3723 nigel 77 }
3724     break;
3725    
3726     case OP_DIGIT:
3727     for (i = 1; i <= min; i++)
3728     {
3729 ph10 427 if (eptr >= md->end_subject)
3730 ph10 426 {
3731 ph10 427 SCHECK_PARTIAL();
3732 ph10 510 MRRETURN(MATCH_NOMATCH);
3733 ph10 427 }
3734 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3735 ph10 510 MRRETURN(MATCH_NOMATCH);
3736 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3737     }
3738     break;
3739    
3740     case OP_NOT_WHITESPACE:
3741     for (i = 1; i <= min; i++)
3742     {
3743 ph10 427 if (eptr >= md->end_subject)
3744 ph10 426 {
3745 ph10 427 SCHECK_PARTIAL();
3746 ph10 510 MRRETURN(MATCH_NOMATCH);
3747 ph10 427 }
3748 ph10 426 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3749 ph10 510 MRRETURN(MATCH_NOMATCH);
3750 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3751 nigel 77 }
3752     break;
3753    
3754     case OP_WHITESPACE:
3755     for (i = 1; i <= min; i++)
3756     {
3757 ph10 427 if (eptr >= md->end_subject)
3758 ph10 426 {
3759 ph10 427 SCHECK_PARTIAL();
3760 ph10 510 MRRETURN(MATCH_NOMATCH);
3761 ph10 427 }
3762 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3763 ph10 510 MRRETURN(MATCH_NOMATCH);
3764 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3765     }
3766     break;
3767    
3768     case OP_NOT_WORDCHAR:
3769     for (i = 1; i <= min; i++)
3770     {
3771 ph10 482 if (eptr >= md->end_subject)
3772     {
3773     SCHECK_PARTIAL();
3774 ph10 510 MRRETURN(MATCH_NOMATCH);
3775 ph10 482 }
3776     if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3777 ph10 510 MRRETURN(MATCH_NOMATCH);
3778 ph10 219 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3779 nigel 77 }
3780     break;
3781    
3782     case OP_WORDCHAR:
3783     for (i = 1; i <= min; i++)
3784     {
3785 ph10 427 if (eptr >= md->end_subject)
3786 ph10 426 {
3787 ph10 427 SCHECK_PARTIAL();
3788 ph10 510 MRRETURN(MATCH_NOMATCH);
3789 ph10 427 }
3790 ph10 426 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3791 ph10 510 MRRETURN(MATCH_NOMATCH);
3792 nigel 77 /* No need to skip more bytes - we know it's a 1-byte character */
3793     }
3794     break;
3795    
3796     default:
3797     RRETURN(PCRE_ERROR_INTERNAL);
3798     } /* End switch(ctype) */
3799    
3800     else
3801     #endif /* SUPPORT_UTF8 */
3802    
3803     /* Code for the non-UTF-8 case for minimum matching of operators other
3804 ph10 426 than OP_PROP and OP_NOTPROP. */
3805 nigel 77
3806     switch(ctype)
3807     {
3808     case OP_ANY:
3809 ph10 342 for (i = 1; i <= min; i++)
3810 nigel 77 {
3811 ph10 427 if (eptr >= md->end_subject)
3812 ph10 426 {
3813 ph10 427 SCHECK_PARTIAL();
3814 ph10 510 MRRETURN(MATCH_NOMATCH);
3815 ph10 427 }
3816 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3817 ph10 342 eptr++;
3818 nigel 77 }
3819     break;
3820    
3821 ph10 341 case OP_ALLANY:
3822 ph10 443 if (eptr > md->end_subject - min)
3823 ph10 428 {
3824 ph10 443 SCHECK_PARTIAL();
3825 ph10 510 MRRETURN(MATCH_NOMATCH);
3826 ph10 443 }
3827 ph10 341 eptr += min;
3828     break;
3829    
3830 nigel 77 case OP_ANYBYTE:
3831 ph10 443 if (eptr > md->end_subject - min)
3832 ph10 428 {
3833 ph10 443 SCHECK_PARTIAL();
3834 ph10 510 MRRETURN(MATCH_NOMATCH);
3835 ph10 443 }
3836 nigel 77 eptr += min;
3837     break;
3838    
3839 nigel 93 case OP_ANYNL:
3840     for (i = 1; i <= min; i++)
3841     {
3842 ph10 427 if (eptr >= md->end_subject)
3843 ph10 426 {
3844 ph10 427 SCHECK_PARTIAL();
3845 ph10 510 MRRETURN(MATCH_NOMATCH);
3846 ph10 427 }
3847 nigel 93 switch(*eptr++)
3848     {
3849 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3850 nigel 93 case 0x000d:
3851     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3852     break;
3853     case 0x000a:
3854 ph10 231 break;
3855    
3856 nigel 93 case 0x000b:
3857     case 0x000c:
3858     case 0x0085:
3859 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3860 nigel 93 break;
3861     }
3862     }
3863     break;
3864    
3865 ph10 178 case OP_NOT_HSPACE:
3866     for (i = 1; i <= min; i++)
3867     {
3868 ph10 427 if (eptr >= md->end_subject)
3869 ph10 426 {
3870 ph10 427 SCHECK_PARTIAL();
3871 ph10 510