/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 527 - (hide annotations) (download)
Sat May 29 15:50:39 2010 UTC (4 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 184892 byte(s)
Fix oversight for no-recurse version.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 473 Copyright (c) 1997-2010 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains pcre_exec(), the externally visible function that does
42     pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43     possible. There are also some static supporting functions. */
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 93 #define NLBLOCK md /* Block containing newline information */
50     #define PSSTART start_subject /* Field containing processed string start */
51     #define PSEND end_subject /* Field containing processed string end */
52    
53 nigel 77 #include "pcre_internal.h"
54    
55 ph10 137 /* Undefine some potentially clashing cpp symbols */
56    
57     #undef min
58     #undef max
59    
60 nigel 77 /* Flag bits for the match() function */
61    
62 nigel 93 #define match_condassert 0x01 /* Called to check a condition assertion */
63     #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64 nigel 77
65     /* Non-error returns from the match() function. Error returns are externally
66     defined PCRE_ERROR_xxx codes, which are all negative. */
67    
68     #define MATCH_MATCH 1
69     #define MATCH_NOMATCH 0
70    
71 ph10 211 /* Special internal returns from the match() function. Make them sufficiently
72 ph10 210 negative to avoid the external error codes. */
73    
74 ph10 511 #define MATCH_ACCEPT (-999)
75     #define MATCH_COMMIT (-998)
76     #define MATCH_PRUNE (-997)
77     #define MATCH_SKIP (-996)
78     #define MATCH_SKIP_ARG (-995)
79     #define MATCH_THEN (-994)
80 ph10 210
81 ph10 510 /* This is a convenience macro for code that occurs many times. */
82    
83     #define MRRETURN(ra) \
84     { \
85     md->mark = markptr; \
86     RRETURN(ra); \
87     }
88    
89 nigel 77 /* Maximum number of ints of offset to save on the stack for recursive calls.
90     If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91     because the offset vector is always a multiple of 3 long. */
92    
93     #define REC_STACK_SAVE_MAX 30
94    
95     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96    
97     static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
98     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
99    
100    
101    
102 ph10 475 #ifdef PCRE_DEBUG
103 nigel 77 /*************************************************
104     * Debugging function to print chars *
105     *************************************************/
106    
107     /* Print a sequence of chars in printable format, stopping at the end of the
108     subject if requested.
109    
110     Arguments:
111     p points to characters
112     length number to print
113     is_subject TRUE if printing from within md->start_subject
114     md pointer to matching data block, if is_subject is TRUE
115    
116     Returns: nothing
117     */
118    
119     static void
120     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121     {
122 nigel 93 unsigned int c;
123 nigel 77 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124     while (length-- > 0)
125     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126     }
127     #endif
128    
129    
130    
131     /*************************************************
132     * Match a back-reference *
133     *************************************************/
134    
135     /* If a back reference hasn't been set, the length that is passed is greater
136     than the number of characters left in the string, so the match fails.
137    
138     Arguments:
139     offset index into the offset vector
140     eptr points into the subject
141     length length to be matched
142     md points to match data block
143     ims the ims flags
144    
145     Returns: TRUE if matched
146     */
147    
148     static BOOL
149 nigel 87 match_ref(int offset, register USPTR eptr, int length, match_data *md,
150 nigel 77 unsigned long int ims)
151     {
/* p is the start of the referenced text: the start of the subject plus the
value held at index "offset" of the offset vector. */
152 nigel 87 USPTR p = md->start_subject + md->offset_vector[offset];
153 nigel 77
/* Debug builds trace both strings before any checking is done. NOTE(review):
the second pchars() call passes is_subject as FALSE, so pchars() does not clamp
the count to md->end_subject there. The header comment says an unset back
reference arrives with a length greater than what is left in the subject -
confirm that this trace cannot read beyond the buffer in that case. */
154 ph10 475 #ifdef PCRE_DEBUG
155 nigel 77 if (eptr >= md->end_subject)
156     printf("matching subject <null>");
157     else
158     {
159     printf("matching subject ");
160     pchars(eptr, length, TRUE, md);
161     }
162     printf(" against backref ");
163     pchars(p, length, FALSE, md);
164     printf("\n");
165     #endif
166    
167     /* Always fail if not enough characters left */
168    
169     if (length > md->end_subject - eptr) return FALSE;
170    
171 ph10 354 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172     properly if Unicode properties are supported. Otherwise, we can check only
173     ASCII characters. */
174 nigel 77
175     if ((ims & PCRE_CASELESS) != 0)
176     {
177 ph10 354 #ifdef SUPPORT_UTF8
178     #ifdef SUPPORT_UCP
179     if (md->utf8)
180     {
/* The loop ends once eptr has moved "length" bytes through the subject. Each
pass fetches one character from the subject and one from the reference
(GETCHARINC is defined elsewhere; presumably it decodes one UTF-8 character and
advances the pointer). The pair is accepted when the two values are equal or
when the subject character equals UCD_OTHERCASE() of the reference character.
NOTE(review): only the subject side is bounded; if a character and its other
case occupy different numbers of bytes, p and eptr advance by different
amounts - confirm whether that can matter here. */
181 ph10 358 USPTR endptr = eptr + length;
182 ph10 354 while (eptr < endptr)
183     {
184 ph10 358 int c, d;
185 ph10 354 GETCHARINC(c, eptr);
186     GETCHARINC(d, p);
187     if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188 ph10 358 }
189     }
190 ph10 354 else
191     #endif
192     #endif
193    
194     /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195     is no UCP support. */
196 ph10 358
/* When both SUPPORT_UTF8 and SUPPORT_UCP are defined, the "else" above governs
this loop, so it runs only if md->utf8 is not set; otherwise it is the sole
caseless path. Each byte of both strings is mapped through the md->lcc table
(presumably the lower-casing table - confirm) before being compared. */
197 nigel 77 while (length-- > 0)
198 ph10 354 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199 nigel 77 }
200 ph10 358
201 ph10 354 /* In the caseful case, we can just compare the bytes, whether or not we
202     are in UTF-8 mode. */
203 ph10 358
204 nigel 77 else
205     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
/* Every one of the "length" bytes (or characters) compared equal. */
207     return TRUE;
208     }
209    
210    
211    
212     /***************************************************************************
213     ****************************************************************************
214     RECURSION IN THE match() FUNCTION
215    
216 nigel 87 The match() function is highly recursive, though not every recursive call
217     increases the recursive depth. Nevertheless, some regular expressions can cause
218     it to recurse to a great depth. I was writing for Unix, so I just let it call
219     itself recursively. This uses the stack for saving everything that has to be
220     saved for a recursive call. On Unix, the stack can be large, and this works
221     fine.
222 nigel 77
223 nigel 87 It turns out that on some non-Unix-like systems there are problems with
224     programs that use a lot of stack. (This despite the fact that every last chip
225     has oodles of memory these days, and techniques for extending the stack have
226     been known for decades.) So....
227 nigel 77
228     There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229     calls by keeping local variables that need to be preserved in blocks of memory
230 nigel 87 obtained from malloc() instead of on the stack. Macros are used to
231 nigel 77 achieve this so that the actual code doesn't look very different to what it
232     always used to.
233 ph10 164
234 ph10 165 The original heap-recursive code used longjmp(). However, it seems that this
235 ph10 164 can be very slow on some operating systems. Following a suggestion from Stan
236     Switzer, the use of longjmp() has been abolished, at the cost of having to
237     provide a unique number for each call to RMATCH. There is no way of generating
238     a sequence of numbers at compile time in C. I have given them names, to make
239     them stand out more clearly.
240    
241     Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242     FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243 ph10 165 tests. Furthermore, not using longjmp() means that local dynamic variables
244     don't have indeterminate values; this has meant that the frame size can be
245 ph10 164 reduced because the result can be "passed back" by straight setting of the
246     variable instead of being passed in the frame.
247 nigel 77 ****************************************************************************
248     ***************************************************************************/
249    
250 ph10 212 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251     below must be updated in sync. */
252 nigel 77
253 ph10 164 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
254     RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255     RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256     RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257 ph10 210 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258 ph10 527 RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259     RM61, RM62 };
260 ph10 164
261 nigel 87 /* These versions of the macros use the stack, as normal. There are debugging
262 ph10 165 versions and production versions. Note that the "rw" argument of RMATCH isn't
263 ph10 501 actually used in this definition. */
264 nigel 77
265     #ifndef NO_RECURSE
266     #define REGISTER register
267 ph10 164
268 ph10 475 #ifdef PCRE_DEBUG
269 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
270 nigel 87 { \
271     printf("match() called in line %d\n", __LINE__); \
272 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273 nigel 87 printf("to line %d\n", __LINE__); \
274     }
275     #define RRETURN(ra) \
276     { \
277     printf("match() returned %d from line %d ", ra, __LINE__); \
278     return ra; \
279     }
280     #else
281 ph10 164 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282 ph10 501 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283 nigel 77 #define RRETURN(ra) return ra
284 nigel 87 #endif
285    
286 nigel 77 #else
287    
288    
289 ph10 164 /* These versions of the macros manage a private stack on the heap. Note that
290     the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291     argument of match(), which never changes. */
292 nigel 77
293     #define REGISTER
294    
/* Heap-based RMATCH. A new frame is obtained via pcre_stack_malloc, the current
frame records in Xwhere which call site (rw) is making the call, the new frame
is loaded with the arguments for the nested match and linked back to the
current frame, and control jumps to HEAP_RECURSE. The label L_<rw> marks where
execution carries on afterwards (presumably reached through the dispatch at
HEAP_RETURN, which is not part of this macro). The rd argument is not used.

If the new frame cannot be obtained, the current level gives up at once with
PCRE_ERROR_NOMEMORY by way of RRETURN, rather than storing through a NULL
pointer. RRETURN releases the current frame and hands the error code to the
level above, so the failure propagates like any other negative return. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xmarkptr = markptr;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
315    
/* Heap-based RRETURN. The current frame is unlinked and released with
pcre_stack_free, making the previous frame current again. If there is a
previous frame, the result is placed in rrc and control goes to HEAP_RETURN;
if there is none (the released frame was the top-level one), the result is
returned from match() itself.

Note that "ra" is evaluated only after the frame has been switched and the old
one freed. NOTE(review): an argument that expands to a frame-resident variable
would therefore be read from the previous frame (or, at the top level, through
a NULL frame pointer) - confirm that callers pass only constants or true local
variables such as rrc. */
316     #define RRETURN(ra)\
317     {\
318 ph10 527 heapframe *oldframe = frame;\
319     frame = oldframe->Xprevframe;\
320     (pcre_stack_free)(oldframe);\
321 nigel 77 if (frame != NULL)\
322     {\
323 ph10 164 rrc = ra;\
324     goto HEAP_RETURN;\
325 nigel 77 }\
326     return ra;\
327     }
328    
329    
330     /* Structure for remembering the local variables in a private frame */
331    
/* One heapframe holds the state of one level of simulated recursion when
NO_RECURSE is defined. Inside match(), each field is reached through a macro
that has the field's name without the leading X (for example, eptr stands for
frame->Xeptr). */
332     typedef struct heapframe {
/* The frame of the level that made the call. It is set to NULL in the frame
created on entry to match(), which marks the top level. */
333     struct heapframe *Xprevframe;
334    
335     /* Function arguments that may change */
336    
337 ph10 409 USPTR Xeptr;
338 nigel 77 const uschar *Xecode;
339 ph10 409 USPTR Xmstart;
340 ph10 501 USPTR Xmarkptr;
341 nigel 77 int Xoffset_top;
/* NOTE(review): match() declares its ims argument as unsigned long int, but
this copy is a signed long int - confirm the difference is harmless. */
342     long int Xims;
343     eptrblock *Xeptrb;
344     int Xflags;
/* RMATCH sets this to the calling frame's Xrdepth plus one. */
345 nigel 91 unsigned int Xrdepth;
346 nigel 77
347     /* Function local variables */
348    
349 ph10 409 USPTR Xcallpat;
350 ph10 406 #ifdef SUPPORT_UTF8
351 ph10 409 USPTR Xcharptr;
352 ph10 406 #endif
353 ph10 409 USPTR Xdata;
354     USPTR Xnext;
355     USPTR Xpp;
356     USPTR Xprev;
357     USPTR Xsaved_eptr;
358 nigel 77
359     recursion_info Xnew_recursive;
360    
361     BOOL Xcur_is_word;
362     BOOL Xcondition;
363     BOOL Xprev_is_word;
364    
365     unsigned long int Xoriginal_ims;
366    
367     #ifdef SUPPORT_UCP
368     int Xprop_type;
369 nigel 87 int Xprop_value;
370 nigel 77 int Xprop_fail_result;
371     int Xprop_category;
372     int Xprop_chartype;
373 nigel 87 int Xprop_script;
374 ph10 123 int Xoclength;
375     uschar Xocchars[8];
376 nigel 77 #endif
377    
378 ph10 403 int Xcodelink;
379 nigel 77 int Xctype;
/* In the stack-recursion build, fc and fi are simply aliases for the ordinary
locals c and i; here they need slots of their own. */
380 nigel 93 unsigned int Xfc;
381 nigel 77 int Xfi;
382     int Xlength;
383     int Xmax;
384     int Xmin;
385     int Xnumber;
386     int Xoffset;
387     int Xop;
388     int Xsave_capture_last;
389     int Xsave_offset1, Xsave_offset2, Xsave_offset3;
/* Room for up to REC_STACK_SAVE_MAX ints of saved offsets. */
390     int Xstacksave[REC_STACK_SAVE_MAX];
391    
392     eptrblock Xnewptrb;
393    
394 ph10 164 /* Where to jump back to */
395 nigel 77
/* RMATCH stores its call-site number (one of the RMn values) in the calling
frame's Xwhere just before jumping to HEAP_RECURSE. */
396 ph10 164 int Xwhere;
397 ph10 165
398 nigel 77 } heapframe;
399    
400     #endif
401    
402    
403     /***************************************************************************
404     ***************************************************************************/
405    
406    
407    
408     /*************************************************
409     * Match from current position *
410     *************************************************/
411    
412 nigel 93 /* This function is called recursively in many circumstances. Whenever it
413 nigel 77 returns a negative (error) response, the outer incarnation must also return the
414 ph10 426 same response. */
415 nigel 77
416 ph10 426 /* These macros pack up tests that are used for partial matching, and which
417     appear several times in the code. We set the "hit end" flag if the pointer is
418     at the end of the subject and also past the start of the subject (i.e.
419 ph10 427 something has been matched). For hard partial matching, we then return
420     immediately. The second one is used when we already know we are past the end of
421     the subject. */
422 ph10 426
423     #define CHECK_PARTIAL()\
424 ph10 435 if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
425 ph10 427 {\
426     md->hitend = TRUE;\
427 ph10 510 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
428 ph10 427 }
429 ph10 426
430     #define SCHECK_PARTIAL()\
431 ph10 462 if (md->partial != 0 && eptr > mstart)\
432 ph10 427 {\
433     md->hitend = TRUE;\
434 ph10 510 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
435 ph10 427 }
436 ph10 426
437 ph10 427
438 ph10 426 /* Performance note: It might be tempting to extract commonly used fields from
439     the md structure (e.g. utf8, end_subject) into individual variables to improve
440 nigel 77 performance. Tests using gcc on a SPARC disproved this; in the first case, it
441     made performance worse.
442    
443     Arguments:
444 nigel 93 eptr pointer to current character in subject
445     ecode pointer to current position in compiled code
446 ph10 168 mstart pointer to the current match start position (can be modified
447 ph10 172 by encountering \K)
448 ph10 501 markptr pointer to the most recent MARK name, or NULL
449 nigel 77 offset_top current top pointer
450     md pointer to "static" info for the match
451     ims current /i, /m, and /s options
452     eptrb pointer to chain of blocks containing eptr at start of
453     brackets - for testing for empty matches
454     flags can contain
455     match_condassert - this is an assertion condition
456 nigel 93 match_cbegroup - this is the start of an unlimited repeat
457     group that can match an empty string
458 nigel 87 rdepth the recursion depth
459 nigel 77
460     Returns: MATCH_MATCH if matched ) these values are >= 0
461     MATCH_NOMATCH if failed to match )
462 ph10 510 a negative MATCH_xxx value for PRUNE, SKIP, etc
463 nigel 77 a negative PCRE_ERROR_xxx value if aborted by an error condition
464 nigel 87 (e.g. stopped by repeated call or recursion limit)
465 nigel 77 */
466    
467     static int
468 ph10 510 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
469     const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
470 ph10 501 eptrblock *eptrb, int flags, unsigned int rdepth)
471 nigel 77 {
472     /* These variables do not need to be preserved over recursion in this function,
473 nigel 93 so they can be ordinary variables in all cases. Mark some of them with
474     "register" because they are used a lot in loops. */
475 nigel 77
476 nigel 91 register int rrc; /* Returns from recursive calls */
477     register int i; /* Used for loops not involving calls to RMATCH() */
478 nigel 93 register unsigned int c; /* Character values not kept over RMATCH() calls */
479 nigel 91 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
480 nigel 77
481 nigel 93 BOOL minimize, possessive; /* Quantifier options */
482 ph10 403 int condcode;
483 nigel 93
484 nigel 77 /* When recursion is not being used, all "local" variables that have to be
485     preserved over calls to RMATCH() are part of a "frame" which is obtained from
486     heap storage. Set up the top-level frame here; others are obtained from the
487     heap whenever RMATCH() does a "recursion". See the macro definitions above. */
488    
489     #ifdef NO_RECURSE
490     heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
491     frame->Xprevframe = NULL; /* Marks the top level */
492    
493     /* Copy in the original argument variables */
494    
495     frame->Xeptr = eptr;
496     frame->Xecode = ecode;
497 ph10 168 frame->Xmstart = mstart;
498 ph10 501 frame->Xmarkptr = markptr;
499 nigel 77 frame->Xoffset_top = offset_top;
500     frame->Xims = ims;
501     frame->Xeptrb = eptrb;
502     frame->Xflags = flags;
503 nigel 87 frame->Xrdepth = rdepth;
504 nigel 77
505     /* This is where control jumps back to to effect "recursion" */
506    
507     HEAP_RECURSE:
508    
509     /* Macros make the argument variables come from the current frame */
510    
511     #define eptr frame->Xeptr
512     #define ecode frame->Xecode
513 ph10 168 #define mstart frame->Xmstart
514 ph10 501 #define markptr frame->Xmarkptr
515 nigel 77 #define offset_top frame->Xoffset_top
516     #define ims frame->Xims
517     #define eptrb frame->Xeptrb
518     #define flags frame->Xflags
519 nigel 87 #define rdepth frame->Xrdepth
520 nigel 77
521     /* Ditto for the local variables */
522    
523     #ifdef SUPPORT_UTF8
524     #define charptr frame->Xcharptr
525     #endif
526     #define callpat frame->Xcallpat
527 ph10 403 #define codelink frame->Xcodelink
528 nigel 77 #define data frame->Xdata
529     #define next frame->Xnext
530     #define pp frame->Xpp
531     #define prev frame->Xprev
532     #define saved_eptr frame->Xsaved_eptr
533    
534     #define new_recursive frame->Xnew_recursive
535    
536     #define cur_is_word frame->Xcur_is_word
537     #define condition frame->Xcondition
538     #define prev_is_word frame->Xprev_is_word
539    
540     #define original_ims frame->Xoriginal_ims
541    
542     #ifdef SUPPORT_UCP
543     #define prop_type frame->Xprop_type
544 nigel 87 #define prop_value frame->Xprop_value
545 nigel 77 #define prop_fail_result frame->Xprop_fail_result
546     #define prop_category frame->Xprop_category
547     #define prop_chartype frame->Xprop_chartype
548 nigel 87 #define prop_script frame->Xprop_script
549 ph10 115 #define oclength frame->Xoclength
550     #define occhars frame->Xocchars
551 nigel 77 #endif
552    
553     #define ctype frame->Xctype
554     #define fc frame->Xfc
555     #define fi frame->Xfi
556     #define length frame->Xlength
557     #define max frame->Xmax
558     #define min frame->Xmin
559     #define number frame->Xnumber
560     #define offset frame->Xoffset
561     #define op frame->Xop
562     #define save_capture_last frame->Xsave_capture_last
563     #define save_offset1 frame->Xsave_offset1
564     #define save_offset2 frame->Xsave_offset2
565     #define save_offset3 frame->Xsave_offset3
566     #define stacksave frame->Xstacksave
567    
568     #define newptrb frame->Xnewptrb
569    
570     /* When recursion is being used, local variables are allocated on the stack and
571     get preserved during recursion in the normal way. In this environment, fi and
572     i, and fc and c, can be the same variables. */
573    
574 nigel 93 #else /* NO_RECURSE not defined */
575 nigel 77 #define fi i
576     #define fc c
577    
578    
579 nigel 87 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
580     const uschar *charptr; /* in small blocks of the code. My normal */
581     #endif /* style of coding would have declared */
582     const uschar *callpat; /* them within each of those blocks. */
583     const uschar *data; /* However, in order to accommodate the */
584     const uschar *next; /* version of this code that uses an */
585     USPTR pp; /* external "stack" implemented on the */
586     const uschar *prev; /* heap, it is easier to declare them all */
587     USPTR saved_eptr; /* here, so the declarations can be cut */
588     /* out in a block. The only declarations */
589     recursion_info new_recursive; /* within blocks below are for variables */
590     /* that do not have to be preserved over */
591     BOOL cur_is_word; /* a recursive call to RMATCH(). */
592     BOOL condition;
593 nigel 77 BOOL prev_is_word;
594    
595     unsigned long int original_ims;
596    
597     #ifdef SUPPORT_UCP
598     int prop_type;
599 nigel 87 int prop_value;
600 nigel 77 int prop_fail_result;
601     int prop_category;
602     int prop_chartype;
603 nigel 87 int prop_script;
604 ph10 115 int oclength;
605     uschar occhars[8];
606 nigel 77 #endif
607    
608 ph10 399 int codelink;
609 nigel 77 int ctype;
610     int length;
611     int max;
612     int min;
613     int number;
614     int offset;
615     int op;
616     int save_capture_last;
617     int save_offset1, save_offset2, save_offset3;
618     int stacksave[REC_STACK_SAVE_MAX];
619    
620     eptrblock newptrb;
621 nigel 93 #endif /* NO_RECURSE */
622 nigel 77
623     /* These statements are here to stop the compiler complaining about uninitialized
624     variables. */
625    
626     #ifdef SUPPORT_UCP
627 nigel 87 prop_value = 0;
628 nigel 77 prop_fail_result = 0;
629     #endif
630    
631 nigel 93
632 nigel 91 /* This label is used for tail recursion, which is used in a few cases even
633     when NO_RECURSE is not defined, in order to reduce the amount of stack that is
634     used. Thanks to Ian Taylor for noticing this possibility and sending the
635     original patch. */
636    
637     TAIL_RECURSE:
638    
639 nigel 87 /* OK, now we can get on with the real code of the function. Recursive calls
640     are specified by the macro RMATCH and RRETURN is used to return. When
641     NO_RECURSE is *not* defined, these just turn into a recursive call to match()
642 ph10 475 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
643 nigel 87 defined). However, RMATCH isn't like a function call because it's quite a
644     complicated macro. It has to be used in one particular way. This shouldn't,
645     however, impact performance when true recursion is being used. */
646 nigel 77
647 ph10 164 #ifdef SUPPORT_UTF8
648     utf8 = md->utf8; /* Local copy of the flag */
649     #else
650     utf8 = FALSE;
651     #endif
652    
653 nigel 87 /* First check that we haven't called match() too many times, or that we
654     haven't exceeded the recursive call limit. */
655    
656 nigel 77 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
657 nigel 87 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
658 nigel 77
659     original_ims = ims; /* Save for resetting on ')' */
660 nigel 91
661 nigel 93 /* At the start of a group with an unlimited repeat that may match an empty
662     string, the match_cbegroup flag is set. When this is the case, add the current
663     subject pointer to the chain of such remembered pointers, to be checked when we
664     hit the closing ket, in order to break infinite loops that match no characters.
665 ph10 197 When match() is called in other circumstances, don't add to the chain. The
666     match_cbegroup flag must NOT be used with tail recursion, because the memory
667     block that is used is on the stack, so a new one may be required for each
668     match(). */
669 nigel 77
670 nigel 93 if ((flags & match_cbegroup) != 0)
671 nigel 77 {
672 ph10 197 newptrb.epb_saved_eptr = eptr;
673     newptrb.epb_prev = eptrb;
674     eptrb = &newptrb;
675 nigel 77 }
676    
677 nigel 93 /* Now start processing the opcodes. */
678 nigel 77
679     for (;;)
680     {
681 nigel 93 minimize = possessive = FALSE;
682 nigel 77 op = *ecode;
683 ph10 443
684 nigel 93 switch(op)
685     {
686 ph10 510 case OP_MARK:
687     markptr = ecode + 2;
688     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
689 ph10 512 ims, eptrb, flags, RM55);
690    
691     /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
692     argument, and we must check whether that argument matches this MARK's
693     argument. It is passed back in md->start_match_ptr (an overloading of that
694     variable). If it does match, we reset that variable to the current subject
695     position and return MATCH_SKIP. Otherwise, pass back the return code
696 ph10 510 unaltered. */
697 ph10 512
698     if (rrc == MATCH_SKIP_ARG &&
699 ph10 510 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
700     {
701     md->start_match_ptr = eptr;
702     RRETURN(MATCH_SKIP);
703     }
704    
705 ph10 512 if (md->mark == NULL) md->mark = markptr;
706 ph10 510 RRETURN(rrc);
707    
708 ph10 210 case OP_FAIL:
709 ph10 510 MRRETURN(MATCH_NOMATCH);
710 ph10 211
711 ph10 510 case OP_COMMIT:
712     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
713     ims, eptrb, flags, RM52);
714     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
715     MRRETURN(MATCH_COMMIT);
716    
717 ph10 210 case OP_PRUNE:
718     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
719     ims, eptrb, flags, RM51);
720     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
721 ph10 510 MRRETURN(MATCH_PRUNE);
722 ph10 211
723 ph10 510 case OP_PRUNE_ARG:
724     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
725 ph10 512 ims, eptrb, flags, RM56);
726 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
727 ph10 510 md->mark = ecode + 2;
728     RRETURN(MATCH_PRUNE);
729 ph10 211
730 ph10 210 case OP_SKIP:
731     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
732     ims, eptrb, flags, RM53);
733     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
734 ph10 211 md->start_match_ptr = eptr; /* Pass back current position */
735 ph10 510 MRRETURN(MATCH_SKIP);
736 ph10 211
737 ph10 510 case OP_SKIP_ARG:
738     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
739 ph10 512 ims, eptrb, flags, RM57);
740 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
741 ph10 512
742     /* Pass back the current skip name by overloading md->start_match_ptr and
743     returning the special MATCH_SKIP_ARG return code. This will either be
744     caught by a matching MARK, or get to the top, where it is treated the same
745 ph10 510 as PRUNE. */
746 ph10 512
747 ph10 510 md->start_match_ptr = ecode + 2;
748 ph10 512 RRETURN(MATCH_SKIP_ARG);
749    
750 ph10 210 case OP_THEN:
751     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
752 ph10 212 ims, eptrb, flags, RM54);
753 ph10 210 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
754 ph10 510 MRRETURN(MATCH_THEN);
755    
756     case OP_THEN_ARG:
757     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
758 ph10 512 ims, eptrb, flags, RM58);
759 ph10 510 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
760     md->mark = ecode + 2;
761 ph10 212 RRETURN(MATCH_THEN);
762 ph10 211
763 nigel 93 /* Handle a capturing bracket. If there is space in the offset vector, save
764     the current subject position in the working slot at the top of the vector.
765     We mustn't change the current values of the data slot, because they may be
766     set from a previous iteration of this group, and be referred to by a
767     reference inside the group.
768 nigel 77
769 nigel 93 If the bracket fails to match, we need to restore this value and also the
770     values of the final offsets, in case they were set by a previous iteration
771     of the same bracket.
772 nigel 77
773 nigel 93 If there isn't enough space in the offset vector, treat this as if it were
774     a non-capturing bracket. Don't worry about setting the flag for the error
775     case here; that is handled in the code for KET. */
776 nigel 77
777 nigel 93 case OP_CBRA:
778     case OP_SCBRA:
779     number = GET2(ecode, 1+LINK_SIZE);
780 nigel 77 offset = number << 1;
781    
782 ph10 475 #ifdef PCRE_DEBUG
783 nigel 93 printf("start bracket %d\n", number);
784     printf("subject=");
785 nigel 77 pchars(eptr, 16, TRUE, md);
786     printf("\n");
787     #endif
788    
789     if (offset < md->offset_max)
790     {
791     save_offset1 = md->offset_vector[offset];
792     save_offset2 = md->offset_vector[offset+1];
793     save_offset3 = md->offset_vector[md->offset_end - number];
794     save_capture_last = md->capture_last;
795    
796     DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
797     md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
798    
799 nigel 93 flags = (op == OP_SCBRA)? match_cbegroup : 0;
800 nigel 77 do
801     {
802 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
803     ims, eptrb, flags, RM1);
804 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
805 nigel 77 md->capture_last = save_capture_last;
806     ecode += GET(ecode, 1);
807     }
808     while (*ecode == OP_ALT);
809    
810     DPRINTF(("bracket %d failed\n", number));
811    
812     md->offset_vector[offset] = save_offset1;
813     md->offset_vector[offset+1] = save_offset2;
814     md->offset_vector[md->offset_end - number] = save_offset3;
815    
816 ph10 510 if (rrc != MATCH_THEN) md->mark = markptr;
817 nigel 77 RRETURN(MATCH_NOMATCH);
818     }
819    
820 ph10 197 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
821     as a non-capturing bracket. */
822 nigel 77
823 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
824     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
825    
826 nigel 93 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
827 nigel 77
828 ph10 197 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
829     /* VVVVVVVVVVVVVVVVVVVVVVVVV */
830    
831 nigel 93 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
832     final alternative within the brackets, we would return the result of a
833     recursive call to match() whatever happened. We can reduce stack usage by
834 ph10 197 turning this into a tail recursion, except in the case when match_cbegroup
835     is set.*/
836 nigel 77
837 nigel 93 case OP_BRA:
838     case OP_SBRA:
839     DPRINTF(("start non-capturing bracket\n"));
840     flags = (op >= OP_SBRA)? match_cbegroup : 0;
841 nigel 91 for (;;)
842 nigel 77 {
843 ph10 197 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
844 nigel 93 {
845 ph10 197 if (flags == 0) /* Not a possibly empty group */
846     {
847     ecode += _pcre_OP_lengths[*ecode];
848     DPRINTF(("bracket 0 tail recursion\n"));
849     goto TAIL_RECURSE;
850     }
851    
852     /* Possibly empty group; can't use tail recursion. */
853    
854     RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
855     eptrb, flags, RM48);
856 ph10 512 if (rrc == MATCH_NOMATCH) md->mark = markptr;
857     RRETURN(rrc);
858 nigel 93 }
859 nigel 91
860     /* For non-final alternatives, continue the loop for a NOMATCH result;
861     otherwise return. */
862    
863 ph10 164 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
864     eptrb, flags, RM2);
865 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
866 nigel 77 ecode += GET(ecode, 1);
867     }
868 nigel 91 /* Control never reaches here. */
869 nigel 77
870     /* Conditional group: compilation checked that there are no more than
871     two branches. If the condition is false, skipping the first branch takes us
872     past the end if there is only one branch, but that's OK because that is
873 nigel 91 exactly what going to the ket would do. As there is only one branch to be
874     obeyed, we can use tail recursion to avoid using another stack frame. */
875 nigel 77
876     case OP_COND:
877 nigel 93 case OP_SCOND:
878 ph10 399 codelink= GET(ecode, 1);
879 ph10 406
880 ph10 381 /* Because of the way auto-callout works during compile, a callout item is
881     inserted between OP_COND and an assertion condition. */
882 ph10 392
883 ph10 381 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
884     {
885     if (pcre_callout != NULL)
886     {
887     pcre_callout_block cb;
888     cb.version = 1; /* Version 1 of the callout block */
889     cb.callout_number = ecode[LINK_SIZE+2];
890     cb.offset_vector = md->offset_vector;
891     cb.subject = (PCRE_SPTR)md->start_subject;
892     cb.subject_length = md->end_subject - md->start_subject;
893     cb.start_match = mstart - md->start_subject;
894     cb.current_position = eptr - md->start_subject;
895     cb.pattern_position = GET(ecode, LINK_SIZE + 3);
896     cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
897     cb.capture_top = offset_top/2;
898     cb.capture_last = md->capture_last;
899     cb.callout_data = md->callout_data;
900 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
901 ph10 381 if (rrc < 0) RRETURN(rrc);
902     }
903     ecode += _pcre_OP_lengths[OP_CALLOUT];
904     }
905 ph10 392
906 ph10 399 condcode = ecode[LINK_SIZE+1];
907 ph10 406
908 ph10 381 /* Now see what the actual condition is */
909 ph10 392
910 ph10 459 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
911 nigel 77 {
912 ph10 459 if (md->recursive == NULL) /* Not recursing => FALSE */
913     {
914 ph10 461 condition = FALSE;
915     ecode += GET(ecode, 1);
916     }
917 ph10 459 else
918 ph10 461 {
919 ph10 459 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
920     condition = (recno == RREF_ANY || recno == md->recursive->group_num);
921 ph10 461
922 ph10 459 /* If the test is for recursion into a specific subpattern, and it is
923     false, but the test was set up by name, scan the table to see if the
924     name refers to any other numbers, and test them. The condition is true
925     if any one is set. */
926 ph10 461
927 ph10 459 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
928     {
929     uschar *slotA = md->name_table;
930     for (i = 0; i < md->name_count; i++)
931 ph10 461 {
932     if (GET2(slotA, 0) == recno) break;
933 ph10 459 slotA += md->name_entry_size;
934     }
935 ph10 461
936 ph10 459 /* Found a name for the number - there can be only one; duplicate
937     names for different numbers are allowed, but not vice versa. First
938     scan down for duplicates. */
939 ph10 461
940 ph10 459 if (i < md->name_count)
941 ph10 461 {
942 ph10 459 uschar *slotB = slotA;
943     while (slotB > md->name_table)
944     {
945     slotB -= md->name_entry_size;
946     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
947     {
948     condition = GET2(slotB, 0) == md->recursive->group_num;
949 ph10 461 if (condition) break;
950     }
951 ph10 459 else break;
952 ph10 461 }
953    
954 ph10 459 /* Scan up for duplicates */
955 ph10 461
956 ph10 459 if (!condition)
957 ph10 461 {
958 ph10 459 slotB = slotA;
959     for (i++; i < md->name_count; i++)
960     {
961     slotB += md->name_entry_size;
962     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
963     {
964     condition = GET2(slotB, 0) == md->recursive->group_num;
965     if (condition) break;
966 ph10 461 }
967 ph10 459 else break;
968 ph10 461 }
969     }
970 ph10 459 }
971 ph10 461 }
972    
973 ph10 459 /* Choose the branch according to the condition */
974 ph10 461
975 ph10 459 ecode += condition? 3 : GET(ecode, 1);
976     }
977 ph10 461 }
978 nigel 93
979 ph10 459 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
980 nigel 93 {
981 nigel 77 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
982 nigel 93 condition = offset < offset_top && md->offset_vector[offset] >= 0;
983 ph10 461
984 ph10 459 /* If the numbered capture is unset, but the reference was by name,
985 ph10 461 scan the table to see if the name refers to any other numbers, and test
986     them. The condition is true if any one is set. This is tediously similar
987     to the code above, but not close enough to try to amalgamate. */
988    
989 ph10 459 if (!condition && condcode == OP_NCREF)
990     {
991 ph10 461 int refno = offset >> 1;
992 ph10 459 uschar *slotA = md->name_table;
993 ph10 461
994 ph10 459 for (i = 0; i < md->name_count; i++)
995 ph10 461 {
996     if (GET2(slotA, 0) == refno) break;
997 ph10 459 slotA += md->name_entry_size;
998     }
999 ph10 461
1000     /* Found a name for the number - there can be only one; duplicate names
1001     for different numbers are allowed, but not vice versa. First scan down
1002 ph10 459 for duplicates. */
1003 ph10 461
1004 ph10 459 if (i < md->name_count)
1005 ph10 461 {
1006 ph10 459 uschar *slotB = slotA;
1007     while (slotB > md->name_table)
1008     {
1009     slotB -= md->name_entry_size;
1010     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1011     {
1012     offset = GET2(slotB, 0) << 1;
1013 ph10 461 condition = offset < offset_top &&
1014 ph10 459 md->offset_vector[offset] >= 0;
1015 ph10 461 if (condition) break;
1016     }
1017 ph10 459 else break;
1018 ph10 461 }
1019    
1020 ph10 459 /* Scan up for duplicates */
1021 ph10 461
1022 ph10 459 if (!condition)
1023 ph10 461 {
1024 ph10 459 slotB = slotA;
1025     for (i++; i < md->name_count; i++)
1026     {
1027     slotB += md->name_entry_size;
1028     if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1029     {
1030     offset = GET2(slotB, 0) << 1;
1031 ph10 461 condition = offset < offset_top &&
1032 ph10 459 md->offset_vector[offset] >= 0;
1033 ph10 461 if (condition) break;
1034     }
1035 ph10 459 else break;
1036 ph10 461 }
1037     }
1038 ph10 459 }
1039 ph10 461 }
1040    
1041 ph10 459 /* Choose the branch according to the condition */
1042    
1043 nigel 93 ecode += condition? 3 : GET(ecode, 1);
1044 nigel 77 }
1045    
1046 ph10 399 else if (condcode == OP_DEF) /* DEFINE - always false */
1047 nigel 93 {
1048     condition = FALSE;
1049     ecode += GET(ecode, 1);
1050     }
1051    
1052 nigel 77 /* The condition is an assertion. Call match() to evaluate it - setting
1053 nigel 93 the final argument match_condassert causes it to stop at the end of an
1054     assertion. */
1055 nigel 77
1056     else
1057     {
1058 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1059     match_condassert, RM3);
1060 nigel 77 if (rrc == MATCH_MATCH)
1061     {
1062 nigel 93 condition = TRUE;
1063     ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1064 nigel 77 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1065     }
1066 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1067 nigel 77 {
1068     RRETURN(rrc); /* Need braces because of following else */
1069     }
1070 nigel 93 else
1071     {
1072     condition = FALSE;
1073 ph10 399 ecode += codelink;
1074 nigel 93 }
1075     }
1076 nigel 91
1077 nigel 93 /* We are now at the branch that is to be obeyed. As there is only one,
1078 ph10 197 we can use tail recursion to avoid using another stack frame, except when
1079     match_cbegroup is required for an unlimited repeat of a possibly empty
1080     group. If the second alternative doesn't exist, we can just plough on. */
1081 nigel 91
1082 nigel 93 if (condition || *ecode == OP_ALT)
1083     {
1084 nigel 91 ecode += 1 + LINK_SIZE;
1085 ph10 197 if (op == OP_SCOND) /* Possibly empty group */
1086     {
1087     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1088     RRETURN(rrc);
1089     }
1090     else /* Group must match something */
1091     {
1092     flags = 0;
1093     goto TAIL_RECURSE;
1094     }
1095 nigel 77 }
1096 ph10 395 else /* Condition false & no alternative */
1097 nigel 93 {
1098     ecode += 1 + LINK_SIZE;
1099     }
1100     break;
1101 nigel 77
1102 ph10 461
1103 ph10 447 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1104     to close any currently open capturing brackets. */
1105 ph10 461
1106 ph10 447 case OP_CLOSE:
1107 ph10 461 number = GET2(ecode, 1);
1108 ph10 447 offset = number << 1;
1109 ph10 461
1110 ph10 475 #ifdef PCRE_DEBUG
1111 ph10 447 printf("end bracket %d at *ACCEPT", number);
1112     printf("\n");
1113     #endif
1114 nigel 77
1115 ph10 447 md->capture_last = number;
1116     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1117     {
1118     md->offset_vector[offset] =
1119     md->offset_vector[md->offset_end - number];
1120     md->offset_vector[offset+1] = eptr - md->start_subject;
1121     if (offset_top <= offset) offset_top = offset + 2;
1122     }
1123     ecode += 3;
1124 ph10 461 break;
1125 ph10 447
1126    
1127 ph10 210 /* End of the pattern, either real or forced. If we are in a top-level
1128     recursion, we should restore the offsets appropriately and continue from
1129     after the call. */
1130 nigel 77
1131 ph10 210 case OP_ACCEPT:
1132 nigel 77 case OP_END:
1133     if (md->recursive != NULL && md->recursive->group_num == 0)
1134     {
1135     recursion_info *rec = md->recursive;
1136 nigel 87 DPRINTF(("End of pattern in a (?0) recursion\n"));
1137 nigel 77 md->recursive = rec->prevrec;
1138     memmove(md->offset_vector, rec->offset_save,
1139     rec->saved_max * sizeof(int));
1140 ph10 461 offset_top = rec->save_offset_top;
1141 nigel 77 ims = original_ims;
1142     ecode = rec->after_call;
1143     break;
1144     }
1145    
1146 ph10 442 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1147     set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1148     the subject. In both cases, backtracking will then try other alternatives,
1149     if any. */
1150 ph10 443
1151 ph10 442 if (eptr == mstart &&
1152     (md->notempty ||
1153 ph10 443 (md->notempty_atstart &&
1154 ph10 442 mstart == md->start_subject + md->start_offset)))
1155 ph10 510 MRRETURN(MATCH_NOMATCH);
1156 ph10 443
1157 ph10 442 /* Otherwise, we have a match. */
1158 nigel 77
1159 ph10 168 md->end_match_ptr = eptr; /* Record where we ended */
1160     md->end_offset_top = offset_top; /* and how many extracts were taken */
1161 ph10 210 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1162 nigel 77
1163 ph10 512 /* For some reason, the macros don't work properly if an expression is
1164     given as the argument to MRRETURN when the heap is in use. */
1165    
1166     rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1167     MRRETURN(rrc);
1168    
1169 nigel 77 /* Change option settings */
1170    
1171     case OP_OPT:
1172     ims = ecode[1];
1173     ecode += 2;
1174     DPRINTF(("ims set to %02lx\n", ims));
1175     break;
1176    
1177     /* Assertion brackets. Check the alternative branches in turn - the
1178     matching won't pass the KET for an assertion. If any one branch matches,
1179     the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1180     start of each branch to move the current point backwards, so the code at
1181     this level is identical to the lookahead case. */
1182    
1183     case OP_ASSERT:
1184     case OP_ASSERTBACK:
1185     do
1186     {
1187 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1188     RM4);
1189 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1190 ph10 500 {
1191     mstart = md->start_match_ptr; /* In case \K reset it */
1192     break;
1193 ph10 501 }
1194 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1195 nigel 77 ecode += GET(ecode, 1);
1196     }
1197     while (*ecode == OP_ALT);
1198 ph10 510 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1199 nigel 77
1200     /* If checking an assertion for a condition, return MATCH_MATCH. */
1201    
1202     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1203    
1204     /* Continue from after the assertion, updating the offsets high water
1205     mark, since extracts may have been taken during the assertion. */
1206    
1207     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1208     ecode += 1 + LINK_SIZE;
1209     offset_top = md->end_offset_top;
1210     continue;
1211    
1212 ph10 473 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1213 ph10 482 PRUNE, or COMMIT means we must assume failure without checking subsequent
1214 ph10 473 branches. */
1215 nigel 77
1216     case OP_ASSERT_NOT:
1217     case OP_ASSERTBACK_NOT:
1218     do
1219     {
1220 ph10 164 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1221     RM5);
1222 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1223 ph10 473 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1224     {
1225     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1226 ph10 482 break;
1227     }
1228 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1229 nigel 77 ecode += GET(ecode,1);
1230     }
1231     while (*ecode == OP_ALT);
1232    
1233     if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1234    
1235     ecode += 1 + LINK_SIZE;
1236     continue;
1237    
1238     /* Move the subject pointer back. This occurs only at the start of
1239     each branch of a lookbehind assertion. If we are too close to the start to
1240     move back, this match function fails. When working with UTF-8 we move
1241     back a number of characters, not bytes. */
1242    
1243     case OP_REVERSE:
1244     #ifdef SUPPORT_UTF8
1245     if (utf8)
1246     {
1247 nigel 93 i = GET(ecode, 1);
1248     while (i-- > 0)
1249 nigel 77 {
1250     eptr--;
1251 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1252 ph10 207 BACKCHAR(eptr);
1253 nigel 77 }
1254     }
1255     else
1256     #endif
1257    
1258     /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1259    
1260     {
1261 nigel 93 eptr -= GET(ecode, 1);
1262 ph10 510 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1263 nigel 77 }
1264    
1265 ph10 435 /* Save the earliest consulted character, then skip to next op code */
1266 nigel 77
1267 ph10 435 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1268 nigel 77 ecode += 1 + LINK_SIZE;
1269     break;
1270    
1271     /* The callout item calls an external function, if one is provided, passing
1272     details of the match so far. This is mainly for debugging, though the
1273     function is able to force a failure. */
1274    
1275     case OP_CALLOUT:
1276     if (pcre_callout != NULL)
1277     {
1278     pcre_callout_block cb;
1279     cb.version = 1; /* Version 1 of the callout block */
1280     cb.callout_number = ecode[1];
1281     cb.offset_vector = md->offset_vector;
1282 nigel 87 cb.subject = (PCRE_SPTR)md->start_subject;
1283 nigel 77 cb.subject_length = md->end_subject - md->start_subject;
1284 ph10 168 cb.start_match = mstart - md->start_subject;
1285 nigel 77 cb.current_position = eptr - md->start_subject;
1286     cb.pattern_position = GET(ecode, 2);
1287     cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1288     cb.capture_top = offset_top/2;
1289     cb.capture_last = md->capture_last;
1290     cb.callout_data = md->callout_data;
1291 ph10 510 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1292 nigel 77 if (rrc < 0) RRETURN(rrc);
1293     }
1294     ecode += 2 + 2*LINK_SIZE;
1295     break;
1296    
1297     /* Recursion either matches the current regex, or some subexpression. The
1298     offset data is the offset to the starting bracket from the start of the
1299     whole pattern. (This is so that it works from duplicated subpatterns.)
1300    
1301     If there are any capturing brackets started but not finished, we have to
1302     save their starting points and reinstate them after the recursion. However,
1303     we don't know how many such there are (offset_top records the completed
1304     total) so we just have to save all the potential data. There may be up to
1305     65535 such values, which is too large to put on the stack, but using malloc
1306     for small numbers seems expensive. As a compromise, the stack is used when
1307     there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1308     is used. If the malloc fails, the offsets cannot be saved, so matching
1309     is not continued with incomplete data: the error PCRE_ERROR_NOMEMORY is
1310     returned instead.
1311    
1312     There are also other values that have to be saved. We use a chained
1313     sequence of blocks that actually live on the stack. Thanks to Robin Houston
1314     for the original version of this logic. */
1315    
1316     case OP_RECURSE:
1317     {
1318     callpat = md->start_code + GET(ecode, 1);
1319 nigel 93 new_recursive.group_num = (callpat == md->start_code)? 0 :
1320     GET2(callpat, 1 + LINK_SIZE);
1321 nigel 77
1322     /* Add to "recursing stack" */
1323    
1324     new_recursive.prevrec = md->recursive;
1325     md->recursive = &new_recursive;
1326    
1327     /* Find where to continue from afterwards */
1328    
1329     ecode += 1 + LINK_SIZE;
1330     new_recursive.after_call = ecode;
1331    
1332     /* Now save the offset data. */
1333    
1334     new_recursive.saved_max = md->offset_end;
1335     if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1336     new_recursive.offset_save = stacksave;
1337     else
1338     {
1339     new_recursive.offset_save =
1340     (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1341     if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1342     }
1343    
1344     memcpy(new_recursive.offset_save, md->offset_vector,
1345     new_recursive.saved_max * sizeof(int));
1346 ph10 461 new_recursive.save_offset_top = offset_top;
1347 nigel 77
1348     /* OK, now we can do the recursion. For each top-level alternative we
1349     restore the offset and recursion data. */
1350    
1351     DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1352 nigel 93 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1353 nigel 77 do
1354     {
1355 ph10 164 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1356     md, ims, eptrb, flags, RM6);
1357 ph10 511 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1358 nigel 77 {
1359 nigel 87 DPRINTF(("Recursion matched\n"));
1360 nigel 77 md->recursive = new_recursive.prevrec;
1361     if (new_recursive.offset_save != stacksave)
1362     (pcre_free)(new_recursive.offset_save);
1363 ph10 510 MRRETURN(MATCH_MATCH);
1364 nigel 77 }
1365 ph10 210 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1366 nigel 87 {
1367     DPRINTF(("Recursion gave error %d\n", rrc));
1368 ph10 400 if (new_recursive.offset_save != stacksave)
1369     (pcre_free)(new_recursive.offset_save);
1370 nigel 87 RRETURN(rrc);
1371     }
1372 nigel 77
1373     md->recursive = &new_recursive;
1374     memcpy(md->offset_vector, new_recursive.offset_save,
1375     new_recursive.saved_max * sizeof(int));
1376     callpat += GET(callpat, 1);
1377     }
1378     while (*callpat == OP_ALT);
1379    
1380     DPRINTF(("Recursion didn't match\n"));
1381     md->recursive = new_recursive.prevrec;
1382     if (new_recursive.offset_save != stacksave)
1383     (pcre_free)(new_recursive.offset_save);
1384 ph10 510 MRRETURN(MATCH_NOMATCH);
1385 nigel 77 }
1386     /* Control never reaches here */
1387    
1388     /* "Once" brackets are like assertion brackets except that after a match,
1389     the point in the subject string is not moved back. Thus there can never be
1390     a move back into the brackets. Friedl calls these "atomic" subpatterns.
1391     Check the alternative branches in turn - the matching won't pass the KET
1392     for this kind of subpattern. If any one branch matches, we carry on as at
1393 ph10 500 the end of a normal bracket, leaving the subject pointer, but resetting
1394     the start-of-match value in case it was changed by \K. */
1395 nigel 77
1396     case OP_ONCE:
1397 nigel 91 prev = ecode;
1398     saved_eptr = eptr;
1399    
1400     do
1401 nigel 77 {
1402 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1403 ph10 511 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1404 ph10 500 {
1405     mstart = md->start_match_ptr;
1406     break;
1407 ph10 501 }
1408 ph10 210 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1409 nigel 91 ecode += GET(ecode,1);
1410     }
1411     while (*ecode == OP_ALT);
1412 nigel 77
1413 nigel 91 /* If hit the end of the group (which could be repeated), fail */
1414 nigel 77
1415 nigel 91 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1416 nigel 77
1417 nigel 91 /* Continue as from after the assertion, updating the offsets high water
1418     mark, since extracts may have been taken. */
1419 nigel 77
1420 nigel 93 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1421 nigel 77
1422 nigel 91 offset_top = md->end_offset_top;
1423     eptr = md->end_match_ptr;
1424 nigel 77
1425 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1426     happens for a repeating ket if no characters were matched in the group.
1427     This is the forcible breaking of infinite loops as implemented in Perl
1428     5.005. If there is an options reset, it will get obeyed in the normal
1429     course of events. */
1430 nigel 77
1431 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1432     {
1433     ecode += 1+LINK_SIZE;
1434     break;
1435     }
1436 nigel 77
1437 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1438     preceding bracket, in the appropriate order. The second "call" of match()
1439     uses tail recursion, to avoid using another stack frame. We need to reset
1440     any options that changed within the bracket before re-running it, so
1441     check the next opcode. */
1442 nigel 77
1443 nigel 91 if (ecode[1+LINK_SIZE] == OP_OPT)
1444     {
1445     ims = (ims & ~PCRE_IMS) | ecode[4];
1446     DPRINTF(("ims set to %02lx at group repeat\n", ims));
1447     }
1448 nigel 77
1449 nigel 91 if (*ecode == OP_KETRMIN)
1450     {
1451 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1452 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1453     ecode = prev;
1454 ph10 197 flags = 0;
1455 nigel 91 goto TAIL_RECURSE;
1456 nigel 77 }
1457 nigel 91 else /* OP_KETRMAX */
1458     {
1459 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1460 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1461     ecode += 1 + LINK_SIZE;
1462 ph10 197 flags = 0;
1463 nigel 91 goto TAIL_RECURSE;
1464     }
1465     /* Control never gets here */
1466 nigel 77
1467     /* An alternation is the end of a branch; scan along to find the end of the
1468     bracketed group and go to there. */
1469    
1470     case OP_ALT:
1471     do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1472     break;
1473    
1474 ph10 335 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1475     indicating that it may occur zero times. It may repeat infinitely, or not
1476     at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1477     with fixed upper repeat limits are compiled as a number of copies, with the
1478     optional ones preceded by BRAZERO or BRAMINZERO. */
1479 nigel 77
1480     case OP_BRAZERO:
1481     {
1482     next = ecode+1;
1483 ph10 164 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1484 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1485     do next += GET(next,1); while (*next == OP_ALT);
1486 nigel 93 ecode = next + 1 + LINK_SIZE;
1487 nigel 77 }
1488     break;
1489    
1490     case OP_BRAMINZERO:
1491     {
1492     next = ecode+1;
1493 nigel 93 do next += GET(next, 1); while (*next == OP_ALT);
1494 ph10 164 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1495 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1496     ecode++;
1497     }
1498     break;
1499    
1500 ph10 335 case OP_SKIPZERO:
1501     {
1502     next = ecode+1;
1503     do next += GET(next,1); while (*next == OP_ALT);
1504     ecode = next + 1 + LINK_SIZE;
1505     }
1506     break;
1507    
1508 nigel 93 /* End of a group, repeated or non-repeating. */
1509 nigel 77
1510     case OP_KET:
1511     case OP_KETRMIN:
1512     case OP_KETRMAX:
1513 nigel 91 prev = ecode - GET(ecode, 1);
1514 nigel 77
1515 nigel 93 /* If this was a group that remembered the subject start, in order to break
1516     infinite repeats of empty string matches, retrieve the subject start from
1517     the chain. Otherwise, set it NULL. */
1518 nigel 77
1519 nigel 93 if (*prev >= OP_SBRA)
1520     {
1521     saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1522     eptrb = eptrb->epb_prev; /* Backup to previous group */
1523     }
1524     else saved_eptr = NULL;
1525 nigel 77
1526 ph10 500 /* If we are at the end of an assertion group or an atomic group, stop
1527     matching and return MATCH_MATCH, but record the current high water mark for
1528     use by positive assertions. We also need to record the match start in case
1529     it was changed by \K. */
1530 nigel 93
1531 nigel 91 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1532     *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1533     *prev == OP_ONCE)
1534     {
1535     md->end_match_ptr = eptr; /* For ONCE */
1536     md->end_offset_top = offset_top;
1537 ph10 500 md->start_match_ptr = mstart;
1538 ph10 510 MRRETURN(MATCH_MATCH);
1539 nigel 91 }
1540 nigel 77
1541 nigel 93 /* For capturing groups we have to check the group number back at the start
1542     and if necessary complete handling an extraction by setting the offsets and
1543     bumping the high water mark. Note that whole-pattern recursion is coded as
1544     a recurse into group 0, so it won't be picked up here. Instead, we catch it
1545     when the OP_END is reached. Other recursion is handled here. */
1546 nigel 77
1547 nigel 93 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1548 nigel 91 {
1549 nigel 93 number = GET2(prev, 1+LINK_SIZE);
1550 nigel 91 offset = number << 1;
1551 ph10 461
1552 ph10 475 #ifdef PCRE_DEBUG
1553 nigel 91 printf("end bracket %d", number);
1554     printf("\n");
1555 nigel 77 #endif
1556    
1557 nigel 93 md->capture_last = number;
1558     if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1559 nigel 91 {
1560 nigel 93 md->offset_vector[offset] =
1561     md->offset_vector[md->offset_end - number];
1562     md->offset_vector[offset+1] = eptr - md->start_subject;
1563     if (offset_top <= offset) offset_top = offset + 2;
1564     }
1565 nigel 77
1566 nigel 93 /* Handle a recursively called group. Restore the offsets
1567     appropriately and continue from after the call. */
1568 nigel 77
1569 nigel 93 if (md->recursive != NULL && md->recursive->group_num == number)
1570     {
1571     recursion_info *rec = md->recursive;
1572     DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1573     md->recursive = rec->prevrec;
1574     memcpy(md->offset_vector, rec->offset_save,
1575     rec->saved_max * sizeof(int));
1576 ph10 461 offset_top = rec->save_offset_top;
1577 nigel 93 ecode = rec->after_call;
1578     ims = original_ims;
1579     break;
1580 nigel 77 }
1581 nigel 91 }
1582 nigel 77
1583 nigel 93 /* For both capturing and non-capturing groups, reset the value of the ims
1584     flags, in case they got changed during the group. */
1585 nigel 77
1586 nigel 91 ims = original_ims;
1587     DPRINTF(("ims reset to %02lx\n", ims));
1588 nigel 77
1589 nigel 91 /* For a non-repeating ket, just continue at this level. This also
1590     happens for a repeating ket if no characters were matched in the group.
1591     This is the forcible breaking of infinite loops as implemented in Perl
1592     5.005. If there is an options reset, it will get obeyed in the normal
1593     course of events. */
1594 nigel 77
1595 nigel 91 if (*ecode == OP_KET || eptr == saved_eptr)
1596     {
1597     ecode += 1 + LINK_SIZE;
1598     break;
1599     }
1600 nigel 77
1601 nigel 91 /* The repeating kets try the rest of the pattern or restart from the
1602     preceding bracket, in the appropriate order. In the second case, we can use
1603 ph10 197 tail recursion to avoid using another stack frame, unless we have an
1604     unlimited repeat of a group that can match an empty string. */
1605 nigel 77
1606 nigel 93 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1607    
1608 nigel 91 if (*ecode == OP_KETRMIN)
1609     {
1610 ph10 197 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1611 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1612 ph10 197 if (flags != 0) /* Could match an empty string */
1613     {
1614     RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1615     RRETURN(rrc);
1616     }
1617 nigel 91 ecode = prev;
1618     goto TAIL_RECURSE;
1619 nigel 77 }
1620 nigel 91 else /* OP_KETRMAX */
1621     {
1622 ph10 164 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1623 nigel 91 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1624     ecode += 1 + LINK_SIZE;
1625 ph10 197 flags = 0;
1626 nigel 91 goto TAIL_RECURSE;
1627     }
1628     /* Control never gets here */
1629 nigel 77
1630     /* Start of subject unless notbol, or after internal newline if multiline */
1631    
1632     case OP_CIRC:
1633 ph10 510 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1634 nigel 77 if ((ims & PCRE_MULTILINE) != 0)
1635     {
1636 nigel 91 if (eptr != md->start_subject &&
1637 nigel 93 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1638 ph10 510 MRRETURN(MATCH_NOMATCH);
1639 nigel 77 ecode++;
1640     break;
1641     }
1642     /* ... else fall through */
1643    
1644     /* Start of subject assertion */
1645    
1646     case OP_SOD:
1647 ph10 510 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1648 nigel 77 ecode++;
1649     break;
1650    
1651     /* Start of match assertion */
1652    
1653     case OP_SOM:
1654 ph10 510 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1655 nigel 77 ecode++;
1656     break;
1657 ph10 172
1658 ph10 168 /* Reset the start of match point */
1659 ph10 172
1660 ph10 168 case OP_SET_SOM:
1661     mstart = eptr;
1662 ph10 172 ecode++;
1663     break;
1664 nigel 77
1665     /* Assert before internal newline if multiline, or before a terminating
1666     newline unless endonly is set, else end of subject unless noteol is set. */
1667    
1668     case OP_DOLL:
1669     if ((ims & PCRE_MULTILINE) != 0)
1670     {
1671     if (eptr < md->end_subject)
1672 ph10 510 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1673 nigel 77 else
1674 ph10 510 { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1675 nigel 77 ecode++;
1676     break;
1677     }
1678     else
1679     {
1680 ph10 510 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1681 nigel 77 if (!md->endonly)
1682     {
1683 nigel 91 if (eptr != md->end_subject &&
1684 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1685 ph10 510 MRRETURN(MATCH_NOMATCH);
1686 nigel 77 ecode++;
1687     break;
1688     }
1689     }
1690 nigel 91 /* ... else fall through for endonly */
1691 nigel 77
1692     /* End of subject assertion (\z) */
1693    
1694     case OP_EOD:
1695 ph10 510 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1696 nigel 77 ecode++;
1697     break;
1698    
1699     /* End of subject or ending \n assertion (\Z) */
1700    
1701     case OP_EODN:
1702 nigel 91 if (eptr != md->end_subject &&
1703 nigel 93 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1704 ph10 510 MRRETURN(MATCH_NOMATCH);
1705 nigel 77 ecode++;
1706     break;
1707    
1708     /* Word boundary assertions */
1709    
1710     case OP_NOT_WORD_BOUNDARY:
1711     case OP_WORD_BOUNDARY:
1712     {
1713    
1714     /* Find out if the previous and current characters are "word" characters.
1715     It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1716 ph10 443 be "non-word" characters. Remember the earliest consulted character for
1717 ph10 435 partial matching. */
1718 nigel 77
1719     #ifdef SUPPORT_UTF8
1720     if (utf8)
1721     {
1722 ph10 518 /* Get status of previous character */
1723 ph10 527
1724 nigel 77 if (eptr == md->start_subject) prev_is_word = FALSE; else
1725     {
1726 ph10 409 USPTR lastptr = eptr - 1;
1727 nigel 77 while((*lastptr & 0xc0) == 0x80) lastptr--;
1728 ph10 443 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1729 nigel 77 GETCHAR(c, lastptr);
1730 ph10 527 #ifdef SUPPORT_UCP
1731 ph10 518 if (md->use_ucp)
1732     {
1733     if (c == '_') prev_is_word = TRUE; else
1734 ph10 527 {
1735 ph10 518 int cat = UCD_CATEGORY(c);
1736     prev_is_word = (cat == ucp_L || cat == ucp_N);
1737 ph10 527 }
1738     }
1739     else
1740     #endif
1741 nigel 77 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1742     }
1743 ph10 527
1744 ph10 518 /* Get status of next character */
1745 ph10 527
1746 ph10 443 if (eptr >= md->end_subject)
1747 nigel 77 {
1748 ph10 443 SCHECK_PARTIAL();
1749     cur_is_word = FALSE;
1750 ph10 428 }
1751     else
1752     {
1753 nigel 77 GETCHAR(c, eptr);
1754 ph10 527 #ifdef SUPPORT_UCP
1755 ph10 518 if (md->use_ucp)
1756     {
1757     if (c == '_') cur_is_word = TRUE; else
1758 ph10 527 {
1759 ph10 518 int cat = UCD_CATEGORY(c);
1760     cur_is_word = (cat == ucp_L || cat == ucp_N);
1761 ph10 527 }
1762     }
1763     else
1764     #endif
1765 nigel 77 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1766     }
1767     }
1768     else
1769     #endif
1770    
1771 ph10 527 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1772 ph10 518 consistency with the behaviour of \w we do use it in this case. */
1773 nigel 77
1774     {
1775 ph10 518 /* Get status of previous character */
1776 ph10 527
1777 ph10 435 if (eptr == md->start_subject) prev_is_word = FALSE; else
1778     {
1779 ph10 443 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1780 ph10 527 #ifdef SUPPORT_UCP
1781 ph10 518 if (md->use_ucp)
1782     {
1783 ph10 527 c = eptr[-1];
1784 ph10 518 if (c == '_') prev_is_word = TRUE; else
1785 ph10 527 {
1786 ph10 518 int cat = UCD_CATEGORY(c);
1787     prev_is_word = (cat == ucp_L || cat == ucp_N);
1788 ph10 527 }
1789     }
1790     else
1791     #endif
1792 ph10 435 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1793     }
1794 ph10 527
1795 ph10 518 /* Get status of next character */
1796 ph10 527
1797 ph10 443 if (eptr >= md->end_subject)
1798 ph10 428 {
1799 ph10 443 SCHECK_PARTIAL();
1800     cur_is_word = FALSE;
1801 ph10 428 }
1802 ph10 527 else
1803     #ifdef SUPPORT_UCP
1804 ph10 518 if (md->use_ucp)
1805     {
1806 ph10 527 c = *eptr;
1807 ph10 518 if (c == '_') cur_is_word = TRUE; else
1808 ph10 527 {
1809 ph10 518 int cat = UCD_CATEGORY(c);
1810     cur_is_word = (cat == ucp_L || cat == ucp_N);
1811 ph10 527 }
1812     }
1813     else
1814     #endif
1815 ph10 518 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1816 nigel 77 }
1817    
1818     /* Now see if the situation is what we want */
1819    
1820     if ((*ecode++ == OP_WORD_BOUNDARY)?
1821     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1822 ph10 510 MRRETURN(MATCH_NOMATCH);
1823 nigel 77 }
1824     break;
1825    
1826     /* Match a single character type; inline for speed */
1827    
1828     case OP_ANY:
1829 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1830 ph10 345 /* Fall through */
1831    
1832 ph10 341 case OP_ALLANY:
1833 ph10 443 if (eptr++ >= md->end_subject)
1834 ph10 428 {
1835 ph10 443 SCHECK_PARTIAL();
1836 ph10 510 MRRETURN(MATCH_NOMATCH);
1837 ph10 443 }
1838 ph10 342 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1839 nigel 77 ecode++;
1840     break;
1841    
1842     /* Match a single byte, even in UTF-8 mode. This opcode really does match
1843     any byte, even newline, independent of the setting of PCRE_DOTALL. */
1844    
1845     case OP_ANYBYTE:
1846 ph10 443 if (eptr++ >= md->end_subject)
1847 ph10 428 {
1848 ph10 443 SCHECK_PARTIAL();
1849 ph10 510 MRRETURN(MATCH_NOMATCH);
1850 ph10 443 }
1851 nigel 77 ecode++;
1852     break;
1853    
1854     case OP_NOT_DIGIT:
1855 ph10 443 if (eptr >= md->end_subject)
1856 ph10 428 {
1857 ph10 443 SCHECK_PARTIAL();
1858 ph10 510 MRRETURN(MATCH_NOMATCH);
1859 ph10 443 }
1860 nigel 77 GETCHARINCTEST(c, eptr);
1861     if (
1862     #ifdef SUPPORT_UTF8
1863     c < 256 &&
1864     #endif
1865     (md->ctypes[c] & ctype_digit) != 0
1866     )
1867 ph10 510 MRRETURN(MATCH_NOMATCH);
1868 nigel 77 ecode++;
1869     break;
1870    
1871     case OP_DIGIT:
1872 ph10 443 if (eptr >= md->end_subject)
1873 ph10 428 {
1874 ph10 443 SCHECK_PARTIAL();
1875 ph10 510 MRRETURN(MATCH_NOMATCH);
1876 ph10 443 }
1877 nigel 77 GETCHARINCTEST(c, eptr);
1878     if (
1879     #ifdef SUPPORT_UTF8
1880     c >= 256 ||
1881     #endif
1882     (md->ctypes[c] & ctype_digit) == 0
1883     )
1884 ph10 510 MRRETURN(MATCH_NOMATCH);
1885 nigel 77 ecode++;
1886     break;
1887    
1888     case OP_NOT_WHITESPACE:
1889 ph10 443 if (eptr >= md->end_subject)
1890 ph10 428 {
1891 ph10 443 SCHECK_PARTIAL();
1892 ph10 510 MRRETURN(MATCH_NOMATCH);
1893 ph10 443 }
1894 nigel 77 GETCHARINCTEST(c, eptr);
1895     if (
1896     #ifdef SUPPORT_UTF8
1897     c < 256 &&
1898     #endif
1899     (md->ctypes[c] & ctype_space) != 0
1900     )
1901 ph10 510 MRRETURN(MATCH_NOMATCH);
1902 nigel 77 ecode++;
1903     break;
1904    
1905     case OP_WHITESPACE:
1906 ph10 443 if (eptr >= md->end_subject)
1907 ph10 428 {
1908 ph10 443 SCHECK_PARTIAL();
1909 ph10 510 MRRETURN(MATCH_NOMATCH);
1910 ph10 443 }
1911 nigel 77 GETCHARINCTEST(c, eptr);
1912     if (
1913     #ifdef SUPPORT_UTF8
1914     c >= 256 ||
1915     #endif
1916     (md->ctypes[c] & ctype_space) == 0
1917     )
1918 ph10 510 MRRETURN(MATCH_NOMATCH);
1919 nigel 77 ecode++;
1920     break;
1921    
1922     case OP_NOT_WORDCHAR:
1923 ph10 443 if (eptr >= md->end_subject)
1924 ph10 428 {
1925 ph10 443 SCHECK_PARTIAL();
1926 ph10 510 MRRETURN(MATCH_NOMATCH);
1927 ph10 443 }
1928 nigel 77 GETCHARINCTEST(c, eptr);
1929     if (
1930     #ifdef SUPPORT_UTF8
1931     c < 256 &&
1932     #endif
1933     (md->ctypes[c] & ctype_word) != 0
1934     )
1935 ph10 510 MRRETURN(MATCH_NOMATCH);
1936 nigel 77 ecode++;
1937     break;
1938    
1939     case OP_WORDCHAR:
1940 ph10 443 if (eptr >= md->end_subject)
1941 ph10 428 {
1942 ph10 443 SCHECK_PARTIAL();
1943 ph10 510 MRRETURN(MATCH_NOMATCH);
1944 ph10 443 }
1945 nigel 77 GETCHARINCTEST(c, eptr);
1946     if (
1947     #ifdef SUPPORT_UTF8
1948     c >= 256 ||
1949     #endif
1950     (md->ctypes[c] & ctype_word) == 0
1951     )
1952 ph10 510 MRRETURN(MATCH_NOMATCH);
1953 nigel 77 ecode++;
1954     break;
1955    
1956 nigel 93 case OP_ANYNL:
1957 ph10 443 if (eptr >= md->end_subject)
1958 ph10 428 {
1959 ph10 443 SCHECK_PARTIAL();
1960 ph10 510 MRRETURN(MATCH_NOMATCH);
1961 ph10 443 }
1962 nigel 93 GETCHARINCTEST(c, eptr);
1963     switch(c)
1964     {
1965 ph10 510 default: MRRETURN(MATCH_NOMATCH);
1966 nigel 93 case 0x000d:
1967     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1968     break;
1969 ph10 231
1970 nigel 93 case 0x000a:
1971 ph10 231 break;
1972    
1973 nigel 93 case 0x000b:
1974     case 0x000c:
1975     case 0x0085:
1976     case 0x2028:
1977     case 0x2029:
1978 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1979 nigel 93 break;
1980     }
1981     ecode++;
1982     break;
1983    
1984 ph10 178 case OP_NOT_HSPACE:
1985 ph10 443 if (eptr >= md->end_subject)
1986 ph10 428 {
1987 ph10 443 SCHECK_PARTIAL();
1988 ph10 510 MRRETURN(MATCH_NOMATCH);
1989 ph10 443 }
1990 ph10 178 GETCHARINCTEST(c, eptr);
1991     switch(c)
1992     {
1993     default: break;
1994     case 0x09: /* HT */
1995     case 0x20: /* SPACE */
1996     case 0xa0: /* NBSP */
1997     case 0x1680: /* OGHAM SPACE MARK */
1998     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1999     case 0x2000: /* EN QUAD */
2000     case 0x2001: /* EM QUAD */
2001     case 0x2002: /* EN SPACE */
2002     case 0x2003: /* EM SPACE */
2003     case 0x2004: /* THREE-PER-EM SPACE */
2004     case 0x2005: /* FOUR-PER-EM SPACE */
2005     case 0x2006: /* SIX-PER-EM SPACE */
2006     case 0x2007: /* FIGURE SPACE */
2007     case 0x2008: /* PUNCTUATION SPACE */
2008     case 0x2009: /* THIN SPACE */
2009     case 0x200A: /* HAIR SPACE */
2010     case 0x202f: /* NARROW NO-BREAK SPACE */
2011     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2012     case 0x3000: /* IDEOGRAPHIC SPACE */
2013 ph10 510 MRRETURN(MATCH_NOMATCH);
2014 ph10 178 }
2015     ecode++;
2016     break;
2017    
2018     case OP_HSPACE:
2019 ph10 443 if (eptr >= md->end_subject)
2020 ph10 428 {
2021 ph10 443 SCHECK_PARTIAL();
2022 ph10 510 MRRETURN(MATCH_NOMATCH);
2023 ph10 443 }
2024 ph10 178 GETCHARINCTEST(c, eptr);
2025     switch(c)
2026     {
2027 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2028 ph10 178 case 0x09: /* HT */
2029     case 0x20: /* SPACE */
2030     case 0xa0: /* NBSP */
2031     case 0x1680: /* OGHAM SPACE MARK */
2032     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2033     case 0x2000: /* EN QUAD */
2034     case 0x2001: /* EM QUAD */
2035     case 0x2002: /* EN SPACE */
2036     case 0x2003: /* EM SPACE */
2037     case 0x2004: /* THREE-PER-EM SPACE */
2038     case 0x2005: /* FOUR-PER-EM SPACE */
2039     case 0x2006: /* SIX-PER-EM SPACE */
2040     case 0x2007: /* FIGURE SPACE */
2041     case 0x2008: /* PUNCTUATION SPACE */
2042     case 0x2009: /* THIN SPACE */
2043     case 0x200A: /* HAIR SPACE */
2044     case 0x202f: /* NARROW NO-BREAK SPACE */
2045     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2046     case 0x3000: /* IDEOGRAPHIC SPACE */
2047     break;
2048     }
2049     ecode++;
2050     break;
2051    
2052     case OP_NOT_VSPACE:
2053 ph10 443 if (eptr >= md->end_subject)
2054 ph10 428 {
2055 ph10 443 SCHECK_PARTIAL();
2056 ph10 510 MRRETURN(MATCH_NOMATCH);
2057 ph10 443 }
2058 ph10 178 GETCHARINCTEST(c, eptr);
2059     switch(c)
2060     {
2061     default: break;
2062     case 0x0a: /* LF */
2063     case 0x0b: /* VT */
2064     case 0x0c: /* FF */
2065     case 0x0d: /* CR */
2066     case 0x85: /* NEL */
2067     case 0x2028: /* LINE SEPARATOR */
2068     case 0x2029: /* PARAGRAPH SEPARATOR */
2069 ph10 510 MRRETURN(MATCH_NOMATCH);
2070 ph10 178 }
2071     ecode++;
2072     break;
2073    
2074     case OP_VSPACE:
2075 ph10 443 if (eptr >= md->end_subject)
2076 ph10 428 {
2077 ph10 443 SCHECK_PARTIAL();
2078 ph10 510 MRRETURN(MATCH_NOMATCH);
2079 ph10 443 }
2080 ph10 178 GETCHARINCTEST(c, eptr);
2081     switch(c)
2082     {
2083 ph10 510 default: MRRETURN(MATCH_NOMATCH);
2084 ph10 178 case 0x0a: /* LF */
2085     case 0x0b: /* VT */
2086     case 0x0c: /* FF */
2087     case 0x0d: /* CR */
2088     case 0x85: /* NEL */
2089     case 0x2028: /* LINE SEPARATOR */
2090     case 0x2029: /* PARAGRAPH SEPARATOR */
2091     break;
2092     }
2093     ecode++;
2094     break;
2095    
2096 nigel 77 #ifdef SUPPORT_UCP
2097     /* Check the next character by Unicode property. We will get here only
2098     if the support is in the binary; otherwise a compile-time error occurs. */
2099    
2100     case OP_PROP:
2101     case OP_NOTPROP:
2102 ph10 443 if (eptr >= md->end_subject)
2103 ph10 428 {
2104 ph10 443 SCHECK_PARTIAL();
2105 ph10 510 MRRETURN(MATCH_NOMATCH);
2106 ph10 443 }
2107 nigel 77 GETCHARINCTEST(c, eptr);
2108     {
2109 ph10 384 const ucd_record *prop = GET_UCD(c);
2110 nigel 77
2111 nigel 87 switch(ecode[1])
2112     {
2113     case PT_ANY:
2114 ph10 510 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2115 nigel 87 break;
2116 nigel 77
2117 nigel 87 case PT_LAMP:
2118 ph10 349 if ((prop->chartype == ucp_Lu ||
2119     prop->chartype == ucp_Ll ||
2120     prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2121 ph10 510 MRRETURN(MATCH_NOMATCH);
2122 ph10 517 break;
2123 nigel 87
2124     case PT_GC:
2125 ph10 351 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2126 ph10 510 MRRETURN(MATCH_NOMATCH);
2127 nigel 87 break;
2128    
2129     case PT_PC:
2130 ph10 349 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2131 ph10 510 MRRETURN(MATCH_NOMATCH);
2132 nigel 87 break;
2133    
2134     case PT_SC:
2135 ph10 349 if ((ecode[2] != prop->script) == (op == OP_PROP))
2136 ph10 510 MRRETURN(MATCH_NOMATCH);
2137 nigel 87 break;
2138 ph10 527
2139 ph10 517 /* These are specials */
2140 ph10 527
2141 ph10 517 case PT_ALNUM:
2142     if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2143     _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2144     MRRETURN(MATCH_NOMATCH);
2145 ph10 527 break;
2146    
2147 ph10 517 case PT_SPACE: /* Perl space */
2148     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2149     c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2150     == (op == OP_NOTPROP))
2151     MRRETURN(MATCH_NOMATCH);
2152 ph10 527 break;
2153    
2154 ph10 517 case PT_PXSPACE: /* POSIX space */
2155     if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2156 ph10 527 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2157 ph10 517 c == CHAR_FF || c == CHAR_CR)
2158     == (op == OP_NOTPROP))
2159     MRRETURN(MATCH_NOMATCH);
2160 ph10 527 break;
2161 nigel 87
2162 ph10 527 case PT_WORD:
2163 ph10 517 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2164 ph10 527 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2165 ph10 517 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2166     MRRETURN(MATCH_NOMATCH);
2167 ph10 527 break;
2168    
2169 ph10 517 /* This should never occur */
2170    
2171 nigel 87 default:
2172     RRETURN(PCRE_ERROR_INTERNAL);
2173 nigel 77 }
2174 nigel 87
2175     ecode += 3;
2176 nigel 77 }
2177     break;
2178    
2179     /* Match an extended Unicode sequence. We will get here only if the support
2180     is in the binary; otherwise a compile-time error occurs. */
2181    
2182     case OP_EXTUNI:
2183 ph10 443 if (eptr >= md->end_subject)
2184 ph10 428 {
2185 ph10 443 SCHECK_PARTIAL();
2186 ph10 510 MRRETURN(MATCH_NOMATCH);
2187 ph10 443 }
2188 nigel 77 GETCHARINCTEST(c, eptr);
2189     {
2190 ph10 349 int category = UCD_CATEGORY(c);
2191 ph10 510 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2192 nigel 77 while (eptr < md->end_subject)
2193     {
2194     int len = 1;
2195     if (!utf8) c = *eptr; else
2196     {
2197     GETCHARLEN(c, eptr, len);
2198     }
2199 ph10 349 category = UCD_CATEGORY(c);
2200 nigel 77 if (category != ucp_M) break;
2201     eptr += len;
2202     }
2203     }
2204     ecode++;
2205     break;
2206     #endif
2207    
2208    
2209     /* Match a back reference, possibly repeatedly. Look past the end of the
2210     item to see if there is repeat information following. The code is similar
2211     to that for character classes, but repeated for efficiency. Then obey
2212     similar code to character type repeats - written out again for speed.
2213     However, if the referenced string is the empty string, always treat
2214     it as matched, any number of times (otherwise there could be infinite
2215     loops). */
2216    
2217     case OP_REF:
2218     {
2219     offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2220 ph10 345 ecode += 3;
2221    
2222 ph10 336 /* If the reference is unset, there are two possibilities:
2223 ph10 345
2224 ph10 336 (a) In the default, Perl-compatible state, set the length to be longer
2225     than the amount of subject left; this ensures that every attempt at a
2226     match fails. We can't just fail here, because of the possibility of
2227     quantifiers with zero minima.
2228 ph10 345
2229     (b) If the JavaScript compatibility flag is set, set the length to zero
2230     so that the back reference matches an empty string.
2231    
2232     Otherwise, set the length to the length of what was matched by the
2233 ph10 336 referenced subpattern. */
2234 ph10 345
2235 ph10 336 if (offset >= offset_top || md->offset_vector[offset] < 0)
2236 ph10 345 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2237 ph10 336 else
2238     length = md->offset_vector[offset+1] - md->offset_vector[offset];
2239 nigel 77
2240     /* Set up for repetition, or handle the non-repeated case */
2241    
2242     switch (*ecode)
2243     {
2244     case OP_CRSTAR:
2245     case OP_CRMINSTAR:
2246     case OP_CRPLUS:
2247     case OP_CRMINPLUS:
2248     case OP_CRQUERY:
2249     case OP_CRMINQUERY:
2250     c = *ecode++ - OP_CRSTAR;
2251     minimize = (c & 1) != 0;
2252     min = rep_min[c]; /* Pick up values from tables; */
2253     max = rep_max[c]; /* zero for max => infinity */
2254     if (max == 0) max = INT_MAX;
2255     break;
2256    
2257     case OP_CRRANGE:
2258     case OP_CRMINRANGE:
2259     minimize = (*ecode == OP_CRMINRANGE);
2260     min = GET2(ecode, 1);
2261     max = GET2(ecode, 3);
2262     if (max == 0) max = INT_MAX;
2263     ecode += 5;
2264     break;
2265    
2266     default: /* No repeat follows */
2267 ph10 443 if (!match_ref(offset, eptr, length, md, ims))
2268 ph10 428 {
2269 ph10 443 CHECK_PARTIAL();
2270 ph10 510 MRRETURN(MATCH_NOMATCH);
2271 ph10 443 }
2272 nigel 77 eptr += length;
2273     continue; /* With the main loop */
2274     }
2275    
2276     /* If the length of the reference is zero, just continue with the
2277     main loop. */
2278 ph10 443
2279 nigel 77 if (length == 0) continue;
2280    
2281     /* First, ensure the minimum number of matches are present. We get back
2282     the length of the reference string explicitly rather than passing the
2283     address of eptr, so that eptr can be a register variable. */
2284    
2285     for (i = 1; i <= min; i++)
2286     {
2287 ph10 427 if (!match_ref(offset, eptr, length, md, ims))
2288 ph10 426 {
2289 ph10 427 CHECK_PARTIAL();
2290 ph10 510 MRRETURN(MATCH_NOMATCH);
2291 ph10 427 }
2292 nigel 77 eptr += length;
2293     }
2294    
2295     /* If min = max, continue at the same level without recursion.
2296     They are not both allowed to be zero. */
2297    
2298     if (min == max) continue;
2299    
2300     /* If minimizing, keep trying and advancing the pointer */
2301    
2302     if (minimize)
2303     {
2304     for (fi = min;; fi++)
2305     {
2306 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2307 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2308 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2309 ph10 428 if (!match_ref(offset, eptr, length, md, ims))
2310 ph10 426 {
2311 ph10 427 CHECK_PARTIAL();
2312 ph10 510 MRRETURN(MATCH_NOMATCH);
2313 ph10 427 }
2314 nigel 77 eptr += length;
2315     }
2316     /* Control never gets here */
2317     }
2318    
2319     /* If maximizing, find the longest string and work backwards */
2320    
2321     else
2322     {
2323     pp = eptr;
2324     for (i = min; i < max; i++)
2325     {
2326 ph10 463 if (!match_ref(offset, eptr, length, md, ims))
2327 ph10 462 {
2328 ph10 463 CHECK_PARTIAL();
2329 ph10 462 break;
2330 ph10 463 }
2331 nigel 77 eptr += length;
2332     }
2333     while (eptr >= pp)
2334     {
2335 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2336 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2337     eptr -= length;
2338     }
2339 ph10 510 MRRETURN(MATCH_NOMATCH);
2340 nigel 77 }
2341     }
2342     /* Control never gets here */
2343    
2344     /* Match a bit-mapped character class, possibly repeatedly. This op code is
2345     used when all the characters in the class have values in the range 0-255,
2346     and either the matching is caseful, or the characters are in the range
2347     0-127 when UTF-8 processing is enabled. The only difference between
2348     OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2349     encountered.
2350    
2351     First, look past the end of the item to see if there is repeat information
2352     following. Then obey similar code to character type repeats - written out
2353     again for speed. */
2354    
2355     case OP_NCLASS:
2356     case OP_CLASS:
2357     {
2358     data = ecode + 1; /* Save for matching */
2359     ecode += 33; /* Advance past the item */
2360    
2361     switch (*ecode)
2362     {
2363     case OP_CRSTAR:
2364     case OP_CRMINSTAR:
2365     case OP_CRPLUS:
2366     case OP_CRMINPLUS:
2367     case OP_CRQUERY:
2368     case OP_CRMINQUERY:
2369     c = *ecode++ - OP_CRSTAR;
2370     minimize = (c & 1) != 0;
2371     min = rep_min[c]; /* Pick up values from tables; */
2372     max = rep_max[c]; /* zero for max => infinity */
2373     if (max == 0) max = INT_MAX;
2374     break;
2375    
2376     case OP_CRRANGE:
2377     case OP_CRMINRANGE:
2378     minimize = (*ecode == OP_CRMINRANGE);
2379     min = GET2(ecode, 1);
2380     max = GET2(ecode, 3);
2381     if (max == 0) max = INT_MAX;
2382     ecode += 5;
2383     break;
2384    
2385     default: /* No repeat follows */
2386     min = max = 1;
2387     break;
2388     }
2389    
2390     /* First, ensure the minimum number of matches are present. */
2391    
2392     #ifdef SUPPORT_UTF8
2393     /* UTF-8 mode */
2394     if (utf8)
2395     {
2396     for (i = 1; i <= min; i++)
2397     {
2398 ph10 427 if (eptr >= md->end_subject)
2399 ph10 426 {
2400 ph10 428 SCHECK_PARTIAL();
2401 ph10 510 MRRETURN(MATCH_NOMATCH);
2402 ph10 427 }
2403 nigel 77 GETCHARINC(c, eptr);
2404     if (c > 255)
2405     {
2406 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2407 nigel 77 }
2408     else
2409     {
2410 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2411 nigel 77 }
2412     }
2413     }
2414     else
2415     #endif
2416     /* Not UTF-8 mode */
2417     {
2418     for (i = 1; i <= min; i++)
2419     {
2420 ph10 427 if (eptr >= md->end_subject)
2421 ph10 426 {
2422 ph10 428 SCHECK_PARTIAL();
2423 ph10 510 MRRETURN(MATCH_NOMATCH);
2424 ph10 427 }
2425 nigel 77 c = *eptr++;
2426 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2427 nigel 77 }
2428     }
2429    
2430     /* If max == min we can continue with the main loop without the
2431     need to recurse. */
2432    
2433     if (min == max) continue;
2434    
2435     /* If minimizing, keep testing the rest of the expression and advancing
2436     the pointer while it matches the class. */
2437    
2438     if (minimize)
2439     {
2440     #ifdef SUPPORT_UTF8
2441     /* UTF-8 mode */
2442     if (utf8)
2443     {
2444     for (fi = min;; fi++)
2445     {
2446 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2447 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2448 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2449 ph10 427 if (eptr >= md->end_subject)
2450 ph10 426 {
2451 ph10 427 SCHECK_PARTIAL();
2452 ph10 510 MRRETURN(MATCH_NOMATCH);
2453 ph10 427 }
2454 nigel 77 GETCHARINC(c, eptr);
2455     if (c > 255)
2456     {
2457 ph10 510 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2458 nigel 77 }
2459     else
2460     {
2461 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2462 nigel 77 }
2463     }
2464     }
2465     else
2466     #endif
2467     /* Not UTF-8 mode */
2468     {
2469     for (fi = min;; fi++)
2470     {
2471 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2472 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2473 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2474 ph10 427 if (eptr >= md->end_subject)
2475 ph10 426 {
2476 ph10 427 SCHECK_PARTIAL();
2477 ph10 510 MRRETURN(MATCH_NOMATCH);
2478 ph10 427 }
2479 nigel 77 c = *eptr++;
2480 ph10 510 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2481 nigel 77 }
2482     }
2483     /* Control never gets here */
2484     }
2485    
2486     /* If maximizing, find the longest possible run, then work backwards. */
2487    
2488     else
2489     {
2490     pp = eptr;
2491    
2492     #ifdef SUPPORT_UTF8
2493     /* UTF-8 mode */
2494     if (utf8)
2495     {
2496     for (i = min; i < max; i++)
2497     {
2498     int len = 1;
2499 ph10 463 if (eptr >= md->end_subject)
2500 ph10 462 {
2501 ph10 463 SCHECK_PARTIAL();
2502 ph10 462 break;
2503 ph10 463 }
2504 nigel 77 GETCHARLEN(c, eptr, len);
2505     if (c > 255)
2506     {
2507     if (op == OP_CLASS) break;
2508     }
2509     else
2510     {
2511     if ((data[c/8] & (1 << (c&7))) == 0) break;
2512     }
2513     eptr += len;
2514     }
2515     for (;;)
2516     {
2517 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2518 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2519     if (eptr-- == pp) break; /* Stop if tried at original pos */
2520     BACKCHAR(eptr);
2521     }
2522     }
2523     else
2524     #endif
2525     /* Not UTF-8 mode */
2526     {
2527     for (i = min; i < max; i++)
2528     {
2529 ph10 463 if (eptr >= md->end_subject)
2530 ph10 462 {
2531 ph10 463 SCHECK_PARTIAL();
2532 ph10 462 break;
2533 ph10 463 }
2534 nigel 77 c = *eptr;
2535     if ((data[c/8] & (1 << (c&7))) == 0) break;
2536     eptr++;
2537     }
2538     while (eptr >= pp)
2539     {
2540 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2541 nigel 87 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2542 nigel 77 eptr--;
2543     }
2544     }
2545    
2546 ph10 510 MRRETURN(MATCH_NOMATCH);
2547 nigel 77 }
2548     }
2549     /* Control never gets here */
2550    
2551    
2552     /* Match an extended character class. This opcode is encountered only
2553 ph10 384 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2554     mode, because Unicode properties are supported in non-UTF-8 mode. */
2555 nigel 77
2556     #ifdef SUPPORT_UTF8
2557     case OP_XCLASS:
2558     {
2559     data = ecode + 1 + LINK_SIZE; /* Save for matching */
2560     ecode += GET(ecode, 1); /* Advance past the item */
2561    
2562     switch (*ecode)
2563     {
2564     case OP_CRSTAR:
2565     case OP_CRMINSTAR:
2566     case OP_CRPLUS:
2567     case OP_CRMINPLUS:
2568     case OP_CRQUERY:
2569     case OP_CRMINQUERY:
2570     c = *ecode++ - OP_CRSTAR;
2571     minimize = (c & 1) != 0;
2572     min = rep_min[c]; /* Pick up values from tables; */
2573     max = rep_max[c]; /* zero for max => infinity */
2574     if (max == 0) max = INT_MAX;
2575     break;
2576    
2577     case OP_CRRANGE:
2578     case OP_CRMINRANGE:
2579     minimize = (*ecode == OP_CRMINRANGE);
2580     min = GET2(ecode, 1);
2581     max = GET2(ecode, 3);
2582     if (max == 0) max = INT_MAX;
2583     ecode += 5;
2584     break;
2585    
2586     default: /* No repeat follows */
2587     min = max = 1;
2588     break;
2589     }
2590    
2591     /* First, ensure the minimum number of matches are present. */
2592    
2593     for (i = 1; i <= min; i++)
2594     {
2595 ph10 427 if (eptr >= md->end_subject)
2596 ph10 426 {
2597     SCHECK_PARTIAL();
2598 ph10 510 MRRETURN(MATCH_NOMATCH);
2599 ph10 427 }
2600 ph10 384 GETCHARINCTEST(c, eptr);
2601 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2602 nigel 77 }
2603    
2604     /* If max == min we can continue with the main loop without the
2605     need to recurse. */
2606    
2607     if (min == max) continue;
2608    
2609     /* If minimizing, keep testing the rest of the expression and advancing
2610     the pointer while it matches the class. */
2611    
2612     if (minimize)
2613     {
2614     for (fi = min;; fi++)
2615     {
2616 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2617 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2618 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2619 ph10 427 if (eptr >= md->end_subject)
2620 ph10 426 {
2621 ph10 427 SCHECK_PARTIAL();
2622 ph10 510 MRRETURN(MATCH_NOMATCH);
2623 ph10 427 }
2624 ph10 384 GETCHARINCTEST(c, eptr);
2625 ph10 510 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2626 nigel 77 }
2627     /* Control never gets here */
2628     }
2629    
2630     /* If maximizing, find the longest possible run, then work backwards. */
2631    
2632     else
2633     {
2634     pp = eptr;
2635     for (i = min; i < max; i++)
2636     {
2637     int len = 1;
2638 ph10 463 if (eptr >= md->end_subject)
2639 ph10 462 {
2640 ph10 463 SCHECK_PARTIAL();
2641 ph10 462 break;
2642 ph10 463 }
2643 ph10 384 GETCHARLENTEST(c, eptr, len);
2644 nigel 77 if (!_pcre_xclass(c, data)) break;
2645     eptr += len;
2646     }
2647     for(;;)
2648     {
2649 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2650 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2651     if (eptr-- == pp) break; /* Stop if tried at original pos */
2652 ph10 214 if (utf8) BACKCHAR(eptr);
2653 nigel 77 }
2654 ph10 510 MRRETURN(MATCH_NOMATCH);
2655 nigel 77 }
2656    
2657     /* Control never gets here */
2658     }
2659     #endif /* End of XCLASS */
2660    
2661     /* Match a single character, casefully */
2662    
2663     case OP_CHAR:
2664     #ifdef SUPPORT_UTF8
2665     if (utf8)
2666     {
2667     length = 1;
2668     ecode++;
2669     GETCHARLEN(fc, ecode, length);
2670 ph10 443 if (length > md->end_subject - eptr)
2671 ph10 428 {
2672     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2673 ph10 510 MRRETURN(MATCH_NOMATCH);
2674 ph10 443 }
2675 ph10 510 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2676 nigel 77 }
2677     else
2678     #endif
2679    
2680     /* Non-UTF-8 mode */
2681     {
2682 ph10 443 if (md->end_subject - eptr < 1)
2683 ph10 428 {
2684     SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2685 ph10 510 MRRETURN(MATCH_NOMATCH);
2686 ph10 443 }
2687 ph10 510 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2688 nigel 77 ecode += 2;
2689     }
2690     break;
2691    
2692     /* Match a single character, caselessly */
2693    
2694     case OP_CHARNC:
2695     #ifdef SUPPORT_UTF8
2696     if (utf8)
2697     {
2698     length = 1;
2699     ecode++;
2700     GETCHARLEN(fc, ecode, length);
2701    
2702 ph10 443 if (length > md->end_subject - eptr)
2703 ph10 428 {
2704     CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2705 ph10 510 MRRETURN(MATCH_NOMATCH);
2706 ph10 443 }
2707 nigel 77
2708     /* If the pattern character's value is < 128, we have only one byte, and
2709     can use the fast lookup table. */
2710    
2711     if (fc < 128)
2712     {
2713 ph10 510 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2714 nigel 77 }
2715    
2716     /* Otherwise we must pick up the subject character */
2717    
2718     else
2719     {
2720 nigel 93 unsigned int dc;
2721 nigel 77 GETCHARINC(dc, eptr);
2722     ecode += length;
2723    
2724     /* If we have Unicode property support, we can use it to test the other
2725 nigel 87 case of the character, if there is one. */
2726 nigel 77
2727     if (fc != dc)
2728     {
2729     #ifdef SUPPORT_UCP
2730 ph10 349 if (dc != UCD_OTHERCASE(fc))
2731 nigel 77 #endif
2732 ph10 510 MRRETURN(MATCH_NOMATCH);
2733 nigel 77 }
2734     }
2735     }
2736     else
2737     #endif /* SUPPORT_UTF8 */
2738    
2739     /* Non-UTF-8 mode */
2740     {
2741 ph10 443 if (md->end_subject - eptr < 1)
2742 ph10 428 {
2743 ph10 443 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2744 ph10 510 MRRETURN(MATCH_NOMATCH);
2745 ph10 443 }
2746 ph10 510 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2747 nigel 77 ecode += 2;
2748     }
2749     break;
2750    
2751 nigel 93 /* Match a single character repeatedly. */
2752 nigel 77
2753     case OP_EXACT:
2754     min = max = GET2(ecode, 1);
2755     ecode += 3;
2756     goto REPEATCHAR;
2757    
2758 nigel 93 case OP_POSUPTO:
2759     possessive = TRUE;
2760     /* Fall through */
2761    
2762 nigel 77 case OP_UPTO:
2763     case OP_MINUPTO:
2764     min = 0;
2765     max = GET2(ecode, 1);
2766     minimize = *ecode == OP_MINUPTO;
2767     ecode += 3;
2768     goto REPEATCHAR;
2769    
2770 nigel 93 case OP_POSSTAR:
2771     possessive = TRUE;
2772     min = 0;
2773     max = INT_MAX;
2774     ecode++;
2775     goto REPEATCHAR;
2776    
2777     case OP_POSPLUS:
2778     possessive = TRUE;
2779     min = 1;
2780     max = INT_MAX;
2781     ecode++;
2782     goto REPEATCHAR;
2783    
2784     case OP_POSQUERY:
2785     possessive = TRUE;
2786     min = 0;
2787     max = 1;
2788     ecode++;
2789     goto REPEATCHAR;
2790    
2791 nigel 77 case OP_STAR:
2792     case OP_MINSTAR:
2793     case OP_PLUS:
2794     case OP_MINPLUS:
2795     case OP_QUERY:
2796     case OP_MINQUERY:
2797     c = *ecode++ - OP_STAR;
2798     minimize = (c & 1) != 0;
2799 ph10 443
2800 nigel 77 min = rep_min[c]; /* Pick up values from tables; */
2801     max = rep_max[c]; /* zero for max => infinity */
2802     if (max == 0) max = INT_MAX;
2803    
2804 ph10 426 /* Common code for all repeated single-character matches. */
2805 nigel 77
2806     REPEATCHAR:
2807     #ifdef SUPPORT_UTF8
2808     if (utf8)
2809     {
2810     length = 1;
2811     charptr = ecode;
2812     GETCHARLEN(fc, ecode, length);
2813     ecode += length;
2814    
2815     /* Handle multibyte character matching specially here. There is
2816     support for caseless matching if UCP support is present. */
2817    
2818     if (length > 1)
2819     {
2820     #ifdef SUPPORT_UCP
2821 nigel 93 unsigned int othercase;
2822 nigel 77 if ((ims & PCRE_CASELESS) != 0 &&
2823 ph10 349 (othercase = UCD_OTHERCASE(fc)) != fc)
2824 nigel 77 oclength = _pcre_ord2utf8(othercase, occhars);
2825 ph10 115 else oclength = 0;
2826 nigel 77 #endif /* SUPPORT_UCP */
2827    
2828     for (i = 1; i <= min; i++)
2829     {
2830 ph10 426 if (eptr <= md->end_subject - length &&
2831     memcmp(eptr, charptr, length) == 0) eptr += length;
2832 ph10 123 #ifdef SUPPORT_UCP
2833 ph10 426 else if (oclength > 0 &&
2834     eptr <= md->end_subject - oclength &&
2835     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2836     #endif /* SUPPORT_UCP */
2837 nigel 77 else
2838     {
2839 ph10 426 CHECK_PARTIAL();
2840 ph10 510 MRRETURN(MATCH_NOMATCH);
2841 nigel 77 }
2842     }
2843    
2844     if (min == max) continue;
2845    
2846     if (minimize)
2847     {
2848     for (fi = min;; fi++)
2849     {
2850 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2851 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2852 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2853 ph10 426 if (eptr <= md->end_subject - length &&
2854     memcmp(eptr, charptr, length) == 0) eptr += length;
2855 ph10 123 #ifdef SUPPORT_UCP
2856 ph10 426 else if (oclength > 0 &&
2857     eptr <= md->end_subject - oclength &&
2858     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2859     #endif /* SUPPORT_UCP */
2860 nigel 77 else
2861     {
2862 ph10 426 CHECK_PARTIAL();
2863 ph10 510 MRRETURN(MATCH_NOMATCH);
2864 nigel 77 }
2865     }
2866     /* Control never gets here */
2867     }
2868 nigel 93
2869     else /* Maximize */
2870 nigel 77 {
2871     pp = eptr;
2872     for (i = min; i < max; i++)
2873     {
2874 ph10 426 if (eptr <= md->end_subject - length &&
2875     memcmp(eptr, charptr, length) == 0) eptr += length;
2876 ph10 123 #ifdef SUPPORT_UCP
2877 ph10 426 else if (oclength > 0 &&
2878     eptr <= md->end_subject - oclength &&
2879     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2880     #endif /* SUPPORT_UCP */
2881 ph10 463 else
2882 ph10 462 {
2883 ph10 463 CHECK_PARTIAL();
2884 ph10 462 break;
2885 ph10 463 }
2886 nigel 77 }
2887 nigel 93
2888     if (possessive) continue;
2889 ph10 427
2890 ph10 120 for(;;)
2891 ph10 426 {
2892     RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2893     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2894 ph10 510 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2895 ph10 115 #ifdef SUPPORT_UCP
2896 ph10 426 eptr--;
2897     BACKCHAR(eptr);
2898 ph10 123 #else /* without SUPPORT_UCP */
2899 ph10 426 eptr -= length;
2900 ph10 123 #endif /* SUPPORT_UCP */
2901 ph10 426 }
2902 nigel 77 }
2903     /* Control never gets here */
2904     }
2905    
2906     /* If the length of a UTF-8 character is 1, we fall through here, and
2907     obey the code as for non-UTF-8 characters below, though in this case the
2908     value of fc will always be < 128. */
2909     }
2910     else
2911     #endif /* SUPPORT_UTF8 */
2912    
2913     /* When not in UTF-8 mode, load a single-byte character. */
2914    
2915 ph10 426 fc = *ecode++;
2916 ph10 443
2917 nigel 77 /* The value of fc at this point is always less than 256, though we may or
2918     may not be in UTF-8 mode. The code is duplicated for the caseless and
2919     caseful cases, for speed, since matching characters is likely to be quite
2920     common. First, ensure the minimum number of matches are present. If min =
2921     max, continue at the same level without recursing. Otherwise, if
2922     minimizing, keep trying the rest of the expression and advancing one
2923     matching character if failing, up to the maximum. Alternatively, if
2924     maximizing, find the maximum number of characters and work backwards. */
2925    
2926     DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2927     max, eptr));
2928    
2929     if ((ims & PCRE_CASELESS) != 0)
2930     {
2931     fc = md->lcc[fc];
2932     for (i = 1; i <= min; i++)
2933 ph10 426 {
2934     if (eptr >= md->end_subject)
2935     {
2936     SCHECK_PARTIAL();
2937 ph10 510 MRRETURN(MATCH_NOMATCH);
2938 ph10 426 }
2939 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2940 ph10 426 }
2941 nigel 77 if (min == max) continue;
2942     if (minimize)
2943     {
2944     for (fi = min;; fi++)
2945     {
2946 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2947 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2948 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2949 ph10 426 if (eptr >= md->end_subject)
2950     {
2951 ph10 427 SCHECK_PARTIAL();
2952 ph10 510 MRRETURN(MATCH_NOMATCH);
2953 ph10 426 }
2954 ph10 510 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2955 nigel 77 }
2956     /* Control never gets here */
2957     }
2958 nigel 93 else /* Maximize */
2959 nigel 77 {
2960     pp = eptr;
2961     for (i = min; i < max; i++)
2962     {
2963 ph10 463 if (eptr >= md->end_subject)
2964 ph10 462 {
2965     SCHECK_PARTIAL();
2966     break;
2967 ph10 463 }
2968 ph10 462 if (fc != md->lcc[*eptr]) break;
2969 nigel 77 eptr++;
2970     }
2971 ph10 427
2972 nigel 93 if (possessive) continue;
2973 ph10 427
2974 nigel 77 while (eptr >= pp)
2975     {
2976 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2977 nigel 77 eptr--;
2978     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2979     }
2980 ph10 510 MRRETURN(MATCH_NOMATCH);
2981 nigel 77 }
2982     /* Control never gets here */
2983     }
2984    
2985     /* Caseful comparisons (single-byte characters only at this point) */
2986    
2987     else
2988     {
2989 ph10 427 for (i = 1; i <= min; i++)
2990 ph10 426 {
2991     if (eptr >= md->end_subject)
2992     {
2993     SCHECK_PARTIAL();
2994 ph10 510 MRRETURN(MATCH_NOMATCH);
2995 ph10 426 }
2996 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2997 ph10 427 }
2998 ph10 443
2999 nigel 77 if (min == max) continue;
3000 ph10 443
3001 nigel 77 if (minimize)
3002     {
3003     for (fi = min;; fi++)
3004     {
3005 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3006 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3007 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3008 ph10 426 if (eptr >= md->end_subject)
3009 ph10 427 {
3010 ph10 426 SCHECK_PARTIAL();
3011 ph10 510 MRRETURN(MATCH_NOMATCH);
3012 ph10 427 }
3013 ph10 510 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3014 nigel 77 }
3015     /* Control never gets here */
3016     }
3017 nigel 93 else /* Maximize */
3018 nigel 77 {
3019     pp = eptr;
3020     for (i = min; i < max; i++)
3021     {
3022 ph10 463 if (eptr >= md->end_subject)
3023 ph10 462 {
3024 ph10 463 SCHECK_PARTIAL();
3025 ph10 462 break;
3026 ph10 463 }
3027 ph10 462 if (fc != *eptr) break;
3028 nigel 77 eptr++;
3029     }
3030 nigel 93 if (possessive) continue;
3031 ph10 443
3032 nigel 77 while (eptr >= pp)
3033     {
3034 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3035 nigel 77 eptr--;
3036     if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3037     }
3038 ph10 510 MRRETURN(MATCH_NOMATCH);
3039 nigel 77 }
3040     }
3041     /* Control never gets here */
3042    
3043     /* Match a negated single one-byte character. The pattern character is one
3044     byte; the subject character we are checking against it can be multibyte. */
3045    
3046     case OP_NOT:
3047 ph10 443 if (eptr >= md->end_subject)
3048 ph10 428 {
3049 ph10 443 SCHECK_PARTIAL();
3050 ph10 510 MRRETURN(MATCH_NOMATCH);
3051 ph10 443 }
3052 nigel 77 ecode++;
3053     GETCHARINCTEST(c, eptr);
3054     if ((ims & PCRE_CASELESS) != 0)
3055     {
3056     #ifdef SUPPORT_UTF8
3057     if (c < 256)
3058     #endif
3059     c = md->lcc[c];
3060 ph10 510 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3061 nigel 77 }
3062     else
3063     {
3064 ph10 510 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3065 nigel 77 }
3066     break;
3067    
3068     /* Match a negated single one-byte character repeatedly. This is almost a
3069     repeat of the code for a repeated single character, but I haven't found a
3070     nice way of commoning these up that doesn't require a test of the
3071     positive/negative option for each character match. Maybe that wouldn't add
3072     very much to the time taken, but character matching *is* what this is all
3073     about... */
3074    
3075     case OP_NOTEXACT:
3076     min = max = GET2(ecode, 1);
3077     ecode += 3;
3078     goto REPEATNOTCHAR;
3079    
3080     case OP_NOTUPTO:
3081     case OP_NOTMINUPTO:
3082     min = 0;
3083     max = GET2(ecode, 1);
3084     minimize = *ecode == OP_NOTMINUPTO;
3085     ecode += 3;
3086     goto REPEATNOTCHAR;
3087    
3088 nigel 93 case OP_NOTPOSSTAR:
3089     possessive = TRUE;
3090     min = 0;
3091     max = INT_MAX;
3092     ecode++;
3093     goto REPEATNOTCHAR;
3094    
3095     case OP_NOTPOSPLUS:
3096     possessive = TRUE;
3097     min = 1;
3098     max = INT_MAX;
3099     ecode++;
3100     goto REPEATNOTCHAR;
3101    
3102     case OP_NOTPOSQUERY:
3103     possessive = TRUE;
3104     min = 0;
3105     max = 1;
3106     ecode++;
3107     goto REPEATNOTCHAR;
3108    
3109     case OP_NOTPOSUPTO:
3110     possessive = TRUE;
3111     min = 0;
3112     max = GET2(ecode, 1);
3113     ecode += 3;
3114     goto REPEATNOTCHAR;
3115    
3116 nigel 77 case OP_NOTSTAR:
3117     case OP_NOTMINSTAR:
3118     case OP_NOTPLUS:
3119     case OP_NOTMINPLUS:
3120     case OP_NOTQUERY:
3121     case OP_NOTMINQUERY:
3122     c = *ecode++ - OP_NOTSTAR;
3123     minimize = (c & 1) != 0;
3124     min = rep_min[c]; /* Pick up values from tables; */
3125     max = rep_max[c]; /* zero for max => infinity */
3126     if (max == 0) max = INT_MAX;
3127    
3128 ph10 426 /* Common code for all repeated single-byte matches. */
3129 nigel 77
3130     REPEATNOTCHAR:
3131     fc = *ecode++;
3132    
3133     /* The code is duplicated for the caseless and caseful cases, for speed,
3134     since matching characters is likely to be quite common. First, ensure the
3135     minimum number of matches are present. If min = max, continue at the same
3136     level without recursing. Otherwise, if minimizing, keep trying the rest of
3137     the expression and advancing one matching character if failing, up to the
3138     maximum. Alternatively, if maximizing, find the maximum number of
3139     characters and work backwards. */
3140    
3141     DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3142     max, eptr));
3143    
3144     if ((ims & PCRE_CASELESS) != 0)
3145     {
3146     fc = md->lcc[fc];
3147    
3148     #ifdef SUPPORT_UTF8
3149     /* UTF-8 mode */
3150     if (utf8)
3151     {
3152 nigel 93 register unsigned int d;
3153 nigel 77 for (i = 1; i <= min; i++)
3154     {
3155 ph10 426 if (eptr >= md->end_subject)
3156     {
3157     SCHECK_PARTIAL();
3158 ph10 510 MRRETURN(MATCH_NOMATCH);
3159 ph10 427 }
3160 nigel 77 GETCHARINC(d, eptr);
3161     if (d < 256) d = md->lcc[d];
3162 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3163 nigel 77 }
3164     }
3165     else
3166     #endif
3167    
3168     /* Not UTF-8 mode */
3169     {
3170     for (i = 1; i <= min; i++)
3171 ph10 426 {
3172     if (eptr >= md->end_subject)
3173     {
3174     SCHECK_PARTIAL();
3175 ph10 510 MRRETURN(MATCH_NOMATCH);
3176 ph10 427 }
3177 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3178 ph10 427 }
3179 nigel 77 }
3180    
3181     if (min == max) continue;
3182    
3183     if (minimize)
3184     {
3185     #ifdef SUPPORT_UTF8
3186     /* UTF-8 mode */
3187     if (utf8)
3188     {
3189 nigel 93 register unsigned int d;
3190 nigel 77 for (fi = min;; fi++)
3191     {
3192 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3193 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3194 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3195 ph10 427 if (eptr >= md->end_subject)
3196 ph10 426 {
3197 ph10 427 SCHECK_PARTIAL();
3198 ph10 510 MRRETURN(MATCH_NOMATCH);
3199 ph10 427 }
3200 nigel 77 GETCHARINC(d, eptr);
3201     if (d < 256) d = md->lcc[d];
3202 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3203 nigel 77 }
3204     }
3205     else
3206     #endif
3207     /* Not UTF-8 mode */
3208     {
3209     for (fi = min;; fi++)
3210     {
3211 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3212 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3213 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3214 ph10 426 if (eptr >= md->end_subject)
3215     {
3216     SCHECK_PARTIAL();
3217 ph10 510 MRRETURN(MATCH_NOMATCH);
3218 ph10 426 }
3219 ph10 510 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3220 nigel 77 }
3221     }
3222     /* Control never gets here */
3223     }
3224    
3225     /* Maximize case */
3226    
3227     else
3228     {
3229     pp = eptr;
3230    
3231     #ifdef SUPPORT_UTF8
3232     /* UTF-8 mode */
3233     if (utf8)
3234     {
3235 nigel 93 register unsigned int d;
3236 nigel 77 for (i = min; i < max; i++)
3237     {
3238     int len = 1;
3239 ph10 463 if (eptr >= md->end_subject)
3240 ph10 462 {
3241 ph10 463 SCHECK_PARTIAL();
3242 ph10 462 break;
3243 ph10 463 }
3244 nigel 77 GETCHARLEN(d, eptr, len);
3245     if (d < 256) d = md->lcc[d];
3246     if (fc == d) break;
3247     eptr += len;
3248     }
3249 nigel 93 if (possessive) continue;
3250     for(;;)
3251 nigel 77 {
3252 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3253 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3254     if (eptr-- == pp) break; /* Stop if tried at original pos */
3255     BACKCHAR(eptr);
3256     }
3257     }
3258     else
3259     #endif
3260     /* Not UTF-8 mode */
3261     {
3262     for (i = min; i < max; i++)
3263     {
3264 ph10 463 if (eptr >= md->end_subject)
3265 ph10 462 {
3266     SCHECK_PARTIAL();
3267     break;
3268 ph10 463 }
3269 ph10 462 if (fc == md->lcc[*eptr]) break;
3270 nigel 77 eptr++;
3271     }
3272 nigel 93 if (possessive) continue;
3273 nigel 77 while (eptr >= pp)
3274     {
3275 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3276 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3277     eptr--;
3278     }
3279     }
3280    
3281 ph10 510 MRRETURN(MATCH_NOMATCH);
3282 nigel 77 }
3283     /* Control never gets here */
3284     }
3285    
3286     /* Caseful comparisons */
3287    
3288     else
3289     {
3290     #ifdef SUPPORT_UTF8
3291     /* UTF-8 mode */
3292     if (utf8)
3293     {
3294 nigel 93 register unsigned int d;
3295 nigel 77 for (i = 1; i <= min; i++)
3296     {
3297 ph10 426 if (eptr >= md->end_subject)
3298     {
3299     SCHECK_PARTIAL();
3300 ph10 510 MRRETURN(MATCH_NOMATCH);
3301 ph10 427 }
3302 nigel 77 GETCHARINC(d, eptr);
3303 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3304 nigel 77 }
3305     }
3306     else
3307     #endif
3308     /* Not UTF-8 mode */
3309     {
3310     for (i = 1; i <= min; i++)
3311 ph10 426 {
3312     if (eptr >= md->end_subject)
3313     {
3314     SCHECK_PARTIAL();
3315 ph10 510 MRRETURN(MATCH_NOMATCH);
3316 ph10 427 }
3317 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3318 ph10 427 }
3319 nigel 77 }
3320    
3321     if (min == max) continue;
3322    
3323     if (minimize)
3324     {
3325     #ifdef SUPPORT_UTF8
3326     /* UTF-8 mode */
3327     if (utf8)
3328     {
3329 nigel 93 register unsigned int d;
3330 nigel 77 for (fi = min;; fi++)
3331     {
3332 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3333 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3334 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3335 ph10 427 if (eptr >= md->end_subject)
3336 ph10 426 {
3337 ph10 427 SCHECK_PARTIAL();
3338 ph10 510 MRRETURN(MATCH_NOMATCH);
3339 ph10 427 }
3340 nigel 77 GETCHARINC(d, eptr);
3341 ph10 510 if (fc == d) MRRETURN(MATCH_NOMATCH);
3342 nigel 77 }
3343     }
3344     else
3345     #endif
3346     /* Not UTF-8 mode */
3347     {
3348     for (fi = min;; fi++)
3349     {
3350 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3351 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3352 ph10 510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3353 ph10 426 if (eptr >= md->end_subject)
3354     {
3355     SCHECK_PARTIAL();
3356 ph10 510 MRRETURN(MATCH_NOMATCH);
3357 ph10 427 }
3358 ph10 510 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3359 nigel 77 }
3360     }
3361     /* Control never gets here */
3362     }
3363    
3364     /* Maximize case */
3365    
3366     else
3367     {
3368     pp = eptr;
3369    
3370     #ifdef SUPPORT_UTF8
3371     /* UTF-8 mode */
3372     if (utf8)
3373     {
3374 nigel 93 register unsigned int d;
3375 nigel 77 for (i = min; i < max; i++)
3376     {
3377     int len = 1;
3378 ph10 463 if (eptr >= md->end_subject)
3379 ph10 462 {
3380 ph10 463 SCHECK_PARTIAL();
3381 ph10 462 break;
3382 ph10 463 }
3383 nigel 77 GETCHARLEN(d, eptr, len);
3384     if (fc == d) break;
3385     eptr += len;
3386     }
3387 nigel 93 if (possessive) continue;
3388 nigel 77 for(;;)
3389     {
3390 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3391 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3392     if (eptr-- == pp) break; /* Stop if tried at original pos */
3393     BACKCHAR(eptr);
3394     }
3395     }
3396     else
3397     #endif
3398     /* Not UTF-8 mode */
3399     {
3400     for (i = min; i < max; i++)
3401     {
3402 ph10 463 if (eptr >= md->end_subject)
3403 ph10 462 {
3404 ph10 463 SCHECK_PARTIAL();
3405 ph10 462 break;
3406 ph10 463 }
3407 ph10 462 if (fc == *eptr) break;
3408 nigel 77 eptr++;
3409     }
3410 nigel 93 if (possessive) continue;
3411 nigel 77 while (eptr >= pp)
3412     {
3413 ph10 164 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3414 nigel 77 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3415     eptr--;
3416     }
3417     }
3418    
3419 ph10 510 MRRETURN(MATCH_NOMATCH);
3420 nigel 77 }
3421     }
3422     /* Control never gets here */
3423    
3424     /* Match a single character type repeatedly; several different opcodes
3425     share code. This is very similar to the code for single characters, but we
3426     repeat it in the interests of efficiency. */
3427    
3428     case OP_TYPEEXACT:
3429     min = max = GET2(ecode, 1);
3430     minimize = TRUE;
3431     ecode += 3;
3432     goto REPEATTYPE;
3433    
3434     case OP_TYPEUPTO:
3435     case OP_TYPEMINUPTO:
3436     min = 0;
3437     max = GET2(ecode, 1);
3438     minimize = *ecode == OP_TYPEMINUPTO;
3439     ecode += 3;
3440     goto REPEATTYPE;
3441    
3442 nigel 93 case OP_TYPEPOSSTAR:
3443     possessive = TRUE;
3444     min = 0;
3445     max = INT_MAX;
3446     ecode++;
3447     goto REPEATTYPE;
3448    
3449     case OP_TYPEPOSPLUS:
3450     possessive = TRUE;
3451     min = 1;
3452     max = INT_MAX;
3453     ecode++;
3454     goto REPEATTYPE;
3455    
3456     case OP_TYPEPOSQUERY:
3457     possessive = TRUE;
3458     min = 0;
3459     max = 1;
3460     ecode++;
3461     goto REPEATTYPE;
3462    
3463     case OP_TYPEPOSUPTO:
3464     possessive = TRUE;
3465     min = 0;
3466     max = GET2(ecode, 1);
3467     ecode += 3;
3468     goto REPEATTYPE;
3469    
3470 nigel 77 case OP_TYPESTAR:
3471     case OP_TYPEMINSTAR:
3472     case OP_TYPEPLUS:
3473     case OP_TYPEMINPLUS:
3474     case OP_TYPEQUERY:
3475     case OP_TYPEMINQUERY:
3476     c = *ecode++ - OP_TYPESTAR;
3477     minimize = (c & 1) != 0;
3478     min = rep_min[c]; /* Pick up values from tables; */
3479     max = rep_max[c]; /* zero for max => infinity */
3480     if (max == 0) max = INT_MAX;
3481    
3482     /* Common code for all repeated single character type matches. Note that
3483     in UTF-8 mode, '.' matches a character of any length, but for the other
3484     character types, the valid characters are all one-byte long. */
3485    
3486     REPEATTYPE:
3487     ctype = *ecode++; /* Code for the character type */
3488    
3489     #ifdef SUPPORT_UCP
3490     if (ctype == OP_PROP || ctype == OP_NOTPROP)
3491     {
3492     prop_fail_result = ctype == OP_NOTPROP;
3493     prop_type = *ecode++;
3494 nigel 87 prop_value = *ecode++;
3495 nigel 77 }
3496     else prop_type = -1;
3497     #endif
3498    
3499     /* First, ensure the minimum number of matches are present. Use inline
3500     code for maximizing the speed, and do the type test once at the start
3501 ph10 426 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3502 nigel 77 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3503     and single-bytes. */
3504    
3505     if (min > 0)
3506     {
3507     #ifdef SUPPORT_UCP
3508 nigel 87 if (prop_type >= 0)
3509 nigel 77 {
3510 nigel 87 switch(prop_type)
3511 nigel 77 {
3512 nigel 87 case PT_ANY:
3513 ph10 510 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3514 nigel 87 for (i = 1; i <= min; i++)
3515     {
3516 ph10 427 if (eptr >= md->end_subject)
3517 ph10 426 {
3518 ph10 427 SCHECK_PARTIAL();
3519 ph10 510 MRRETURN(MATCH_NOMATCH);
3520 ph10 427 }
3521 ph10 184 GETCHARINCTEST(c, eptr);
3522 nigel 87 }
3523     break;
3524    
3525     case PT_LAMP:
3526     for (i = 1; i <= min; i++)
3527     {
3528 ph10 427 if (eptr >= md->end_subject)
3529 ph10 426 {
3530 ph10 427 SCHECK_PARTIAL();
3531 ph10 510 MRRETURN(MATCH_NOMATCH);
3532 ph10 427 }
3533 ph10 184 GETCHARINCTEST(c, eptr);
3534 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3535 nigel 87 if ((prop_chartype == ucp_Lu ||
3536     prop_chartype == ucp_Ll ||
3537     prop_chartype == ucp_Lt) == prop_fail_result)
3538 ph10 510 MRRETURN(MATCH_NOMATCH);
3539 nigel 87 }
3540     break;
3541    
3542     case PT_GC:
3543     for (i = 1; i <= min; i++)
3544     {
3545 ph10 427 if (eptr >= md->end_subject)
3546 ph10 426 {
3547 ph10 427 SCHECK_PARTIAL();
3548 ph10 510 MRRETURN(MATCH_NOMATCH);
3549 ph10 427 }
3550 ph10 184 GETCHARINCTEST(c, eptr);
3551 ph10 349 prop_category = UCD_CATEGORY(c);
3552 nigel 87 if ((prop_category == prop_value) == prop_fail_result)
3553 ph10 510 MRRETURN(MATCH_NOMATCH);
3554 nigel 87 }
3555     break;
3556    
3557     case PT_PC:
3558     for (i = 1; i <= min; i++)
3559     {
3560 ph10 427 if (eptr >= md->end_subject)
3561 ph10 426 {
3562 ph10 427 SCHECK_PARTIAL();
3563 ph10 510 MRRETURN(MATCH_NOMATCH);
3564 ph10 427 }
3565 ph10 184 GETCHARINCTEST(c, eptr);
3566 ph10 349 prop_chartype = UCD_CHARTYPE(c);
3567 nigel 87 if ((prop_chartype == prop_value) == prop_fail_result)
3568 ph10 510 MRRETURN(MATCH_NOMATCH);
3569 nigel 87 }
3570     break;
3571    
3572     case PT_SC:
3573     for (i = 1; i <= min; i++)
3574     {
3575 ph10 427 if (eptr >= md->end_subject)
3576 ph10 426 {
3577 ph10 427 SCHECK_PARTIAL();
3578 ph10 510 MRRETURN(MATCH_NOMATCH);
3579 ph10 427 }
3580 ph10 184 GETCHARINCTEST(c, eptr);
3581 ph10 349 prop_script = UCD_SCRIPT(c);
3582 nigel 87 if ((prop_script == prop_value) == prop_fail_result)
3583 ph10 510 MRRETURN(MATCH_NOMATCH);
3584 nigel 87 }
3585     break;
3586 ph10 527
3587 ph10 517 case PT_ALNUM:
3588     for (i = 1; i <= min; i++)
3589     {
3590     if (eptr >= md->end_subject)
3591     {
3592     SCHECK_PARTIAL();
3593     MRRETURN(MATCH_NOMATCH);
3594     }
3595     GETCHARINCTEST(c, eptr);
3596 ph10 527 prop_category = UCD_CATEGORY(c);
3597     if ((prop_category == ucp_L || prop_category == ucp_N)
3598 ph10 517 == prop_fail_result)
3599     MRRETURN(MATCH_NOMATCH);
3600     }
3601     break;
3602 ph10 527
3603 ph10 517 case PT_SPACE: /* Perl space */
3604     for (i = 1; i <= min; i++)
3605     {
3606     if (eptr >= md->end_subject)
3607     {
3608     SCHECK_PARTIAL();
3609     MRRETURN(MATCH_NOMATCH);
3610     }
3611     GETCHARINCTEST(c, eptr);
3612 ph10 527 prop_category = UCD_CATEGORY(c);
3613     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3614     c == CHAR_FF || c == CHAR_CR)
3615 ph10 517 == prop_fail_result)
3616     MRRETURN(MATCH_NOMATCH);
3617     }
3618     break;
3619 ph10 527
3620 ph10 517 case PT_PXSPACE: /* POSIX space */
3621     for (i = 1; i <= min; i++)
3622     {
3623     if (eptr >= md->end_subject)
3624     {
3625     SCHECK_PARTIAL();
3626     MRRETURN(MATCH_NOMATCH);
3627     }
3628     GETCHARINCTEST(c, eptr);
3629 ph10 527 prop_category = UCD_CATEGORY(c);
3630     if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3631     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3632 ph10 517 == prop_fail_result)
3633     MRRETURN(MATCH_NOMATCH);
3634     }
3635     break;
3636 ph10 527
3637     case PT_WORD:
3638 ph10 517 for (i = 1; i <= min; i++)
3639     {
3640     if (eptr >= md->end_subject)
3641     {
3642     SCHECK_PARTIAL();
3643     MRRETURN(MATCH_NOMATCH);
3644     }
3645     GETCHARINCTEST(c, eptr);
3646 ph10 527 prop_category = UCD_CATEGORY(c);
3647 ph10 517 if ((prop_category == ucp_L || prop_category == ucp_N ||
3648 ph10 527 c == CHAR_UNDERSCORE)
3649 ph10 517 == prop_fail_result)
3650     MRRETURN(MATCH_NOMATCH);
3651     }
3652     break;
3653 ph10 527
3654 ph10 517 /* This should not occur */
3655 nigel 87
3656     default:
3657     RRETURN(PCRE_ERROR_INTERNAL);
3658 nigel 77 }
3659     }
3660    
3661     /* Match extended Unicode sequences. We will get here only if the
3662     support is in the binary; otherwise a compile-time error occurs. */
3663    
3664     else if (ctype == OP_EXTUNI)
3665     {
3666     for (i = 1; i <= min; i++)
3667     {
3668 ph10 427 if (eptr >= md->end_subject)
3669 ph10 426 {
3670 ph10 427 SCHECK_PARTIAL();
3671 ph10 510 MRRETURN(MATCH_NOMATCH);
3672 ph10 427 }
3673 nigel 77 GETCHARINCTEST(c, eptr);
3674 ph10 349 prop_category = UCD_CATEGORY(c);
3675 ph10 510 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3676 nigel 77 while (eptr < md->end_subject)
3677     {
3678     int len = 1;
3679 ph10 426 if (!utf8) c = *eptr;
3680     else { GETCHARLEN(c, eptr, len); }
3681 ph10 349 prop_category = UCD_CATEGORY(c);
3682 nigel 77 if (prop_category != ucp_M) break;
3683     eptr += len;
3684     }
3685     }
3686     }
3687    
3688     else
3689     #endif /* SUPPORT_UCP */
3690    
3691     /* Handle all other cases when the coding is UTF-8 */
3692    
3693     #ifdef SUPPORT_UTF8
3694     if (utf8) switch(ctype)
3695     {
3696     case OP_ANY:
3697     for (i = 1; i <= min; i++)
3698     {
3699 ph10 426 if (eptr >= md->end_subject)
3700     {
3701 ph10 427 SCHECK_PARTIAL();
3702 ph10 510 MRRETURN(MATCH_NOMATCH);
3703 ph10 427 }
3704 ph10 510 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3705 nigel 91 eptr++;
3706 nigel 77 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3707     }
3708     break;
3709    
3710 ph10 341 case OP_ALLANY:
3711     for (i = 1; i <= min; i++)
3712     {
3713 ph10 427 if (eptr >= md->end_subject)
3714 ph10 426 {
3715     SCHECK_PARTIAL();
3716 ph10 510 MRRETURN(MATCH_NOMATCH);
3717 ph10 427 }
3718 ph10 341 eptr++;
3719     while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3720     }
3721     break;
3722    
3723 nigel 77 case OP_ANYBYTE:
3724 ph10 510 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3725 nigel 77 eptr += min;
3726     break;
3727    
3728 nigel 93 case OP_ANYNL:
3729     for (i = 1; i <= min; i++)
3730     {
3731 ph10 427 if (eptr >= md->end_subject)
3732 ph10 426 {
3733     SCHECK_PARTIAL();
3734 ph10 510 MRRETURN(MATCH_NOMATCH);
3735 ph10 427 }
3736 nigel 93 GETCHARINC(c, eptr);
3737     switch(c)
3738     {
3739 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3740 nigel 93 case 0x000d:
3741     if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3742     break;
3743 ph10 231
3744 nigel 93 case 0x000a:
3745 ph10 231 break;
3746    
3747 nigel 93 case 0x000b:
3748     case 0x000c:
3749     case 0x0085:
3750     case 0x2028:
3751     case 0x2029:
3752 ph10 510 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3753 nigel 93 break;
3754     }
3755     }
3756     break;
3757    
3758 ph10 178 case OP_NOT_HSPACE:
3759     for (i = 1; i <= min; i++)
3760     {
3761 ph10 427 if (eptr >= md->end_subject)
3762 ph10 426 {
3763     SCHECK_PARTIAL();
3764 ph10 510 MRRETURN(MATCH_NOMATCH);
3765 ph10 427 }
3766 ph10 178 GETCHARINC(c, eptr);
3767     switch(c)
3768     {
3769     default: break;
3770     case 0x09: /* HT */
3771     case 0x20: /* SPACE */
3772     case 0xa0: /* NBSP */
3773     case 0x1680: /* OGHAM SPACE MARK */
3774     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3775     case 0x2000: /* EN QUAD */
3776     case 0x2001: /* EM QUAD */
3777     case 0x2002: /* EN SPACE */
3778     case 0x2003: /* EM SPACE */
3779     case 0x2004: /* THREE-PER-EM SPACE */
3780     case 0x2005: /* FOUR-PER-EM SPACE */
3781     case 0x2006: /* SIX-PER-EM SPACE */
3782     case 0x2007: /* FIGURE SPACE */
3783     case 0x2008: /* PUNCTUATION SPACE */
3784     case 0x2009: /* THIN SPACE */
3785     case 0x200A: /* HAIR SPACE */
3786     case 0x202f: /* NARROW NO-BREAK SPACE */
3787     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3788     case 0x3000: /* IDEOGRAPHIC SPACE */
3789 ph10 510 MRRETURN(MATCH_NOMATCH);
3790 ph10 178 }
3791     }
3792     break;
3793 ph10 182
3794 ph10 178 case OP_HSPACE:
3795     for (i = 1; i <= min; i++)
3796     {
3797 ph10 427 if (eptr >= md->end_subject)
3798 ph10 426 {
3799 ph10 427 SCHECK_PARTIAL();
3800 ph10 510 MRRETURN(MATCH_NOMATCH);
3801 ph10 427 }
3802 ph10 178 GETCHARINC(c, eptr);
3803     switch(c)
3804     {
3805 ph10 510 default: MRRETURN(MATCH_NOMATCH);
3806 ph10 178 case 0x09: /* HT */
3807     case 0x20: /* SPACE */
3808     case 0xa0: /* NBSP */
3809     case 0x1680: /* OGHAM SPACE MARK */
3810     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3811     case 0x2000: /* EN QUAD */
3812     case 0x2001: /* EM QUAD */
3813     case 0x2002: /* EN SPACE */
3814     case 0x2003: /* EM SPACE */
3815     case 0x2004: /* THREE-PER-EM SPACE */
3816     case 0x2005: /* FOUR-PER-EM SPACE */
3817     case 0x2006: /* SIX-PER-EM SPACE */
3818     case 0x2007: /* FIGURE SPACE */
3819     case 0x2008: /* PUNCTUATION SPACE */
3820     case 0x2009: /* THIN SPACE */
3821     case 0x200A: /* HAIR SPACE */
3822     case 0x202f: /* NARROW NO-BREAK SPACE */
3823     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3824     case 0x3000: /* IDEOGRAPHIC SPACE */
3825     break;
3826     }
3827     }
3828     break;
3829 ph10 182
3830 ph10 178 case OP_NOT_VSPACE:
3831     for (i = 1; i <= min; i++)
3832     {
3833 ph10 427 if (eptr >= md->end_subject)
3834 ph10 426 {
3835 ph10 427 SCHECK_PARTIAL();
3836 ph10 510 MRRETURN(MATCH_NOMATCH);
3837 ph10 427 }
3838 ph10 178 GETCHARINC(c, eptr);
3839     switch(c)
3840     {
3841     default: break;
3842     case 0x0a: /* LF */
3843     case 0x0b: /* VT */
3844     case 0x0c: /* FF */
3845     case 0x0d: /* CR */
3846     case 0x85: /* NEL */
3847     case 0x2028: /* LINE SEPARATOR */
3848     case 0x2029: /* PARAGRAPH SEPARATOR */
3849 ph10 510 MRRETURN(MATCH_NOMATCH);
3850 ph10 178 }
3851     }