/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 459 - (show annotations) (download)
Sun Oct 4 09:21:39 2009 UTC (4 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 167421 byte(s)
Fix problems with conditional references to duplicate named subpatterns.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2009 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49 #define NLBLOCK md /* Block containing newline information */
50 #define PSSTART start_subject /* Field containing processed string start */
51 #define PSEND end_subject /* Field containing processed string end */
52
53 #include "pcre_internal.h"
54
55 /* Undefine some potentially clashing cpp symbols */
56
57 #undef min
58 #undef max
59
/* Bits that may be set in the "flags" argument of match() */

#define match_condassert     0x01  /* Called to check a condition assertion */
#define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */

/* Ordinary (non-error) results of match(). Error results are the externally
defined PCRE_ERROR_xxx codes, all of which are negative. */

#define MATCH_MATCH          1
#define MATCH_NOMATCH        0

/* Internal-only results of match(). They are made strongly negative so that
they stay clear of the external error codes. */

#define MATCH_COMMIT         (-999)
#define MATCH_PRUNE          (-998)
#define MATCH_SKIP           (-997)
#define MATCH_THEN           (-996)

/* Upper bound on the number of offset ints saved on the stack for recursive
calls; a bigger offset vector is saved via malloc instead. Keep this a multiple
of 3, because the offset vector length is always a multiple of 3. */

#define REC_STACK_SAVE_MAX   30

/* Minimum and maximum counts for the common repeats. In the maxima table a
value of 0 stands for "no upper limit". */

static const char rep_min[] = {
  0, 0,
  1, 1,
  0, 0 };

static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1 };
89
90
91
92 #ifdef DEBUG
93 /*************************************************
94 * Debugging function to print chars *
95 *************************************************/
96
97 /* Print a sequence of chars in printable format, stopping at the end of the
98 subject if requested.
99
100 Arguments:
101 p points to characters
102 length number to print
103 is_subject TRUE if printing from within md->start_subject
104 md pointer to matching data block, if is_subject is TRUE
105
106 Returns: nothing
107 */
108
109 static void
110 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
111 {
112 unsigned int c;
113 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
114 while (length-- > 0)
115 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
116 }
117 #endif
118
119
120
121 /*************************************************
122 * Match a back-reference *
123 *************************************************/
124
125 /* If a back reference hasn't been set, the length that is passed is greater
126 than the number of characters left in the string, so the match fails.
127
128 Arguments:
129 offset index into the offset vector
130 eptr points into the subject
131 length length to be matched
132 md points to match data block
133 ims the ims flags
134
135 Returns: TRUE if matched
136 */
137
138 static BOOL
139 match_ref(int offset, register USPTR eptr, int length, match_data *md,
140 unsigned long int ims)
141 {
142 USPTR p = md->start_subject + md->offset_vector[offset];
143
144 #ifdef DEBUG
145 if (eptr >= md->end_subject)
146 printf("matching subject <null>");
147 else
148 {
149 printf("matching subject ");
150 pchars(eptr, length, TRUE, md);
151 }
152 printf(" against backref ");
153 pchars(p, length, FALSE, md);
154 printf("\n");
155 #endif
156
157 /* Always fail if not enough characters left */
158
159 if (length > md->end_subject - eptr) return FALSE;
160
161 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162 properly if Unicode properties are supported. Otherwise, we can check only
163 ASCII characters. */
164
165 if ((ims & PCRE_CASELESS) != 0)
166 {
167 #ifdef SUPPORT_UTF8
168 #ifdef SUPPORT_UCP
169 if (md->utf8)
170 {
171 USPTR endptr = eptr + length;
172 while (eptr < endptr)
173 {
174 int c, d;
175 GETCHARINC(c, eptr);
176 GETCHARINC(d, p);
177 if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178 }
179 }
180 else
181 #endif
182 #endif
183
184 /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185 is no UCP support. */
186
187 while (length-- > 0)
188 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189 }
190
191 /* In the caseful case, we can just compare the bytes, whether or not we
192 are in UTF-8 mode. */
193
194 else
195 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196
197 return TRUE;
198 }
199
200
201
202 /***************************************************************************
203 ****************************************************************************
204 RECURSION IN THE match() FUNCTION
205
206 The match() function is highly recursive, though not every recursive call
207 increases the recursive depth. Nevertheless, some regular expressions can cause
208 it to recurse to a great depth. I was writing for Unix, so I just let it call
209 itself recursively. This uses the stack for saving everything that has to be
210 saved for a recursive call. On Unix, the stack can be large, and this works
211 fine.
212
213 It turns out that on some non-Unix-like systems there are problems with
214 programs that use a lot of stack. (This despite the fact that every last chip
215 has oodles of memory these days, and techniques for extending the stack have
216 been known for decades.) So....
217
218 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
219 calls by keeping local variables that need to be preserved in blocks of memory
220 obtained from malloc() instead of on the stack. Macros are used to
221 achieve this so that the actual code doesn't look very different to what it
222 always used to.
223
224 The original heap-recursive code used longjmp(). However, it seems that this
225 can be very slow on some operating systems. Following a suggestion from Stan
226 Switzer, the use of longjmp() has been abolished, at the cost of having to
227 provide a unique number for each call to RMATCH. There is no way of generating
228 a sequence of numbers at compile time in C. I have given them names, to make
229 them stand out more clearly.
230
231 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
232 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
233 tests. Furthermore, not using longjmp() means that local dynamic variables
234 don't have indeterminate values; this has meant that the frame size can be
235 reduced because the result can be "passed back" by straight setting of the
236 variable instead of being passed in the frame.
237 ****************************************************************************
238 ***************************************************************************/
239
240 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
241 below must be updated in sync. */
242
/* Unique identifiers for the RMATCH call sites, numbered consecutively from 1 */

enum {
  RM1 = 1, RM2,  RM3,  RM4,  RM5,  RM6,
  RM7,     RM8,  RM9,  RM10, RM11, RM12,
  RM13,    RM14, RM15, RM16, RM17, RM18,
  RM19,    RM20, RM21, RM22, RM23, RM24,
  RM25,    RM26, RM27, RM28, RM29, RM30,
  RM31,    RM32, RM33, RM34, RM35, RM36,
  RM37,    RM38, RM39, RM40, RM41, RM42,
  RM43,    RM44, RM45, RM46, RM47, RM48,
  RM49,    RM50, RM51, RM52, RM53, RM54
};
249
250 /* These versions of the macros use the stack, as normal. There are debugging
251 versions and production versions. Note that the "rw" argument of RMATCH isn't
252 actually used in this definition. */
253
254 #ifndef NO_RECURSE
255 #define REGISTER register
256
257 #ifdef DEBUG
258 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
259 { \
260 printf("match() called in line %d\n", __LINE__); \
261 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
262 printf("to line %d\n", __LINE__); \
263 }
264 #define RRETURN(ra) \
265 { \
266 printf("match() returned %d from line %d ", ra, __LINE__); \
267 return ra; \
268 }
269 #else
270 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
271 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
272 #define RRETURN(ra) return ra
273 #endif
274
275 #else
276
277
278 /* These versions of the macros manage a private stack on the heap. Note that
279 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
280 argument of match(), which never changes. */
281
282 #define REGISTER
283
284 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
285 {\
286 heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
287 frame->Xwhere = rw; \
288 newframe->Xeptr = ra;\
289 newframe->Xecode = rb;\
290 newframe->Xmstart = mstart;\
291 newframe->Xoffset_top = rc;\
292 newframe->Xims = re;\
293 newframe->Xeptrb = rf;\
294 newframe->Xflags = rg;\
295 newframe->Xrdepth = frame->Xrdepth + 1;\
296 newframe->Xprevframe = frame;\
297 frame = newframe;\
298 DPRINTF(("restarting from line %d\n", __LINE__));\
299 goto HEAP_RECURSE;\
300 L_##rw:\
301 DPRINTF(("jumped back to line %d\n", __LINE__));\
302 }
303
304 #define RRETURN(ra)\
305 {\
306 heapframe *newframe = frame;\
307 frame = newframe->Xprevframe;\
308 (pcre_stack_free)(newframe);\
309 if (frame != NULL)\
310 {\
311 rrc = ra;\
312 goto HEAP_RETURN;\
313 }\
314 return ra;\
315 }
316
317
318 /* Structure for remembering the local variables in a private frame */
319
320 typedef struct heapframe {
321 struct heapframe *Xprevframe;
322
323 /* Function arguments that may change */
324
325 USPTR Xeptr;
326 const uschar *Xecode;
327 USPTR Xmstart;
328 int Xoffset_top;
329 long int Xims;
330 eptrblock *Xeptrb;
331 int Xflags;
332 unsigned int Xrdepth;
333
334 /* Function local variables */
335
336 USPTR Xcallpat;
337 #ifdef SUPPORT_UTF8
338 USPTR Xcharptr;
339 #endif
340 USPTR Xdata;
341 USPTR Xnext;
342 USPTR Xpp;
343 USPTR Xprev;
344 USPTR Xsaved_eptr;
345
346 recursion_info Xnew_recursive;
347
348 BOOL Xcur_is_word;
349 BOOL Xcondition;
350 BOOL Xprev_is_word;
351
352 unsigned long int Xoriginal_ims;
353
354 #ifdef SUPPORT_UCP
355 int Xprop_type;
356 int Xprop_value;
357 int Xprop_fail_result;
358 int Xprop_category;
359 int Xprop_chartype;
360 int Xprop_script;
361 int Xoclength;
362 uschar Xocchars[8];
363 #endif
364
365 int Xcodelink;
366 int Xctype;
367 unsigned int Xfc;
368 int Xfi;
369 int Xlength;
370 int Xmax;
371 int Xmin;
372 int Xnumber;
373 int Xoffset;
374 int Xop;
375 int Xsave_capture_last;
376 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
377 int Xstacksave[REC_STACK_SAVE_MAX];
378
379 eptrblock Xnewptrb;
380
381 /* Where to jump back to */
382
383 int Xwhere;
384
385 } heapframe;
386
387 #endif
388
389
390 /***************************************************************************
391 ***************************************************************************/
392
393
394
395 /*************************************************
396 * Match from current position *
397 *************************************************/
398
399 /* This function is called recursively in many circumstances. Whenever it
400 returns a negative (error) response, the outer incarnation must also return the
401 same response. */
402
403 /* These macros pack up tests that are used for partial matching, and which
404 appear several times in the code. We set the "hit end" flag if the pointer is
405 at the end of the subject and also past the current match start (i.e.
406 something has been matched). For hard partial matching, we then return
407 immediately. The second one is used when we already know we are past the end of
408 the subject. */
409
/* CHECK_PARTIAL: when partial matching is enabled and the current position is
both beyond the match start and at (or past) the end of the subject, note that
the end was hit; for md->partial values above 1, return PCRE_ERROR_PARTIAL at
once. */

#define CHECK_PARTIAL() \
  if (md->partial != 0 && eptr > mstart && eptr >= md->end_subject) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }

/* SCHECK_PARTIAL: the same, but without the end-of-subject test, for use at
places where that is already known to hold. */

#define SCHECK_PARTIAL() \
  if (md->partial != 0 && eptr > mstart) \
    { \
    md->hitend = TRUE; \
    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
    }
423
424
425 /* Performance note: It might be tempting to extract commonly used fields from
426 the md structure (e.g. utf8, end_subject) into individual variables to improve
427 performance. Tests using gcc on a SPARC disproved this; in the first case, it
428 made performance worse.
429
430 Arguments:
431 eptr pointer to current character in subject
432 ecode pointer to current position in compiled code
433 mstart pointer to the current match start position (can be modified
434 by encountering \K)
435 offset_top current top pointer
436 md pointer to "static" info for the match
437 ims current /i, /m, and /s options
438 eptrb pointer to chain of blocks containing eptr at start of
439 brackets - for testing for empty matches
440 flags can contain
441 match_condassert - this is an assertion condition
442 match_cbegroup - this is the start of an unlimited repeat
443 group that can match an empty string
444 rdepth the recursion depth
445
446 Returns: MATCH_MATCH if matched ) these values are >= 0
447 MATCH_NOMATCH if failed to match )
448 a negative PCRE_ERROR_xxx value if aborted by an error condition
449 (e.g. stopped by repeated call or recursion limit)
450 */
451
452 static int
453 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
454 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
455 int flags, unsigned int rdepth)
456 {
457 /* These variables do not need to be preserved over recursion in this function,
458 so they can be ordinary variables in all cases. Mark some of them with
459 "register" because they are used a lot in loops. */
460
461 register int rrc; /* Returns from recursive calls */
462 register int i; /* Used for loops not involving calls to RMATCH() */
463 register unsigned int c; /* Character values not kept over RMATCH() calls */
464 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
465
466 BOOL minimize, possessive; /* Quantifier options */
467 int condcode;
468
469 /* When recursion is not being used, all "local" variables that have to be
470 preserved over calls to RMATCH() are part of a "frame" which is obtained from
471 heap storage. Set up the top-level frame here; others are obtained from the
472 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
473
474 #ifdef NO_RECURSE
475 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
476 frame->Xprevframe = NULL; /* Marks the top level */
477
478 /* Copy in the original argument variables */
479
480 frame->Xeptr = eptr;
481 frame->Xecode = ecode;
482 frame->Xmstart = mstart;
483 frame->Xoffset_top = offset_top;
484 frame->Xims = ims;
485 frame->Xeptrb = eptrb;
486 frame->Xflags = flags;
487 frame->Xrdepth = rdepth;
488
489 /* This is where control jumps back to to effect "recursion" */
490
491 HEAP_RECURSE:
492
493 /* Macros make the argument variables come from the current frame */
494
495 #define eptr frame->Xeptr
496 #define ecode frame->Xecode
497 #define mstart frame->Xmstart
498 #define offset_top frame->Xoffset_top
499 #define ims frame->Xims
500 #define eptrb frame->Xeptrb
501 #define flags frame->Xflags
502 #define rdepth frame->Xrdepth
503
504 /* Ditto for the local variables */
505
506 #ifdef SUPPORT_UTF8
507 #define charptr frame->Xcharptr
508 #endif
509 #define callpat frame->Xcallpat
510 #define codelink frame->Xcodelink
511 #define data frame->Xdata
512 #define next frame->Xnext
513 #define pp frame->Xpp
514 #define prev frame->Xprev
515 #define saved_eptr frame->Xsaved_eptr
516
517 #define new_recursive frame->Xnew_recursive
518
519 #define cur_is_word frame->Xcur_is_word
520 #define condition frame->Xcondition
521 #define prev_is_word frame->Xprev_is_word
522
523 #define original_ims frame->Xoriginal_ims
524
525 #ifdef SUPPORT_UCP
526 #define prop_type frame->Xprop_type
527 #define prop_value frame->Xprop_value
528 #define prop_fail_result frame->Xprop_fail_result
529 #define prop_category frame->Xprop_category
530 #define prop_chartype frame->Xprop_chartype
531 #define prop_script frame->Xprop_script
532 #define oclength frame->Xoclength
533 #define occhars frame->Xocchars
534 #endif
535
536 #define ctype frame->Xctype
537 #define fc frame->Xfc
538 #define fi frame->Xfi
539 #define length frame->Xlength
540 #define max frame->Xmax
541 #define min frame->Xmin
542 #define number frame->Xnumber
543 #define offset frame->Xoffset
544 #define op frame->Xop
545 #define save_capture_last frame->Xsave_capture_last
546 #define save_offset1 frame->Xsave_offset1
547 #define save_offset2 frame->Xsave_offset2
548 #define save_offset3 frame->Xsave_offset3
549 #define stacksave frame->Xstacksave
550
551 #define newptrb frame->Xnewptrb
552
553 /* When recursion is being used, local variables are allocated on the stack and
554 get preserved during recursion in the normal way. In this environment, fi and
555 i, and fc and c, can be the same variables. */
556
557 #else /* NO_RECURSE not defined */
558 #define fi i
559 #define fc c
560
561
562 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
563 const uschar *charptr; /* in small blocks of the code. My normal */
564 #endif /* style of coding would have declared */
565 const uschar *callpat; /* them within each of those blocks. */
566 const uschar *data; /* However, in order to accommodate the */
567 const uschar *next; /* version of this code that uses an */
568 USPTR pp; /* external "stack" implemented on the */
569 const uschar *prev; /* heap, it is easier to declare them all */
570 USPTR saved_eptr; /* here, so the declarations can be cut */
571 /* out in a block. The only declarations */
572 recursion_info new_recursive; /* within blocks below are for variables */
573 /* that do not have to be preserved over */
574 BOOL cur_is_word; /* a recursive call to RMATCH(). */
575 BOOL condition;
576 BOOL prev_is_word;
577
578 unsigned long int original_ims;
579
580 #ifdef SUPPORT_UCP
581 int prop_type;
582 int prop_value;
583 int prop_fail_result;
584 int prop_category;
585 int prop_chartype;
586 int prop_script;
587 int oclength;
588 uschar occhars[8];
589 #endif
590
591 int codelink;
592 int ctype;
593 int length;
594 int max;
595 int min;
596 int number;
597 int offset;
598 int op;
599 int save_capture_last;
600 int save_offset1, save_offset2, save_offset3;
601 int stacksave[REC_STACK_SAVE_MAX];
602
603 eptrblock newptrb;
604 #endif /* NO_RECURSE */
605
606 /* These statements are here to stop the compiler complaining about unitialized
607 variables. */
608
609 #ifdef SUPPORT_UCP
610 prop_value = 0;
611 prop_fail_result = 0;
612 #endif
613
614
615 /* This label is used for tail recursion, which is used in a few cases even
616 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
617 used. Thanks to Ian Taylor for noticing this possibility and sending the
618 original patch. */
619
620 TAIL_RECURSE:
621
622 /* OK, now we can get on with the real code of the function. Recursive calls
623 are specified by the macro RMATCH and RRETURN is used to return. When
624 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
625 and a "return", respectively (possibly with some debugging if DEBUG is
626 defined). However, RMATCH isn't like a function call because it's quite a
627 complicated macro. It has to be used in one particular way. This shouldn't,
628 however, impact performance when true recursion is being used. */
629
630 #ifdef SUPPORT_UTF8
631 utf8 = md->utf8; /* Local copy of the flag */
632 #else
633 utf8 = FALSE;
634 #endif
635
636 /* First check that we haven't called match() too many times, or that we
637 haven't exceeded the recursive call limit. */
638
639 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
640 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
641
642 original_ims = ims; /* Save for resetting on ')' */
643
644 /* At the start of a group with an unlimited repeat that may match an empty
645 string, the match_cbegroup flag is set. When this is the case, add the current
646 subject pointer to the chain of such remembered pointers, to be checked when we
647 hit the closing ket, in order to break infinite loops that match no characters.
648 When match() is called in other circumstances, don't add to the chain. The
649 match_cbegroup flag must NOT be used with tail recursion, because the memory
650 block that is used is on the stack, so a new one may be required for each
651 match(). */
652
653 if ((flags & match_cbegroup) != 0)
654 {
655 newptrb.epb_saved_eptr = eptr;
656 newptrb.epb_prev = eptrb;
657 eptrb = &newptrb;
658 }
659
660 /* Now start processing the opcodes. */
661
662 for (;;)
663 {
664 minimize = possessive = FALSE;
665 op = *ecode;
666
667 switch(op)
668 {
669 case OP_FAIL:
670 RRETURN(MATCH_NOMATCH);
671
672 case OP_PRUNE:
673 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
674 ims, eptrb, flags, RM51);
675 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
676 RRETURN(MATCH_PRUNE);
677
678 case OP_COMMIT:
679 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
680 ims, eptrb, flags, RM52);
681 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
682 RRETURN(MATCH_COMMIT);
683
684 case OP_SKIP:
685 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
686 ims, eptrb, flags, RM53);
687 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
688 md->start_match_ptr = eptr; /* Pass back current position */
689 RRETURN(MATCH_SKIP);
690
691 case OP_THEN:
692 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
693 ims, eptrb, flags, RM54);
694 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
695 RRETURN(MATCH_THEN);
696
697 /* Handle a capturing bracket. If there is space in the offset vector, save
698 the current subject position in the working slot at the top of the vector.
699 We mustn't change the current values of the data slot, because they may be
700 set from a previous iteration of this group, and be referred to by a
701 reference inside the group.
702
703 If the bracket fails to match, we need to restore this value and also the
704 values of the final offsets, in case they were set by a previous iteration
705 of the same bracket.
706
707 If there isn't enough space in the offset vector, treat this as if it were
708 a non-capturing bracket. Don't worry about setting the flag for the error
709 case here; that is handled in the code for KET. */
710
711 case OP_CBRA:
712 case OP_SCBRA:
713 number = GET2(ecode, 1+LINK_SIZE);
714 offset = number << 1;
715
716 #ifdef DEBUG
717 printf("start bracket %d\n", number);
718 printf("subject=");
719 pchars(eptr, 16, TRUE, md);
720 printf("\n");
721 #endif
722
723 if (offset < md->offset_max)
724 {
725 save_offset1 = md->offset_vector[offset];
726 save_offset2 = md->offset_vector[offset+1];
727 save_offset3 = md->offset_vector[md->offset_end - number];
728 save_capture_last = md->capture_last;
729
730 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
731 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
732
733 flags = (op == OP_SCBRA)? match_cbegroup : 0;
734 do
735 {
736 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
737 ims, eptrb, flags, RM1);
738 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
739 md->capture_last = save_capture_last;
740 ecode += GET(ecode, 1);
741 }
742 while (*ecode == OP_ALT);
743
744 DPRINTF(("bracket %d failed\n", number));
745
746 md->offset_vector[offset] = save_offset1;
747 md->offset_vector[offset+1] = save_offset2;
748 md->offset_vector[md->offset_end - number] = save_offset3;
749
750 RRETURN(MATCH_NOMATCH);
751 }
752
753 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
754 as a non-capturing bracket. */
755
756 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
757 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
758
759 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
760
761 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
762 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
763
764 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
765 final alternative within the brackets, we would return the result of a
766 recursive call to match() whatever happened. We can reduce stack usage by
767 turning this into a tail recursion, except in the case when match_cbegroup
768 is set.*/
769
770 case OP_BRA:
771 case OP_SBRA:
772 DPRINTF(("start non-capturing bracket\n"));
773 flags = (op >= OP_SBRA)? match_cbegroup : 0;
774 for (;;)
775 {
776 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
777 {
778 if (flags == 0) /* Not a possibly empty group */
779 {
780 ecode += _pcre_OP_lengths[*ecode];
781 DPRINTF(("bracket 0 tail recursion\n"));
782 goto TAIL_RECURSE;
783 }
784
785 /* Possibly empty group; can't use tail recursion. */
786
787 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
788 eptrb, flags, RM48);
789 RRETURN(rrc);
790 }
791
792 /* For non-final alternatives, continue the loop for a NOMATCH result;
793 otherwise return. */
794
795 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
796 eptrb, flags, RM2);
797 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
798 ecode += GET(ecode, 1);
799 }
800 /* Control never reaches here. */
801
802 /* Conditional group: compilation checked that there are no more than
803 two branches. If the condition is false, skipping the first branch takes us
804 past the end if there is only one branch, but that's OK because that is
805 exactly what going to the ket would do. As there is only one branch to be
806 obeyed, we can use tail recursion to avoid using another stack frame. */
807
808 case OP_COND:
809 case OP_SCOND:
810 codelink= GET(ecode, 1);
811
812 /* Because of the way auto-callout works during compile, a callout item is
813 inserted between OP_COND and an assertion condition. */
814
815 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
816 {
817 if (pcre_callout != NULL)
818 {
819 pcre_callout_block cb;
820 cb.version = 1; /* Version 1 of the callout block */
821 cb.callout_number = ecode[LINK_SIZE+2];
822 cb.offset_vector = md->offset_vector;
823 cb.subject = (PCRE_SPTR)md->start_subject;
824 cb.subject_length = md->end_subject - md->start_subject;
825 cb.start_match = mstart - md->start_subject;
826 cb.current_position = eptr - md->start_subject;
827 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
828 cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
829 cb.capture_top = offset_top/2;
830 cb.capture_last = md->capture_last;
831 cb.callout_data = md->callout_data;
832 if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
833 if (rrc < 0) RRETURN(rrc);
834 }
835 ecode += _pcre_OP_lengths[OP_CALLOUT];
836 }
837
838 condcode = ecode[LINK_SIZE+1];
839
840 /* Now see what the actual condition is */
841
842 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
843 {
844 if (md->recursive == NULL) /* Not recursing => FALSE */
845 {
846 condition = FALSE;
847 ecode += GET(ecode, 1);
848 }
849 else
850 {
851 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
852 condition = (recno == RREF_ANY || recno == md->recursive->group_num);
853
854 /* If the test is for recursion into a specific subpattern, and it is
855 false, but the test was set up by name, scan the table to see if the
856 name refers to any other numbers, and test them. The condition is true
857 if any one is set. */
858
859 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
860 {
861 uschar *slotA = md->name_table;
862 for (i = 0; i < md->name_count; i++)
863 {
864 if (GET2(slotA, 0) == recno) break;
865 slotA += md->name_entry_size;
866 }
867
868 /* Found a name for the number - there can be only one; duplicate
869 names for different numbers are allowed, but not vice versa. First
870 scan down for duplicates. */
871
872 if (i < md->name_count)
873 {
874 uschar *slotB = slotA;
875 while (slotB > md->name_table)
876 {
877 slotB -= md->name_entry_size;
878 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
879 {
880 condition = GET2(slotB, 0) == md->recursive->group_num;
881 if (condition) break;
882 }
883 else break;
884 }
885
886 /* Scan up for duplicates */
887
888 if (!condition)
889 {
890 slotB = slotA;
891 for (i++; i < md->name_count; i++)
892 {
893 slotB += md->name_entry_size;
894 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
895 {
896 condition = GET2(slotB, 0) == md->recursive->group_num;
897 if (condition) break;
898 }
899 else break;
900 }
901 }
902 }
903 }
904
905 /* Chose branch according to the condition */
906
907 ecode += condition? 3 : GET(ecode, 1);
908 }
909 }
910
911 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
912 {
913 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
914 condition = offset < offset_top && md->offset_vector[offset] >= 0;
915
916 /* If the numbered capture is unset, but the reference was by name,
917 scan the table to see if the name refers to any other numbers, and test
918 them. The condition is true if any one is set. This is tediously similar
919 to the code above, but not close enough to try to amalgamate. */
920
921 if (!condition && condcode == OP_NCREF)
922 {
923 int refno = offset >> 1;
924 uschar *slotA = md->name_table;
925
926 for (i = 0; i < md->name_count; i++)
927 {
928 if (GET2(slotA, 0) == refno) break;
929 slotA += md->name_entry_size;
930 }
931
932 /* Found a name for the number - there can be only one; duplicate names
933 for different numbers are allowed, but not vice versa. First scan down
934 for duplicates. */
935
936 if (i < md->name_count)
937 {
938 uschar *slotB = slotA;
939 while (slotB > md->name_table)
940 {
941 slotB -= md->name_entry_size;
942 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
943 {
944 offset = GET2(slotB, 0) << 1;
945 condition = offset < offset_top &&
946 md->offset_vector[offset] >= 0;
947 if (condition) break;
948 }
949 else break;
950 }
951
952 /* Scan up for duplicates */
953
954 if (!condition)
955 {
956 slotB = slotA;
957 for (i++; i < md->name_count; i++)
958 {
959 slotB += md->name_entry_size;
960 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
961 {
962 offset = GET2(slotB, 0) << 1;
963 condition = offset < offset_top &&
964 md->offset_vector[offset] >= 0;
965 if (condition) break;
966 }
967 else break;
968 }
969 }
970 }
971 }
972
973 /* Choose branch according to the condition */
974
975 ecode += condition? 3 : GET(ecode, 1);
976 }
977
978 else if (condcode == OP_DEF) /* DEFINE - always false */
979 {
980 condition = FALSE;
981 ecode += GET(ecode, 1);
982 }
983
984 /* The condition is an assertion. Call match() to evaluate it - setting
985 the final argument match_condassert causes it to stop at the end of an
986 assertion. */
987
988 else
989 {
990 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
991 match_condassert, RM3);
992 if (rrc == MATCH_MATCH)
993 {
994 condition = TRUE;
995 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
996 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
997 }
998 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
999 {
1000 RRETURN(rrc); /* Need braces because of following else */
1001 }
1002 else
1003 {
1004 condition = FALSE;
1005 ecode += codelink;
1006 }
1007 }
1008
1009 /* We are now at the branch that is to be obeyed. As there is only one,
1010 we can use tail recursion to avoid using another stack frame, except when
1011 match_cbegroup is required for an unlimited repeat of a possibly empty
1012 group. If the second alternative doesn't exist, we can just plough on. */
1013
1014 if (condition || *ecode == OP_ALT)
1015 {
1016 ecode += 1 + LINK_SIZE;
1017 if (op == OP_SCOND) /* Possibly empty group */
1018 {
1019 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1020 RRETURN(rrc);
1021 }
1022 else /* Group must match something */
1023 {
1024 flags = 0;
1025 goto TAIL_RECURSE;
1026 }
1027 }
1028 else /* Condition false & no alternative */
1029 {
1030 ecode += 1 + LINK_SIZE;
1031 }
1032 break;
1033
1034
1035 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1036 to close any currently open capturing brackets. */
1037
1038 case OP_CLOSE:
1039 number = GET2(ecode, 1);
1040 offset = number << 1;
1041
1042 #ifdef DEBUG
1043 printf("end bracket %d at *ACCEPT", number);
1044 printf("\n");
1045 #endif
1046
1047 md->capture_last = number;
1048 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1049 {
1050 md->offset_vector[offset] =
1051 md->offset_vector[md->offset_end - number];
1052 md->offset_vector[offset+1] = eptr - md->start_subject;
1053 if (offset_top <= offset) offset_top = offset + 2;
1054 }
1055 ecode += 3;
1056 break;
1057
1058
1059 /* End of the pattern, either real or forced. If we are in a top-level
1060 recursion, we should restore the offsets appropriately and continue from
1061 after the call. */
1062
1063 case OP_ACCEPT:
1064 case OP_END:
1065 if (md->recursive != NULL && md->recursive->group_num == 0)
1066 {
1067 recursion_info *rec = md->recursive;
1068 DPRINTF(("End of pattern in a (?0) recursion\n"));
1069 md->recursive = rec->prevrec;
1070 memmove(md->offset_vector, rec->offset_save,
1071 rec->saved_max * sizeof(int));
1072 offset_top = rec->offset_top;
1073 mstart = rec->save_start;
1074 ims = original_ims;
1075 ecode = rec->after_call;
1076 break;
1077 }
1078
1079 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1080 set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1081 the subject. In both cases, backtracking will then try other alternatives,
1082 if any. */
1083
1084 if (eptr == mstart &&
1085 (md->notempty ||
1086 (md->notempty_atstart &&
1087 mstart == md->start_subject + md->start_offset)))
1088 RRETURN(MATCH_NOMATCH);
1089
1090 /* Otherwise, we have a match. */
1091
1092 md->end_match_ptr = eptr; /* Record where we ended */
1093 md->end_offset_top = offset_top; /* and how many extracts were taken */
1094 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1095 RRETURN(MATCH_MATCH);
1096
1097 /* Change option settings */
1098
1099 case OP_OPT:
1100 ims = ecode[1];
1101 ecode += 2;
1102 DPRINTF(("ims set to %02lx\n", ims));
1103 break;
1104
1105 /* Assertion brackets. Check the alternative branches in turn - the
1106 matching won't pass the KET for an assertion. If any one branch matches,
1107 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1108 start of each branch to move the current point backwards, so the code at
1109 this level is identical to the lookahead case. */
1110
1111 case OP_ASSERT:
1112 case OP_ASSERTBACK:
1113 do
1114 {
1115 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1116 RM4);
1117 if (rrc == MATCH_MATCH) break;
1118 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1119 ecode += GET(ecode, 1);
1120 }
1121 while (*ecode == OP_ALT);
1122 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1123
1124 /* If checking an assertion for a condition, return MATCH_MATCH. */
1125
1126 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1127
1128 /* Continue from after the assertion, updating the offsets high water
1129 mark, since extracts may have been taken during the assertion. */
1130
1131 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1132 ecode += 1 + LINK_SIZE;
1133 offset_top = md->end_offset_top;
1134 continue;
1135
1136 /* Negative assertion: all branches must fail to match */
1137
1138 case OP_ASSERT_NOT:
1139 case OP_ASSERTBACK_NOT:
1140 do
1141 {
1142 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1143 RM5);
1144 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
1145 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1146 ecode += GET(ecode,1);
1147 }
1148 while (*ecode == OP_ALT);
1149
1150 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1151
1152 ecode += 1 + LINK_SIZE;
1153 continue;
1154
1155 /* Move the subject pointer back. This occurs only at the start of
1156 each branch of a lookbehind assertion. If we are too close to the start to
1157 move back, this match function fails. When working with UTF-8 we move
1158 back a number of characters, not bytes. */
1159
1160 case OP_REVERSE:
1161 #ifdef SUPPORT_UTF8
1162 if (utf8)
1163 {
1164 i = GET(ecode, 1);
1165 while (i-- > 0)
1166 {
1167 eptr--;
1168 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1169 BACKCHAR(eptr);
1170 }
1171 }
1172 else
1173 #endif
1174
1175 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1176
1177 {
1178 eptr -= GET(ecode, 1);
1179 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1180 }
1181
1182 /* Save the earliest consulted character, then skip to next op code */
1183
1184 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1185 ecode += 1 + LINK_SIZE;
1186 break;
1187
1188 /* The callout item calls an external function, if one is provided, passing
1189 details of the match so far. This is mainly for debugging, though the
1190 function is able to force a failure. */
1191
1192 case OP_CALLOUT:
1193 if (pcre_callout != NULL)
1194 {
1195 pcre_callout_block cb;
1196 cb.version = 1; /* Version 1 of the callout block */
1197 cb.callout_number = ecode[1];
1198 cb.offset_vector = md->offset_vector;
1199 cb.subject = (PCRE_SPTR)md->start_subject;
1200 cb.subject_length = md->end_subject - md->start_subject;
1201 cb.start_match = mstart - md->start_subject;
1202 cb.current_position = eptr - md->start_subject;
1203 cb.pattern_position = GET(ecode, 2);
1204 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1205 cb.capture_top = offset_top/2;
1206 cb.capture_last = md->capture_last;
1207 cb.callout_data = md->callout_data;
1208 if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1209 if (rrc < 0) RRETURN(rrc);
1210 }
1211 ecode += 2 + 2*LINK_SIZE;
1212 break;
1213
1214 /* Recursion either matches the current regex, or some subexpression. The
1215 offset data is the offset to the starting bracket from the start of the
1216 whole pattern. (This is so that it works from duplicated subpatterns.)
1217
1218 If there are any capturing brackets started but not finished, we have to
1219 save their starting points and reinstate them after the recursion. However,
1220 we don't know how many such there are (offset_top records the completed
1221 total) so we just have to save all the potential data. There may be up to
1222 65535 such values, which is too large to put on the stack, but using malloc
1223 for small numbers seems expensive. As a compromise, the stack is used when
1224 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1225 is used. A problem is what to do if the malloc fails ... there is no way of
1226 returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
1227 values on the stack, and accept that the rest may be wrong.
1228
1229 There are also other values that have to be saved. We use a chained
1230 sequence of blocks that actually live on the stack. Thanks to Robin Houston
1231 for the original version of this logic. */
1232
1233 case OP_RECURSE:
1234 {
1235 callpat = md->start_code + GET(ecode, 1);
1236 new_recursive.group_num = (callpat == md->start_code)? 0 :
1237 GET2(callpat, 1 + LINK_SIZE);
1238
1239 /* Add to "recursing stack" */
1240
1241 new_recursive.prevrec = md->recursive;
1242 md->recursive = &new_recursive;
1243
1244 /* Find where to continue from afterwards */
1245
1246 ecode += 1 + LINK_SIZE;
1247 new_recursive.after_call = ecode;
1248
1249 /* Now save the offset data. */
1250
1251 new_recursive.saved_max = md->offset_end;
1252 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1253 new_recursive.offset_save = stacksave;
1254 else
1255 {
1256 new_recursive.offset_save =
1257 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1258 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1259 }
1260
1261 memcpy(new_recursive.offset_save, md->offset_vector,
1262 new_recursive.saved_max * sizeof(int));
1263 new_recursive.save_start = mstart;
1264 new_recursive.offset_top = offset_top;
1265 mstart = eptr;
1266
1267 /* OK, now we can do the recursion. For each top-level alternative we
1268 restore the offset and recursion data. */
1269
1270 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1271 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1272 do
1273 {
1274 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1275 md, ims, eptrb, flags, RM6);
1276 if (rrc == MATCH_MATCH)
1277 {
1278 DPRINTF(("Recursion matched\n"));
1279 md->recursive = new_recursive.prevrec;
1280 if (new_recursive.offset_save != stacksave)
1281 (pcre_free)(new_recursive.offset_save);
1282 RRETURN(MATCH_MATCH);
1283 }
1284 else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1285 {
1286 DPRINTF(("Recursion gave error %d\n", rrc));
1287 if (new_recursive.offset_save != stacksave)
1288 (pcre_free)(new_recursive.offset_save);
1289 RRETURN(rrc);
1290 }
1291
1292 md->recursive = &new_recursive;
1293 memcpy(md->offset_vector, new_recursive.offset_save,
1294 new_recursive.saved_max * sizeof(int));
1295 callpat += GET(callpat, 1);
1296 }
1297 while (*callpat == OP_ALT);
1298
1299 DPRINTF(("Recursion didn't match\n"));
1300 md->recursive = new_recursive.prevrec;
1301 if (new_recursive.offset_save != stacksave)
1302 (pcre_free)(new_recursive.offset_save);
1303 RRETURN(MATCH_NOMATCH);
1304 }
1305 /* Control never reaches here */
1306
1307 /* "Once" brackets are like assertion brackets except that after a match,
1308 the point in the subject string is not moved back. Thus there can never be
1309 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1310 Check the alternative branches in turn - the matching won't pass the KET
1311 for this kind of subpattern. If any one branch matches, we carry on as at
1312 the end of a normal bracket, leaving the subject pointer. */
1313
1314 case OP_ONCE:
1315 prev = ecode;
1316 saved_eptr = eptr;
1317
1318 do
1319 {
1320 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1321 if (rrc == MATCH_MATCH) break;
1322 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1323 ecode += GET(ecode,1);
1324 }
1325 while (*ecode == OP_ALT);
1326
1327 /* If hit the end of the group (which could be repeated), fail */
1328
1329 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1330
1331 /* Continue as from after the assertion, updating the offsets high water
1332 mark, since extracts may have been taken. */
1333
1334 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1335
1336 offset_top = md->end_offset_top;
1337 eptr = md->end_match_ptr;
1338
1339 /* For a non-repeating ket, just continue at this level. This also
1340 happens for a repeating ket if no characters were matched in the group.
1341 This is the forcible breaking of infinite loops as implemented in Perl
1342 5.005. If there is an options reset, it will get obeyed in the normal
1343 course of events. */
1344
1345 if (*ecode == OP_KET || eptr == saved_eptr)
1346 {
1347 ecode += 1+LINK_SIZE;
1348 break;
1349 }
1350
1351 /* The repeating kets try the rest of the pattern or restart from the
1352 preceding bracket, in the appropriate order. The second "call" of match()
1353 uses tail recursion, to avoid using another stack frame. We need to reset
1354 any options that changed within the bracket before re-running it, so
1355 check the next opcode. */
1356
1357 if (ecode[1+LINK_SIZE] == OP_OPT)
1358 {
1359 ims = (ims & ~PCRE_IMS) | ecode[4];
1360 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1361 }
1362
1363 if (*ecode == OP_KETRMIN)
1364 {
1365 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1366 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1367 ecode = prev;
1368 flags = 0;
1369 goto TAIL_RECURSE;
1370 }
1371 else /* OP_KETRMAX */
1372 {
1373 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1374 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1375 ecode += 1 + LINK_SIZE;
1376 flags = 0;
1377 goto TAIL_RECURSE;
1378 }
1379 /* Control never gets here */
1380
1381 /* An alternation is the end of a branch; scan along to find the end of the
1382 bracketed group and go to there. */
1383
1384 case OP_ALT:
1385 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1386 break;
1387
1388 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1389 indicating that it may occur zero times. It may repeat infinitely, or not
1390 at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1391 with fixed upper repeat limits are compiled as a number of copies, with the
1392 optional ones preceded by BRAZERO or BRAMINZERO. */
1393
1394 case OP_BRAZERO:
1395 {
1396 next = ecode+1;
1397 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1398 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1399 do next += GET(next,1); while (*next == OP_ALT);
1400 ecode = next + 1 + LINK_SIZE;
1401 }
1402 break;
1403
1404 case OP_BRAMINZERO:
1405 {
1406 next = ecode+1;
1407 do next += GET(next, 1); while (*next == OP_ALT);
1408 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1409 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1410 ecode++;
1411 }
1412 break;
1413
1414 case OP_SKIPZERO:
1415 {
1416 next = ecode+1;
1417 do next += GET(next,1); while (*next == OP_ALT);
1418 ecode = next + 1 + LINK_SIZE;
1419 }
1420 break;
1421
1422 /* End of a group, repeated or non-repeating. */
1423
1424 case OP_KET:
1425 case OP_KETRMIN:
1426 case OP_KETRMAX:
1427 prev = ecode - GET(ecode, 1);
1428
1429 /* If this was a group that remembered the subject start, in order to break
1430 infinite repeats of empty string matches, retrieve the subject start from
1431 the chain. Otherwise, set it NULL. */
1432
1433 if (*prev >= OP_SBRA)
1434 {
1435 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1436 eptrb = eptrb->epb_prev; /* Backup to previous group */
1437 }
1438 else saved_eptr = NULL;
1439
1440 /* If we are at the end of an assertion group, stop matching and return
1441 MATCH_MATCH, but record the current high water mark for use by positive
1442 assertions. Do this also for the "once" (atomic) groups. */
1443
1444 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1445 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1446 *prev == OP_ONCE)
1447 {
1448 md->end_match_ptr = eptr; /* For ONCE */
1449 md->end_offset_top = offset_top;
1450 RRETURN(MATCH_MATCH);
1451 }
1452
1453 /* For capturing groups we have to check the group number back at the start
1454 and if necessary complete handling an extraction by setting the offsets and
1455 bumping the high water mark. Note that whole-pattern recursion is coded as
1456 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1457 when the OP_END is reached. Other recursion is handled here. */
1458
1459 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1460 {
1461 number = GET2(prev, 1+LINK_SIZE);
1462 offset = number << 1;
1463
1464 #ifdef DEBUG
1465 printf("end bracket %d", number);
1466 printf("\n");
1467 #endif
1468
1469 md->capture_last = number;
1470 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1471 {
1472 md->offset_vector[offset] =
1473 md->offset_vector[md->offset_end - number];
1474 md->offset_vector[offset+1] = eptr - md->start_subject;
1475 if (offset_top <= offset) offset_top = offset + 2;
1476 }
1477
1478 /* Handle a recursively called group. Restore the offsets
1479 appropriately and continue from after the call. */
1480
1481 if (md->recursive != NULL && md->recursive->group_num == number)
1482 {
1483 recursion_info *rec = md->recursive;
1484 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1485 md->recursive = rec->prevrec;
1486 mstart = rec->save_start;
1487 memcpy(md->offset_vector, rec->offset_save,
1488 rec->saved_max * sizeof(int));
1489 offset_top = rec->offset_top;
1490 ecode = rec->after_call;
1491 ims = original_ims;
1492 break;
1493 }
1494 }
1495
1496 /* For both capturing and non-capturing groups, reset the value of the ims
1497 flags, in case they got changed during the group. */
1498
1499 ims = original_ims;
1500 DPRINTF(("ims reset to %02lx\n", ims));
1501
1502 /* For a non-repeating ket, just continue at this level. This also
1503 happens for a repeating ket if no characters were matched in the group.
1504 This is the forcible breaking of infinite loops as implemented in Perl
1505 5.005. If there is an options reset, it will get obeyed in the normal
1506 course of events. */
1507
1508 if (*ecode == OP_KET || eptr == saved_eptr)
1509 {
1510 ecode += 1 + LINK_SIZE;
1511 break;
1512 }
1513
1514 /* The repeating kets try the rest of the pattern or restart from the
1515 preceding bracket, in the appropriate order. In the second case, we can use
1516 tail recursion to avoid using another stack frame, unless we have an
1517 unlimited repeat of a group that can match an empty string. */
1518
1519 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1520
1521 if (*ecode == OP_KETRMIN)
1522 {
1523 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1524 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1525 if (flags != 0) /* Could match an empty string */
1526 {
1527 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1528 RRETURN(rrc);
1529 }
1530 ecode = prev;
1531 goto TAIL_RECURSE;
1532 }
1533 else /* OP_KETRMAX */
1534 {
1535 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1536 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1537 ecode += 1 + LINK_SIZE;
1538 flags = 0;
1539 goto TAIL_RECURSE;
1540 }
1541 /* Control never gets here */
1542
1543 /* Start of subject unless notbol, or after internal newline if multiline */
1544
1545 case OP_CIRC:
1546 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1547 if ((ims & PCRE_MULTILINE) != 0)
1548 {
1549 if (eptr != md->start_subject &&
1550 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1551 RRETURN(MATCH_NOMATCH);
1552 ecode++;
1553 break;
1554 }
1555 /* ... else fall through */
1556
1557 /* Start of subject assertion */
1558
1559 case OP_SOD:
1560 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1561 ecode++;
1562 break;
1563
1564 /* Start of match assertion */
1565
1566 case OP_SOM:
1567 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1568 ecode++;
1569 break;
1570
1571 /* Reset the start of match point */
1572
1573 case OP_SET_SOM:
1574 mstart = eptr;
1575 ecode++;
1576 break;
1577
1578 /* Assert before internal newline if multiline, or before a terminating
1579 newline unless endonly is set, else end of subject unless noteol is set. */
1580
1581 case OP_DOLL:
1582 if ((ims & PCRE_MULTILINE) != 0)
1583 {
1584 if (eptr < md->end_subject)
1585 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1586 else
1587 { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1588 ecode++;
1589 break;
1590 }
1591 else
1592 {
1593 if (md->noteol) RRETURN(MATCH_NOMATCH);
1594 if (!md->endonly)
1595 {
1596 if (eptr != md->end_subject &&
1597 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1598 RRETURN(MATCH_NOMATCH);
1599 ecode++;
1600 break;
1601 }
1602 }
1603 /* ... else fall through for endonly */
1604
1605 /* End of subject assertion (\z) */
1606
1607 case OP_EOD:
1608 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1609 ecode++;
1610 break;
1611
1612 /* End of subject or ending \n assertion (\Z) */
1613
1614 case OP_EODN:
1615 if (eptr != md->end_subject &&
1616 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1617 RRETURN(MATCH_NOMATCH);
1618 ecode++;
1619 break;
1620
1621 /* Word boundary assertions */
1622
1623 case OP_NOT_WORD_BOUNDARY:
1624 case OP_WORD_BOUNDARY:
1625 {
1626
1627 /* Find out if the previous and current characters are "word" characters.
1628 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1629 be "non-word" characters. Remember the earliest consulted character for
1630 partial matching. */
1631
1632 #ifdef SUPPORT_UTF8
1633 if (utf8)
1634 {
1635 if (eptr == md->start_subject) prev_is_word = FALSE; else
1636 {
1637 USPTR lastptr = eptr - 1;
1638 while((*lastptr & 0xc0) == 0x80) lastptr--;
1639 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1640 GETCHAR(c, lastptr);
1641 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1642 }
1643 if (eptr >= md->end_subject)
1644 {
1645 SCHECK_PARTIAL();
1646 cur_is_word = FALSE;
1647 }
1648 else
1649 {
1650 GETCHAR(c, eptr);
1651 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1652 }
1653 }
1654 else
1655 #endif
1656
1657 /* Not in UTF-8 mode */
1658
1659 {
1660 if (eptr == md->start_subject) prev_is_word = FALSE; else
1661 {
1662 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1663 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1664 }
1665 if (eptr >= md->end_subject)
1666 {
1667 SCHECK_PARTIAL();
1668 cur_is_word = FALSE;
1669 }
1670 else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1671 }
1672
1673 /* Now see if the situation is what we want */
1674
1675 if ((*ecode++ == OP_WORD_BOUNDARY)?
1676 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1677 RRETURN(MATCH_NOMATCH);
1678 }
1679 break;
1680
1681 /* Match a single character type; inline for speed */
1682
1683 case OP_ANY:
1684 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1685 /* Fall through */
1686
1687 case OP_ALLANY:
1688 if (eptr++ >= md->end_subject)
1689 {
1690 SCHECK_PARTIAL();
1691 RRETURN(MATCH_NOMATCH);
1692 }
1693 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1694 ecode++;
1695 break;
1696
1697 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1698 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1699
1700 case OP_ANYBYTE:
1701 if (eptr++ >= md->end_subject)
1702 {
1703 SCHECK_PARTIAL();
1704 RRETURN(MATCH_NOMATCH);
1705 }
1706 ecode++;
1707 break;
1708
1709 case OP_NOT_DIGIT:
1710 if (eptr >= md->end_subject)
1711 {
1712 SCHECK_PARTIAL();
1713 RRETURN(MATCH_NOMATCH);
1714 }
1715 GETCHARINCTEST(c, eptr);
1716 if (
1717 #ifdef SUPPORT_UTF8
1718 c < 256 &&
1719 #endif
1720 (md->ctypes[c] & ctype_digit) != 0
1721 )
1722 RRETURN(MATCH_NOMATCH);
1723 ecode++;
1724 break;
1725
1726 case OP_DIGIT:
1727 if (eptr >= md->end_subject)
1728 {
1729 SCHECK_PARTIAL();
1730 RRETURN(MATCH_NOMATCH);
1731 }
1732 GETCHARINCTEST(c, eptr);
1733 if (
1734 #ifdef SUPPORT_UTF8
1735 c >= 256 ||
1736 #endif
1737 (md->ctypes[c] & ctype_digit) == 0
1738 )
1739 RRETURN(MATCH_NOMATCH);
1740 ecode++;
1741 break;
1742
1743 case OP_NOT_WHITESPACE:
1744 if (eptr >= md->end_subject)
1745 {
1746 SCHECK_PARTIAL();
1747 RRETURN(MATCH_NOMATCH);
1748 }
1749 GETCHARINCTEST(c, eptr);
1750 if (
1751 #ifdef SUPPORT_UTF8
1752 c < 256 &&
1753 #endif
1754 (md->ctypes[c] & ctype_space) != 0
1755 )
1756 RRETURN(MATCH_NOMATCH);
1757 ecode++;
1758 break;
1759
1760 case OP_WHITESPACE:
1761 if (eptr >= md->end_subject)
1762 {
1763 SCHECK_PARTIAL();
1764 RRETURN(MATCH_NOMATCH);
1765 }
1766 GETCHARINCTEST(c, eptr);
1767 if (
1768 #ifdef SUPPORT_UTF8
1769 c >= 256 ||
1770 #endif
1771 (md->ctypes[c] & ctype_space) == 0
1772 )
1773 RRETURN(MATCH_NOMATCH);
1774 ecode++;
1775 break;
1776
1777 case OP_NOT_WORDCHAR:
1778 if (eptr >= md->end_subject)
1779 {
1780 SCHECK_PARTIAL();
1781 RRETURN(MATCH_NOMATCH);
1782 }
1783 GETCHARINCTEST(c, eptr);
1784 if (
1785 #ifdef SUPPORT_UTF8
1786 c < 256 &&
1787 #endif
1788 (md->ctypes[c] & ctype_word) != 0
1789 )
1790 RRETURN(MATCH_NOMATCH);
1791 ecode++;
1792 break;
1793
1794 case OP_WORDCHAR:
1795 if (eptr >= md->end_subject)
1796 {
1797 SCHECK_PARTIAL();
1798 RRETURN(MATCH_NOMATCH);
1799 }
1800 GETCHARINCTEST(c, eptr);
1801 if (
1802 #ifdef SUPPORT_UTF8
1803 c >= 256 ||
1804 #endif
1805 (md->ctypes[c] & ctype_word) == 0
1806 )
1807 RRETURN(MATCH_NOMATCH);
1808 ecode++;
1809 break;
1810
1811 case OP_ANYNL:
1812 if (eptr >= md->end_subject)
1813 {
1814 SCHECK_PARTIAL();
1815 RRETURN(MATCH_NOMATCH);
1816 }
1817 GETCHARINCTEST(c, eptr);
1818 switch(c)
1819 {
1820 default: RRETURN(MATCH_NOMATCH);
1821 case 0x000d:
1822 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1823 break;
1824
1825 case 0x000a:
1826 break;
1827
1828 case 0x000b:
1829 case 0x000c:
1830 case 0x0085:
1831 case 0x2028:
1832 case 0x2029:
1833 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1834 break;
1835 }
1836 ecode++;
1837 break;
1838
1839 case OP_NOT_HSPACE:
1840 if (eptr >= md->end_subject)
1841 {
1842 SCHECK_PARTIAL();
1843 RRETURN(MATCH_NOMATCH);
1844 }
1845 GETCHARINCTEST(c, eptr);
1846 switch(c)
1847 {
1848 default: break;
1849 case 0x09: /* HT */
1850 case 0x20: /* SPACE */
1851 case 0xa0: /* NBSP */
1852 case 0x1680: /* OGHAM SPACE MARK */
1853 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1854 case 0x2000: /* EN QUAD */
1855 case 0x2001: /* EM QUAD */
1856 case 0x2002: /* EN SPACE */
1857 case 0x2003: /* EM SPACE */
1858 case 0x2004: /* THREE-PER-EM SPACE */
1859 case 0x2005: /* FOUR-PER-EM SPACE */
1860 case 0x2006: /* SIX-PER-EM SPACE */
1861 case 0x2007: /* FIGURE SPACE */
1862 case 0x2008: /* PUNCTUATION SPACE */
1863 case 0x2009: /* THIN SPACE */
1864 case 0x200A: /* HAIR SPACE */
1865 case 0x202f: /* NARROW NO-BREAK SPACE */
1866 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1867 case 0x3000: /* IDEOGRAPHIC SPACE */
1868 RRETURN(MATCH_NOMATCH);
1869 }
1870 ecode++;
1871 break;
1872
1873 case OP_HSPACE:
1874 if (eptr >= md->end_subject)
1875 {
1876 SCHECK_PARTIAL();
1877 RRETURN(MATCH_NOMATCH);
1878 }
1879 GETCHARINCTEST(c, eptr);
1880 switch(c)
1881 {
1882 default: RRETURN(MATCH_NOMATCH);
1883 case 0x09: /* HT */
1884 case 0x20: /* SPACE */
1885 case 0xa0: /* NBSP */
1886 case 0x1680: /* OGHAM SPACE MARK */
1887 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1888 case 0x2000: /* EN QUAD */
1889 case 0x2001: /* EM QUAD */
1890 case 0x2002: /* EN SPACE */
1891 case 0x2003: /* EM SPACE */
1892 case 0x2004: /* THREE-PER-EM SPACE */
1893 case 0x2005: /* FOUR-PER-EM SPACE */
1894 case 0x2006: /* SIX-PER-EM SPACE */
1895 case 0x2007: /* FIGURE SPACE */
1896 case 0x2008: /* PUNCTUATION SPACE */
1897 case 0x2009: /* THIN SPACE */
1898 case 0x200A: /* HAIR SPACE */
1899 case 0x202f: /* NARROW NO-BREAK SPACE */
1900 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1901 case 0x3000: /* IDEOGRAPHIC SPACE */
1902 break;
1903 }
1904 ecode++;
1905 break;
1906
1907 case OP_NOT_VSPACE:
1908 if (eptr >= md->end_subject)
1909 {
1910 SCHECK_PARTIAL();
1911 RRETURN(MATCH_NOMATCH);
1912 }
1913 GETCHARINCTEST(c, eptr);
1914 switch(c)
1915 {
1916 default: break;
1917 case 0x0a: /* LF */
1918 case 0x0b: /* VT */
1919 case 0x0c: /* FF */
1920 case 0x0d: /* CR */
1921 case 0x85: /* NEL */
1922 case 0x2028: /* LINE SEPARATOR */
1923 case 0x2029: /* PARAGRAPH SEPARATOR */
1924 RRETURN(MATCH_NOMATCH);
1925 }
1926 ecode++;
1927 break;
1928
1929 case OP_VSPACE:
1930 if (eptr >= md->end_subject)
1931 {
1932 SCHECK_PARTIAL();
1933 RRETURN(MATCH_NOMATCH);
1934 }
1935 GETCHARINCTEST(c, eptr);
1936 switch(c)
1937 {
1938 default: RRETURN(MATCH_NOMATCH);
1939 case 0x0a: /* LF */
1940 case 0x0b: /* VT */
1941 case 0x0c: /* FF */
1942 case 0x0d: /* CR */
1943 case 0x85: /* NEL */
1944 case 0x2028: /* LINE SEPARATOR */
1945 case 0x2029: /* PARAGRAPH SEPARATOR */
1946 break;
1947 }
1948 ecode++;
1949 break;
1950
1951 #ifdef SUPPORT_UCP
1952 /* Check the next character by Unicode property. We will get here only
1953 if the support is in the binary; otherwise a compile-time error occurs. */
1954
1955 case OP_PROP:
1956 case OP_NOTPROP:
1957 if (eptr >= md->end_subject)
1958 {
1959 SCHECK_PARTIAL();
1960 RRETURN(MATCH_NOMATCH);
1961 }
1962 GETCHARINCTEST(c, eptr);
1963 {
1964 const ucd_record *prop = GET_UCD(c);
1965
1966 switch(ecode[1])
1967 {
1968 case PT_ANY:
1969 if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1970 break;
1971
1972 case PT_LAMP:
1973 if ((prop->chartype == ucp_Lu ||
1974 prop->chartype == ucp_Ll ||
1975 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1976 RRETURN(MATCH_NOMATCH);
1977 break;
1978
1979 case PT_GC:
1980 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1981 RRETURN(MATCH_NOMATCH);
1982 break;
1983
1984 case PT_PC:
1985 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1986 RRETURN(MATCH_NOMATCH);
1987 break;
1988
1989 case PT_SC:
1990 if ((ecode[2] != prop->script) == (op == OP_PROP))
1991 RRETURN(MATCH_NOMATCH);
1992 break;
1993
1994 default:
1995 RRETURN(PCRE_ERROR_INTERNAL);
1996 }
1997
1998 ecode += 3;
1999 }
2000 break;
2001
2002 /* Match an extended Unicode sequence. We will get here only if the support
2003 is in the binary; otherwise a compile-time error occurs. */
2004
2005 case OP_EXTUNI:
2006 if (eptr >= md->end_subject)
2007 {
2008 SCHECK_PARTIAL();
2009 RRETURN(MATCH_NOMATCH);
2010 }
2011 GETCHARINCTEST(c, eptr);
2012 {
2013 int category = UCD_CATEGORY(c);
2014 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
2015 while (eptr < md->end_subject)
2016 {
2017 int len = 1;
2018 if (!utf8) c = *eptr; else
2019 {
2020 GETCHARLEN(c, eptr, len);
2021 }
2022 category = UCD_CATEGORY(c);
2023 if (category != ucp_M) break;
2024 eptr += len;
2025 }
2026 }
2027 ecode++;
2028 break;
2029 #endif
2030
2031
2032 /* Match a back reference, possibly repeatedly. Look past the end of the
2033 item to see if there is repeat information following. The code is similar
2034 to that for character classes, but repeated for efficiency. Then obey
2035 similar code to character type repeats - written out again for speed.
2036 However, if the referenced string is the empty string, always treat
2037 it as matched, any number of times (otherwise there could be infinite
2038 loops). */
2039
2040 case OP_REF:
2041 {
2042 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2043 ecode += 3;
2044
2045 /* If the reference is unset, there are two possibilities:
2046
2047 (a) In the default, Perl-compatible state, set the length to be longer
2048 than the amount of subject left; this ensures that every attempt at a
2049 match fails. We can't just fail here, because of the possibility of
2050 quantifiers with zero minima.
2051
2052 (b) If the JavaScript compatibility flag is set, set the length to zero
2053 so that the back reference matches an empty string.
2054
2055 Otherwise, set the length to the length of what was matched by the
2056 referenced subpattern. */
2057
2058 if (offset >= offset_top || md->offset_vector[offset] < 0)
2059 length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2060 else
2061 length = md->offset_vector[offset+1] - md->offset_vector[offset];
2062
2063 /* Set up for repetition, or handle the non-repeated case */
2064
2065 switch (*ecode)
2066 {
2067 case OP_CRSTAR:
2068 case OP_CRMINSTAR:
2069 case OP_CRPLUS:
2070 case OP_CRMINPLUS:
2071 case OP_CRQUERY:
2072 case OP_CRMINQUERY:
2073 c = *ecode++ - OP_CRSTAR;
2074 minimize = (c & 1) != 0;
2075 min = rep_min[c]; /* Pick up values from tables; */
2076 max = rep_max[c]; /* zero for max => infinity */
2077 if (max == 0) max = INT_MAX;
2078 break;
2079
2080 case OP_CRRANGE:
2081 case OP_CRMINRANGE:
2082 minimize = (*ecode == OP_CRMINRANGE);
2083 min = GET2(ecode, 1);
2084 max = GET2(ecode, 3);
2085 if (max == 0) max = INT_MAX;
2086 ecode += 5;
2087 break;
2088
2089 default: /* No repeat follows */
2090 if (!match_ref(offset, eptr, length, md, ims))
2091 {
2092 CHECK_PARTIAL();
2093 RRETURN(MATCH_NOMATCH);
2094 }
2095 eptr += length;
2096 continue; /* With the main loop */
2097 }
2098
2099 /* If the length of the reference is zero, just continue with the
2100 main loop. */
2101
2102 if (length == 0) continue;
2103
2104 /* First, ensure the minimum number of matches are present. We get back
2105 the length of the reference string explicitly rather than passing the
2106 address of eptr, so that eptr can be a register variable. */
2107
2108 for (i = 1; i <= min; i++)
2109 {
2110 if (!match_ref(offset, eptr, length, md, ims))
2111 {
2112 CHECK_PARTIAL();
2113 RRETURN(MATCH_NOMATCH);
2114 }
2115 eptr += length;
2116 }
2117
2118 /* If min = max, continue at the same level without recursion.
2119 They are not both allowed to be zero. */
2120
2121 if (min == max) continue;
2122
2123 /* If minimizing, keep trying and advancing the pointer */
2124
2125 if (minimize)
2126 {
2127 for (fi = min;; fi++)
2128 {
2129 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2130 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2131 if (fi >= max) RRETURN(MATCH_NOMATCH);
2132 if (!match_ref(offset, eptr, length, md, ims))
2133 {
2134 CHECK_PARTIAL();
2135 RRETURN(MATCH_NOMATCH);
2136 }
2137 eptr += length;
2138 }
2139 /* Control never gets here */
2140 }
2141
2142 /* If maximizing, find the longest string and work backwards */
2143
2144 else
2145 {
2146 pp = eptr;
2147 for (i = min; i < max; i++)
2148 {
2149 if (!match_ref(offset, eptr, length, md, ims)) break;
2150 eptr += length;
2151 }
2152 while (eptr >= pp)
2153 {
2154 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2155 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2156 eptr -= length;
2157 }
2158 RRETURN(MATCH_NOMATCH);
2159 }
2160 }
2161 /* Control never gets here */
2162
2163 /* Match a bit-mapped character class, possibly repeatedly. This op code is
2164 used when all the characters in the class have values in the range 0-255,
2165 and either the matching is caseful, or the characters are in the range
2166 0-127 when UTF-8 processing is enabled. The only difference between
2167 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2168 encountered.
2169
2170 First, look past the end of the item to see if there is repeat information
2171 following. Then obey similar code to character type repeats - written out
2172 again for speed. */
2173
2174 case OP_NCLASS:
2175 case OP_CLASS:
2176 {
2177 data = ecode + 1; /* Save for matching */
2178 ecode += 33; /* Advance past the item */
2179
2180 switch (*ecode)
2181 {
2182 case OP_CRSTAR:
2183 case OP_CRMINSTAR:
2184 case OP_CRPLUS:
2185 case OP_CRMINPLUS:
2186 case OP_CRQUERY:
2187 case OP_CRMINQUERY:
2188 c = *ecode++ - OP_CRSTAR;
2189 minimize = (c & 1) != 0;
2190 min = rep_min[c]; /* Pick up values from tables; */
2191 max = rep_max[c]; /* zero for max => infinity */
2192 if (max == 0) max = INT_MAX;
2193 break;
2194
2195 case OP_CRRANGE:
2196 case OP_CRMINRANGE:
2197 minimize = (*ecode == OP_CRMINRANGE);
2198 min = GET2(ecode, 1);
2199 max = GET2(ecode, 3);
2200 if (max == 0) max = INT_MAX;
2201 ecode += 5;
2202 break;
2203
2204 default: /* No repeat follows */
2205 min = max = 1;
2206 break;
2207 }
2208
2209 /* First, ensure the minimum number of matches are present. */
2210
2211 #ifdef SUPPORT_UTF8
2212 /* UTF-8 mode */
2213 if (utf8)
2214 {
2215 for (i = 1; i <= min; i++)
2216 {
2217 if (eptr >= md->end_subject)
2218 {
2219 SCHECK_PARTIAL();
2220 RRETURN(MATCH_NOMATCH);
2221 }
2222 GETCHARINC(c, eptr);
2223 if (c > 255)
2224 {
2225 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2226 }
2227 else
2228 {
2229 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2230 }
2231 }
2232 }
2233 else
2234 #endif
2235 /* Not UTF-8 mode */
2236 {
2237 for (i = 1; i <= min; i++)
2238 {
2239 if (eptr >= md->end_subject)
2240 {
2241 SCHECK_PARTIAL();
2242 RRETURN(MATCH_NOMATCH);
2243 }
2244 c = *eptr++;
2245 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2246 }
2247 }
2248
2249 /* If max == min we can continue with the main loop without the
2250 need to recurse. */
2251
2252 if (min == max) continue;
2253
2254 /* If minimizing, keep testing the rest of the expression and advancing
2255 the pointer while it matches the class. */
2256
2257 if (minimize)
2258 {
2259 #ifdef SUPPORT_UTF8
2260 /* UTF-8 mode */
2261 if (utf8)
2262 {
2263 for (fi = min;; fi++)
2264 {
2265 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2266 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2267 if (fi >= max) RRETURN(MATCH_NOMATCH);
2268 if (eptr >= md->end_subject)
2269 {
2270 SCHECK_PARTIAL();
2271 RRETURN(MATCH_NOMATCH);
2272 }
2273 GETCHARINC(c, eptr);
2274 if (c > 255)
2275 {
2276 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2277 }
2278 else
2279 {
2280 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2281 }
2282 }
2283 }
2284 else
2285 #endif
2286 /* Not UTF-8 mode */
2287 {
2288 for (fi = min;; fi++)
2289 {
2290 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2291 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2292 if (fi >= max) RRETURN(MATCH_NOMATCH);
2293 if (eptr >= md->end_subject)
2294 {
2295 SCHECK_PARTIAL();
2296 RRETURN(MATCH_NOMATCH);
2297 }
2298 c = *eptr++;
2299 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2300 }
2301 }
2302 /* Control never gets here */
2303 }
2304
2305 /* If maximizing, find the longest possible run, then work backwards. */
2306
2307 else
2308 {
2309 pp = eptr;
2310
2311 #ifdef SUPPORT_UTF8
2312 /* UTF-8 mode */
2313 if (utf8)
2314 {
2315 for (i = min; i < max; i++)
2316 {
2317 int len = 1;
2318 if (eptr >= md->end_subject) break;
2319 GETCHARLEN(c, eptr, len);
2320 if (c > 255)
2321 {
2322 if (op == OP_CLASS) break;
2323 }
2324 else
2325 {
2326 if ((data[c/8] & (1 << (c&7))) == 0) break;
2327 }
2328 eptr += len;
2329 }
2330 for (;;)
2331 {
2332 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2333 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2334 if (eptr-- == pp) break; /* Stop if tried at original pos */
2335 BACKCHAR(eptr);
2336 }
2337 }
2338 else
2339 #endif
2340 /* Not UTF-8 mode */
2341 {
2342 for (i = min; i < max; i++)
2343 {
2344 if (eptr >= md->end_subject) break;
2345 c = *eptr;
2346 if ((data[c/8] & (1 << (c&7))) == 0) break;
2347 eptr++;
2348 }
2349 while (eptr >= pp)
2350 {
2351 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2352 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2353 eptr--;
2354 }
2355 }
2356
2357 RRETURN(MATCH_NOMATCH);
2358 }
2359 }
2360 /* Control never gets here */
2361
2362
2363 /* Match an extended character class. This opcode is encountered only
2364 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2365 mode, because Unicode properties are supported in non-UTF-8 mode. */
2366
2367 #ifdef SUPPORT_UTF8
2368 case OP_XCLASS:
2369 {
2370 data = ecode + 1 + LINK_SIZE; /* Save for matching */
2371 ecode += GET(ecode, 1); /* Advance past the item */
2372
2373 switch (*ecode)
2374 {
2375 case OP_CRSTAR:
2376 case OP_CRMINSTAR:
2377 case OP_CRPLUS:
2378 case OP_CRMINPLUS:
2379 case OP_CRQUERY:
2380 case OP_CRMINQUERY:
2381 c = *ecode++ - OP_CRSTAR;
2382 minimize = (c & 1) != 0;
2383 min = rep_min[c]; /* Pick up values from tables; */
2384 max = rep_max[c]; /* zero for max => infinity */
2385 if (max == 0) max = INT_MAX;
2386 break;
2387
2388 case OP_CRRANGE:
2389 case OP_CRMINRANGE:
2390 minimize = (*ecode == OP_CRMINRANGE);
2391 min = GET2(ecode, 1);
2392 max = GET2(ecode, 3);
2393 if (max == 0) max = INT_MAX;
2394 ecode += 5;
2395 break;
2396
2397 default: /* No repeat follows */
2398 min = max = 1;
2399 break;
2400 }
2401
2402 /* First, ensure the minimum number of matches are present. */
2403
2404 for (i = 1; i <= min; i++)
2405 {
2406 if (eptr >= md->end_subject)
2407 {
2408 SCHECK_PARTIAL();
2409 RRETURN(MATCH_NOMATCH);
2410 }
2411 GETCHARINCTEST(c, eptr);
2412 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2413 }
2414
2415 /* If max == min we can continue with the main loop without the
2416 need to recurse. */
2417
2418 if (min == max) continue;
2419
2420 /* If minimizing, keep testing the rest of the expression and advancing
2421 the pointer while it matches the class. */
2422
2423 if (minimize)
2424 {
2425 for (fi = min;; fi++)
2426 {
2427 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2428 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2429 if (fi >= max) RRETURN(MATCH_NOMATCH);
2430 if (eptr >= md->end_subject)
2431 {
2432 SCHECK_PARTIAL();
2433 RRETURN(MATCH_NOMATCH);
2434 }
2435 GETCHARINCTEST(c, eptr);
2436 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2437 }
2438 /* Control never gets here */
2439 }
2440
2441 /* If maximizing, find the longest possible run, then work backwards. */
2442
2443 else
2444 {
2445 pp = eptr;
2446 for (i = min; i < max; i++)
2447 {
2448 int len = 1;
2449 if (eptr >= md->end_subject) break;
2450 GETCHARLENTEST(c, eptr, len);
2451 if (!_pcre_xclass(c, data)) break;
2452 eptr += len;
2453 }
2454 for(;;)
2455 {
2456 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2457 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2458 if (eptr-- == pp) break; /* Stop if tried at original pos */
2459 if (utf8) BACKCHAR(eptr);
2460 }
2461 RRETURN(MATCH_NOMATCH);
2462 }
2463
2464 /* Control never gets here */
2465 }
2466 #endif /* End of XCLASS */
2467
2468 /* Match a single character, casefully */
2469
2470 case OP_CHAR:
2471 #ifdef SUPPORT_UTF8
2472 if (utf8)
2473 {
2474 length = 1;
2475 ecode++;
2476 GETCHARLEN(fc, ecode, length);
2477 if (length > md->end_subject - eptr)
2478 {
2479 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2480 RRETURN(MATCH_NOMATCH);
2481 }
2482 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2483 }
2484 else
2485 #endif
2486
2487 /* Non-UTF-8 mode */
2488 {
2489 if (md->end_subject - eptr < 1)
2490 {
2491 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2492 RRETURN(MATCH_NOMATCH);
2493 }
2494 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2495 ecode += 2;
2496 }
2497 break;
2498
2499 /* Match a single character, caselessly */
2500
2501 case OP_CHARNC:
2502 #ifdef SUPPORT_UTF8
2503 if (utf8)
2504 {
2505 length = 1;
2506 ecode++;
2507 GETCHARLEN(fc, ecode, length);
2508
2509 if (length > md->end_subject - eptr)
2510 {
2511 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2512 RRETURN(MATCH_NOMATCH);
2513 }
2514
2515 /* If the pattern character's value is < 128, we have only one byte, and
2516 can use the fast lookup table. */
2517
2518 if (fc < 128)
2519 {
2520 if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2521 }
2522
2523 /* Otherwise we must pick up the subject character */
2524
2525 else
2526 {
2527 unsigned int dc;
2528 GETCHARINC(dc, eptr);
2529 ecode += length;
2530
2531 /* If we have Unicode property support, we can use it to test the other
2532 case of the character, if there is one. */
2533
2534 if (fc != dc)
2535 {
2536 #ifdef SUPPORT_UCP
2537 if (dc != UCD_OTHERCASE(fc))
2538 #endif
2539 RRETURN(MATCH_NOMATCH);
2540 }
2541 }
2542 }
2543 else
2544 #endif /* SUPPORT_UTF8 */
2545
2546 /* Non-UTF-8 mode */
2547 {
2548 if (md->end_subject - eptr < 1)
2549 {
2550 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2551 RRETURN(MATCH_NOMATCH);
2552 }
2553 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2554 ecode += 2;
2555 }
2556 break;
2557
2558 /* Match a single character repeatedly. */
2559
2560 case OP_EXACT:
2561 min = max = GET2(ecode, 1);
2562 ecode += 3;
2563 goto REPEATCHAR;
2564
2565 case OP_POSUPTO:
2566 possessive = TRUE;
2567 /* Fall through */
2568
2569 case OP_UPTO:
2570 case OP_MINUPTO:
2571 min = 0;
2572 max = GET2(ecode, 1);
2573 minimize = *ecode == OP_MINUPTO;
2574 ecode += 3;
2575 goto REPEATCHAR;
2576
2577 case OP_POSSTAR:
2578 possessive = TRUE;
2579 min = 0;
2580 max = INT_MAX;
2581 ecode++;
2582 goto REPEATCHAR;
2583
2584 case OP_POSPLUS:
2585 possessive = TRUE;
2586 min = 1;
2587 max = INT_MAX;
2588 ecode++;
2589 goto REPEATCHAR;
2590
2591 case OP_POSQUERY:
2592 possessive = TRUE;
2593 min = 0;
2594 max = 1;
2595 ecode++;
2596 goto REPEATCHAR;
2597
2598 case OP_STAR:
2599 case OP_MINSTAR:
2600 case OP_PLUS:
2601 case OP_MINPLUS:
2602 case OP_QUERY:
2603 case OP_MINQUERY:
2604 c = *ecode++ - OP_STAR;
2605 minimize = (c & 1) != 0;
2606
2607 min = rep_min[c]; /* Pick up values from tables; */
2608 max = rep_max[c]; /* zero for max => infinity */
2609 if (max == 0) max = INT_MAX;
2610
2611 /* Common code for all repeated single-character matches. */
2612
2613 REPEATCHAR:
2614 #ifdef SUPPORT_UTF8
2615 if (utf8)
2616 {
2617 length = 1;
2618 charptr = ecode;
2619 GETCHARLEN(fc, ecode, length);
2620 ecode += length;
2621
2622 /* Handle multibyte character matching specially here. There is
2623 support for caseless matching if UCP support is present. */
2624
2625 if (length > 1)
2626 {
2627 #ifdef SUPPORT_UCP
2628 unsigned int othercase;
2629 if ((ims & PCRE_CASELESS) != 0 &&
2630 (othercase = UCD_OTHERCASE(fc)) != fc)
2631 oclength = _pcre_ord2utf8(othercase, occhars);
2632 else oclength = 0;
2633 #endif /* SUPPORT_UCP */
2634
2635 for (i = 1; i <= min; i++)
2636 {
2637 if (eptr <= md->end_subject - length &&
2638 memcmp(eptr, charptr, length) == 0) eptr += length;
2639 #ifdef SUPPORT_UCP
2640 else if (oclength > 0 &&
2641 eptr <= md->end_subject - oclength &&
2642 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2643 #endif /* SUPPORT_UCP */
2644 else
2645 {
2646 CHECK_PARTIAL();
2647 RRETURN(MATCH_NOMATCH);
2648 }
2649 }
2650
2651 if (min == max) continue;
2652
2653 if (minimize)
2654 {
2655 for (fi = min;; fi++)
2656 {
2657 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2658 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2659 if (fi >= max) RRETURN(MATCH_NOMATCH);
2660 if (eptr <= md->end_subject - length &&
2661 memcmp(eptr, charptr, length) == 0) eptr += length;
2662 #ifdef SUPPORT_UCP
2663 else if (oclength > 0 &&
2664 eptr <= md->end_subject - oclength &&
2665 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2666 #endif /* SUPPORT_UCP */
2667 else
2668 {
2669 CHECK_PARTIAL();
2670 RRETURN(MATCH_NOMATCH);
2671 }
2672 }
2673 /* Control never gets here */
2674 }
2675
2676 else /* Maximize */
2677 {
2678 pp = eptr;
2679 for (i = min; i < max; i++)
2680 {
2681 if (eptr <= md->end_subject - length &&
2682 memcmp(eptr, charptr, length) == 0) eptr += length;
2683 #ifdef SUPPORT_UCP
2684 else if (oclength > 0 &&
2685 eptr <= md->end_subject - oclength &&
2686 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2687 #endif /* SUPPORT_UCP */
2688 else break;
2689 }
2690
2691 if (possessive) continue;
2692
2693 for(;;)
2694 {
2695 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2696 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2697 if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2698 #ifdef SUPPORT_UCP
2699 eptr--;
2700 BACKCHAR(eptr);
2701 #else /* without SUPPORT_UCP */
2702 eptr -= length;
2703 #endif /* SUPPORT_UCP */
2704 }
2705 }
2706 /* Control never gets here */
2707 }
2708
2709 /* If the length of a UTF-8 character is 1, we fall through here, and
2710 obey the code as for non-UTF-8 characters below, though in this case the
2711 value of fc will always be < 128. */
2712 }
2713 else
2714 #endif /* SUPPORT_UTF8 */
2715
2716 /* When not in UTF-8 mode, load a single-byte character. */
2717
2718 fc = *ecode++;
2719
2720 /* The value of fc at this point is always less than 256, though we may or
2721 may not be in UTF-8 mode. The code is duplicated for the caseless and
2722 caseful cases, for speed, since matching characters is likely to be quite
2723 common. First, ensure the minimum number of matches are present. If min =
2724 max, continue at the same level without recursing. Otherwise, if
2725 minimizing, keep trying the rest of the expression and advancing one
2726 matching character if failing, up to the maximum. Alternatively, if
2727 maximizing, find the maximum number of characters and work backwards. */
2728
2729 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2730 max, eptr));
2731
2732 if ((ims & PCRE_CASELESS) != 0)
2733 {
2734 fc = md->lcc[fc];
2735 for (i = 1; i <= min; i++)
2736 {
2737 if (eptr >= md->end_subject)
2738 {
2739 SCHECK_PARTIAL();
2740 RRETURN(MATCH_NOMATCH);
2741 }
2742 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2743 }
2744 if (min == max) continue;
2745 if (minimize)
2746 {
2747 for (fi = min;; fi++)
2748 {
2749 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2750 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2751 if (fi >= max) RRETURN(MATCH_NOMATCH);
2752 if (eptr >= md->end_subject)
2753 {
2754 SCHECK_PARTIAL();
2755 RRETURN(MATCH_NOMATCH);
2756 }
2757 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2758 }
2759 /* Control never gets here */
2760 }
2761 else /* Maximize */
2762 {
2763 pp = eptr;
2764 for (i = min; i < max; i++)
2765 {
2766 if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2767 eptr++;
2768 }
2769
2770 if (possessive) continue;
2771
2772 while (eptr >= pp)
2773 {
2774 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2775 eptr--;
2776 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2777 }
2778 RRETURN(MATCH_NOMATCH);
2779 }
2780 /* Control never gets here */
2781 }
2782
2783 /* Caseful comparisons (includes all multi-byte characters) */
2784
2785 else
2786 {
2787 for (i = 1; i <= min; i++)
2788 {
2789 if (eptr >= md->end_subject)
2790 {
2791 SCHECK_PARTIAL();
2792 RRETURN(MATCH_NOMATCH);
2793 }
2794 if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2795 }
2796
2797 if (min == max) continue;
2798
2799 if (minimize)
2800 {
2801 for (fi = min;; fi++)
2802 {
2803 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2804 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2805 if (fi >= max) RRETURN(MATCH_NOMATCH);
2806 if (eptr >= md->end_subject)
2807 {
2808 SCHECK_PARTIAL();
2809 RRETURN(MATCH_NOMATCH);
2810 }
2811 if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2812 }
2813 /* Control never gets here */
2814 }
2815 else /* Maximize */
2816 {
2817 pp = eptr;
2818 for (i = min; i < max; i++)
2819 {
2820 if (eptr >= md->end_subject || fc != *eptr) break;
2821 eptr++;
2822 }
2823 if (possessive) continue;
2824
2825 while (eptr >= pp)
2826 {
2827 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2828 eptr--;
2829 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2830 }
2831 RRETURN(MATCH_NOMATCH);
2832 }
2833 }
2834 /* Control never gets here */
2835
2836 /* Match a negated single one-byte pattern character. The subject character
2837 we are checking against it can be multibyte. */
2838
2839 case OP_NOT:
2840 if (eptr >= md->end_subject)
2841 {
2842 SCHECK_PARTIAL();
2843 RRETURN(MATCH_NOMATCH);
2844 }
2845 ecode++;
2846 GETCHARINCTEST(c, eptr);
2847 if ((ims & PCRE_CASELESS) != 0)
2848 {
2849 #ifdef SUPPORT_UTF8
2850 if (c < 256)
2851 #endif
2852 c = md->lcc[c];
2853 if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2854 }
2855 else
2856 {
2857 if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2858 }
2859 break;
2860
2861 /* Match a negated single one-byte character repeatedly. This is almost a
2862 repeat of the code for a repeated single character, but I haven't found a
2863 nice way of commoning these up that doesn't require a test of the
2864 positive/negative option for each character match. Maybe that wouldn't add
2865 very much to the time taken, but character matching *is* what this is all
2866 about... */
2867
2868 case OP_NOTEXACT:
2869 min = max = GET2(ecode, 1);
2870 ecode += 3;
2871 goto REPEATNOTCHAR;
2872
2873 case OP_NOTUPTO:
2874 case OP_NOTMINUPTO:
2875 min = 0;
2876 max = GET2(ecode, 1);
2877 minimize = *ecode == OP_NOTMINUPTO;
2878 ecode += 3;
2879 goto REPEATNOTCHAR;
2880
2881 case OP_NOTPOSSTAR:
2882 possessive = TRUE;
2883 min = 0;
2884 max = INT_MAX;
2885 ecode++;
2886 goto REPEATNOTCHAR;
2887
2888 case OP_NOTPOSPLUS:
2889 possessive = TRUE;
2890 min = 1;
2891 max = INT_MAX;
2892 ecode++;
2893 goto REPEATNOTCHAR;
2894
2895 case OP_NOTPOSQUERY:
2896 possessive = TRUE;
2897 min = 0;
2898 max = 1;
2899 ecode++;
2900 goto REPEATNOTCHAR;
2901
2902 case OP_NOTPOSUPTO:
2903 possessive = TRUE;
2904 min = 0;
2905 max = GET2(ecode, 1);
2906 ecode += 3;
2907 goto REPEATNOTCHAR;
2908
2909 case OP_NOTSTAR:
2910 case OP_NOTMINSTAR:
2911 case OP_NOTPLUS:
2912 case OP_NOTMINPLUS:
2913 case OP_NOTQUERY:
2914 case OP_NOTMINQUERY:
2915 c = *ecode++ - OP_NOTSTAR;
2916 minimize = (c & 1) != 0;
2917 min = rep_min[c]; /* Pick up values from tables; */
2918 max = rep_max[c]; /* zero for max => infinity */
2919 if (max == 0) max = INT_MAX;
2920
2921 /* Common code for all repeated single-byte matches. */
2922
2923 REPEATNOTCHAR:
2924 fc = *ecode++;
2925
2926 /* The code is duplicated for the caseless and caseful cases, for speed,
2927 since matching characters is likely to be quite common. First, ensure the
2928 minimum number of matches are present. If min = max, continue at the same
2929 level without recursing. Otherwise, if minimizing, keep trying the rest of
2930 the expression and advancing one matching character if failing, up to the
2931 maximum. Alternatively, if maximizing, find the maximum number of
2932 characters and work backwards. */
2933
2934 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2935 max, eptr));
2936
2937 if ((ims & PCRE_CASELESS) != 0)
2938 {
2939 fc = md->lcc[fc];
2940
2941 #ifdef SUPPORT_UTF8
2942 /* UTF-8 mode */
2943 if (utf8)
2944 {
2945 register unsigned int d;
2946 for (i = 1; i <= min; i++)
2947 {
2948 if (eptr >= md->end_subject)
2949 {
2950 SCHECK_PARTIAL();
2951 RRETURN(MATCH_NOMATCH);
2952 }
2953 GETCHARINC(d, eptr);
2954 if (d < 256) d = md->lcc[d];
2955 if (fc == d) RRETURN(MATCH_NOMATCH);
2956 }
2957 }
2958 else
2959 #endif
2960
2961 /* Not UTF-8 mode */
2962 {
2963 for (i = 1; i <= min; i++)
2964 {
2965 if (eptr >= md->end_subject)
2966 {
2967 SCHECK_PARTIAL();
2968 RRETURN(MATCH_NOMATCH);
2969 }
2970 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2971 }
2972 }
2973
2974 if (min == max) continue;
2975
2976 if (minimize)
2977 {
2978 #ifdef SUPPORT_UTF8
2979 /* UTF-8 mode */
2980 if (utf8)
2981 {
2982 register unsigned int d;
2983 for (fi = min;; fi++)
2984 {
2985 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2986 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2987 if (fi >= max) RRETURN(MATCH_NOMATCH);
2988 if (eptr >= md->end_subject)
2989 {
2990 SCHECK_PARTIAL();
2991 RRETURN(MATCH_NOMATCH);
2992 }
2993 GETCHARINC(d, eptr);
2994 if (d < 256) d = md->lcc[d];
2995 if (fc == d) RRETURN(MATCH_NOMATCH);
2996 }
2997 }
2998 else
2999 #endif
3000 /* Not UTF-8 mode */
3001 {
3002 for (fi = min;; fi++)
3003 {
3004 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3005 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3006 if (fi >= max) RRETURN(MATCH_NOMATCH);
3007 if (eptr >= md->end_subject)
3008 {
3009 SCHECK_PARTIAL();
3010 RRETURN(MATCH_NOMATCH);
3011 }
3012 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
3013 }
3014 }
3015 /* Control never gets here */
3016 }
3017
3018 /* Maximize case */
3019
3020 else
3021 {
3022 pp = eptr;
3023
3024 #ifdef SUPPORT_UTF8
3025 /* UTF-8 mode */
3026 if (utf8)
3027 {
3028 register unsigned int d;
3029 for (i = min; i < max; i++)
3030 {
3031 int len = 1;
3032 if (eptr >= md->end_subject) break;
3033 GETCHARLEN(d, eptr, len);
3034 if (d < 256) d = md->lcc[d];
3035 if (fc == d) break;
3036 eptr += len;
3037 }
3038 if (possessive) continue;
3039 for(;;)
3040 {
3041 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3042 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3043 if (eptr-- == pp) break; /* Stop if tried at original pos */
3044 BACKCHAR(eptr);
3045 }
3046 }
3047 else
3048 #endif
3049 /* Not UTF-8 mode */
3050 {
3051 for (i = min; i < max; i++)
3052 {
3053 if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
3054 eptr++;
3055 }
3056 if (possessive) continue;
3057 while (eptr >= pp)
3058 {
3059 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3060 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3061 eptr--;
3062 }
3063 }
3064
3065 RRETURN(MATCH_NOMATCH);
3066 }
3067 /* Control never gets here */
3068 }
3069
3070 /* Caseful comparisons */
3071
3072 else
3073 {
3074 #ifdef SUPPORT_UTF8
3075 /* UTF-8 mode */
3076 if (utf8)
3077 {
3078 register unsigned int d;
3079 for (i = 1; i <= min; i++)
3080 {
3081 if (eptr >= md->end_subject)
3082 {
3083 SCHECK_PARTIAL();
3084 RRETURN(MATCH_NOMATCH);
3085 }
3086 GETCHARINC(d, eptr);
3087 if (fc == d) RRETURN(MATCH_NOMATCH);
3088 }
3089 }
3090 else
3091 #endif
3092 /* Not UTF-8 mode */
3093 {
3094 for (i = 1; i <= min; i++)
3095 {
3096 if (eptr >= md->end_subject)
3097 {
3098 SCHECK_PARTIAL();
3099 RRETURN(MATCH_NOMATCH);
3100 }
3101 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3102 }
3103 }
3104
3105 if (min == max) continue;
3106
3107 if (minimize)
3108 {
3109 #ifdef SUPPORT_UTF8
3110 /* UTF-8 mode */
3111 if (utf8)
3112 {
3113 register unsigned int d;
3114 for (fi = min;; fi++)
3115 {
3116 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3117 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3118 if (fi >= max) RRETURN(MATCH_NOMATCH);
3119 if (eptr >= md->end_subject)
3120 {
3121 SCHECK_PARTIAL();
3122 RRETURN(MATCH_NOMATCH);
3123 }
3124 GETCHARINC(d, eptr);
3125 if (fc == d) RRETURN(MATCH_NOMATCH);
3126 }
3127 }
3128 else
3129 #endif
3130 /* Not UTF-8 mode */
3131 {
3132 for (fi = min;; fi++)
3133 {
3134 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3135 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3136 if (fi >= max) RRETURN(MATCH_NOMATCH);
3137 if (eptr >= md->end_subject)
3138 {
3139 SCHECK_PARTIAL();
3140 RRETURN(MATCH_NOMATCH);
3141 }
3142 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3143 }
3144 }
3145 /* Control never gets here */
3146 }
3147
3148 /* Maximize case */
3149
3150 else
3151 {
3152 pp = eptr;
3153
3154 #ifdef SUPPORT_UTF8
3155 /* UTF-8 mode */
3156 if (utf8)
3157 {
3158 register unsigned int d;
3159 for (i = min; i < max; i++)
3160 {
3161 int len = 1;
3162 if (eptr >= md->end_subject) break;
3163 GETCHARLEN(d, eptr, len);
3164 if (fc == d) break;
3165 eptr += len;
3166 }
3167 if (possessive) continue;
3168 for(;;)
3169 {
3170 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3171 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3172 if (eptr-- == pp) break; /* Stop if tried at original pos */
3173 BACKCHAR(eptr);
3174 }
3175 }
3176 else
3177 #endif
3178 /* Not UTF-8 mode */
3179 {
3180 for (i = min; i < max; i++)
3181 {
3182 if (eptr >= md->end_subject || fc == *eptr) break;
3183 eptr++;
3184 }
3185 if (possessive) continue;
3186 while (eptr >= pp)
3187 {
3188 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3189 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3190 eptr--;
3191 }
3192 }
3193
3194 RRETURN(MATCH_NOMATCH);
3195 }
3196 }
3197 /* Control never gets here */
3198
3199 /* Match a single character type repeatedly; several different opcodes
3200 share code. This is very similar to the code for single characters, but we
3201 repeat it in the interests of efficiency. */
3202
3203 case OP_TYPEEXACT:
3204 min = max = GET2(ecode, 1);
3205 minimize = TRUE;
3206 ecode += 3;
3207 goto REPEATTYPE;
3208
3209 case OP_TYPEUPTO:
3210 case OP_TYPEMINUPTO:
3211 min = 0;
3212 max = GET2(ecode, 1);
3213 minimize = *ecode == OP_TYPEMINUPTO;
3214 ecode += 3;
3215 goto REPEATTYPE;
3216
3217 case OP_TYPEPOSSTAR:
3218 possessive = TRUE;
3219 min = 0;
3220 max = INT_MAX;
3221 ecode++;
3222 goto REPEATTYPE;
3223
3224 case OP_TYPEPOSPLUS:
3225 possessive = TRUE;
3226 min = 1;
3227 max = INT_MAX;
3228 ecode++;
3229 goto REPEATTYPE;
3230
3231 case OP_TYPEPOSQUERY:
3232 possessive = TRUE;
3233 min = 0;
3234 max = 1;
3235 ecode++;
3236 goto REPEATTYPE;
3237
3238 case OP_TYPEPOSUPTO:
3239 possessive = TRUE;
3240 min = 0;
3241 max = GET2(ecode, 1);
3242 ecode += 3;
3243 goto REPEATTYPE;
3244
3245 case OP_TYPESTAR:
3246 case OP_TYPEMINSTAR:
3247 case OP_TYPEPLUS:
3248 case OP_TYPEMINPLUS:
3249 case OP_TYPEQUERY:
3250 case OP_TYPEMINQUERY:
3251 c = *ecode++ - OP_TYPESTAR;
3252 minimize = (c & 1) != 0;
3253 min = rep_min[c]; /* Pick up values from tables; */
3254 max = rep_max[c]; /* zero for max => infinity */
3255 if (max == 0) max = INT_MAX;
3256
3257 /* Common code for all repeated single character type matches. Note that
3258 in UTF-8 mode, '.' matches a character of any length, but for the other
3259 character types, the valid characters are all one-byte long. */
3260
3261 REPEATTYPE:
3262 ctype = *ecode++; /* Code for the character type */
3263
3264 #ifdef SUPPORT_UCP
3265 if (ctype == OP_PROP || ctype == OP_NOTPROP)
3266 {
3267 prop_fail_result = ctype == OP_NOTPROP;
3268 prop_type = *ecode++;
3269 prop_value = *ecode++;
3270 }
3271 else prop_type = -1;
3272 #endif
3273
3274 /* First, ensure the minimum number of matches are present. Use inline
3275 code for maximizing the speed, and do the type test once at the start
3276 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3277 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3278 and single-bytes. */
3279
3280 if (min > 0)
3281 {
3282 #ifdef SUPPORT_UCP
3283 if (prop_type >= 0)
3284 {
3285 switch(prop_type)
3286 {
3287 case PT_ANY:
3288 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3289 for (i = 1; i <= min; i++)
3290 {
3291 if (eptr >= md->end_subject)
3292 {
3293 SCHECK_PARTIAL();
3294 RRETURN(MATCH_NOMATCH);
3295 }
3296 GETCHARINCTEST(c, eptr);
3297 }
3298 break;
3299
3300 case PT_LAMP:
3301 for (i = 1; i <= min; i++)
3302 {
3303 if (eptr >= md->end_subject)
3304 {
3305 SCHECK_PARTIAL();
3306 RRETURN(MATCH_NOMATCH);
3307 }
3308 GETCHARINCTEST(c, eptr);
3309 prop_chartype = UCD_CHARTYPE(c);
3310 if ((prop_chartype == ucp_Lu ||
3311 prop_chartype == ucp_Ll ||
3312 prop_chartype == ucp_Lt) == prop_fail_result)
3313 RRETURN(MATCH_NOMATCH);
3314 }
3315 break;
3316
3317 case PT_GC:
3318 for (i = 1; i <= min; i++)
3319 {
3320 if (eptr >= md->end_subject)
3321 {
3322 SCHECK_PARTIAL();
3323 RRETURN(MATCH_NOMATCH);
3324 }
3325 GETCHARINCTEST(c, eptr);
3326 prop_category = UCD_CATEGORY(c);
3327 if ((prop_category == prop_value) == prop_fail_result)
3328 RRETURN(MATCH_NOMATCH);
3329 }
3330 break;
3331
3332 case PT_PC:
3333 for (i = 1; i <= min; i++)
3334 {
3335 if (eptr >= md->end_subject)
3336 {
3337 SCHECK_PARTIAL();
3338 RRETURN(MATCH_NOMATCH);
3339 }
3340 GETCHARINCTEST(c, eptr);
3341 prop_chartype = UCD_CHARTYPE(c);
3342 if ((prop_chartype == prop_value) == prop_fail_result)
3343 RRETURN(MATCH_NOMATCH);
3344 }
3345 break;
3346
3347 case PT_SC:
3348 for (i = 1; i <= min; i++)
3349 {
3350 if (eptr >= md->end_subject)
3351 {
3352 SCHECK_PARTIAL();
3353 RRETURN(MATCH_NOMATCH);
3354 }
3355 GETCHARINCTEST(c, eptr);
3356 prop_script = UCD_SCRIPT(c);
3357 if ((prop_script == prop_value) == prop_fail_result)
3358 RRETURN(MATCH_NOMATCH);
3359 }
3360 break;
3361
3362 default:
3363 RRETURN(PCRE_ERROR_INTERNAL);
3364 }
3365 }
3366
3367 /* Match extended Unicode sequences. We will get here only if the
3368 support is in the binary; otherwise a compile-time error occurs. */
3369
3370 else if (ctype == OP_EXTUNI)
3371 {
3372 for (i = 1; i <= min; i++)
3373 {
3374 if (eptr >= md->end_subject)
3375 {
3376 SCHECK_PARTIAL();
3377 RRETURN(MATCH_NOMATCH);
3378 }
3379 GETCHARINCTEST(c, eptr);
3380 prop_category = UCD_CATEGORY(c);
3381 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3382 while (eptr < md->end_subject)
3383 {
3384 int len = 1;
3385 if (!utf8) c = *eptr;
3386 else { GETCHARLEN(c, eptr, len); }
3387 prop_category = UCD_CATEGORY(c);
3388 if (prop_category != ucp_M) break;
3389 eptr += len;
3390 }
3391 }
3392 }
3393
3394 else
3395 #endif /* SUPPORT_UCP */
3396
3397 /* Handle all other cases when the coding is UTF-8 */
3398
3399 #ifdef SUPPORT_UTF8
3400 if (utf8) switch(ctype)
3401 {
3402 case OP_ANY:
3403 for (i = 1; i <= min; i++)
3404 {
3405 if (eptr >= md->end_subject)
3406 {
3407 SCHECK_PARTIAL();
3408 RRETURN(MATCH_NOMATCH);
3409 }
3410 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3411 eptr++;
3412 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3413 }
3414 break;
3415
3416 case OP_ALLANY:
3417 for (i = 1; i <= min; i++)
3418 {
3419 if (eptr >= md->end_subject)
3420 {
3421 SCHECK_PARTIAL();
3422 RRETURN(MATCH_NOMATCH);
3423 }
3424 eptr++;
3425 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3426 }
3427 break;
3428
3429 case OP_ANYBYTE:
3430 if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3431 eptr += min;
3432 break;
3433
3434 case OP_ANYNL:
3435 for (i = 1; i <= min; i++)
3436 {
3437 if (eptr >= md->end_subject)
3438 {
3439 SCHECK_PARTIAL();
3440 RRETURN(MATCH_NOMATCH);
3441 }
3442 GETCHARINC(c, eptr);
3443 switch(c)
3444 {
3445 default: RRETURN(MATCH_NOMATCH);
3446 case 0x000d:
3447 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3448 break;
3449
3450 case 0x000a:
3451 break;
3452
3453 case 0x000b:
3454 case 0x000c:
3455 case 0x0085:
3456 case 0x2028:
3457 case 0x2029:
3458 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3459 break;
3460 }
3461 }
3462 break;
3463
3464 case OP_NOT_HSPACE:
3465 for (i = 1; i <= min; i++)
3466 {
3467 if (eptr >= md->end_subject)
3468 {
3469 SCHECK_PARTIAL();
3470 RRETURN(MATCH_NOMATCH);
3471 }
3472 GETCHARINC(c, eptr);
3473 switch(c)
3474 {
3475 default: break;
3476 case 0x09: /* HT */
3477 case 0x20: /* SPACE */
3478 case 0xa0: /* NBSP */
3479 case 0x1680: /* OGHAM SPACE MARK */
3480 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3481 case 0x2000: /* EN QUAD */
3482 case 0x2001: /* EM QUAD */
3483 case 0x2002: /* EN SPACE */
3484 case 0x2003: /* EM SPACE */
3485 case 0x2004: /* THREE-PER-EM SPACE */
3486 case 0x2005: /* FOUR-PER-EM SPACE */
3487 case 0x2006: /* SIX-PER-EM SPACE */
3488 case 0x2007: /* FIGURE SPACE */
3489 case 0x2008: /* PUNCTUATION SPACE */
3490 case 0x2009: /* THIN SPACE */
3491 case 0x200A: /* HAIR SPACE */
3492 case 0x202f: /* NARROW NO-BREAK SPACE */
3493 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3494 case 0x3000: /* IDEOGRAPHIC SPACE */
3495 RRETURN(MATCH_NOMATCH);
3496 }
3497 }
3498 break;
3499
3500 case OP_HSPACE:
3501 for (i = 1; i <= min; i++)
3502 {
3503 if (eptr >= md->end_subject)
3504 {
3505 SCHECK_PARTIAL();
3506 RRETURN(MATCH_NOMATCH);
3507 }
3508 GETCHARINC(c, eptr);
3509 switch(c)
3510 {
3511 default: RRETURN(MATCH_NOMATCH);
3512 case 0x09: /* HT */
3513 case 0x20: /* SPACE */
3514 case 0xa0: /* NBSP */
3515 case 0x1680: /* OGHAM SPACE MARK */
3516 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3517 case 0x2000: /* EN QUAD */
3518 case 0x2001: /* EM QUAD */
3519 case 0x2002: /* EN SPACE */
3520 case 0x2003: /* EM SPACE */
3521 case 0x2004: /* THREE-PER-EM SPACE */
3522 case 0x2005: /* FOUR-PER-EM SPACE */
3523 case 0x2006: /* SIX-PER-EM SPACE */
3524 case 0x2007: /* FIGURE SPACE */
3525 case 0x2008: /* PUNCTUATION SPACE */
3526 case 0x2009: /* THIN SPACE */
3527 case 0x200A: /* HAIR SPACE */
3528 case 0x202f: /* NARROW NO-BREAK SPACE */
3529 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3530 case 0x3000: /* IDEOGRAPHIC SPACE */
3531 break;
3532 }
3533 }
3534 break;
3535
3536 case OP_NOT_VSPACE:
3537 for (i = 1; i <= min; i++)
3538 {
3539 if (eptr >= md->end_subject)
3540 {
3541 SCHECK_PARTIAL();
3542 RRETURN(MATCH_NOMATCH);
3543 }
3544 GETCHARINC(c, eptr);
3545 switch(c)
3546 {
3547 default: break;
3548 case 0x0a: /* LF */
3549 case 0x0b: /* VT */
3550 case 0x0c: /* FF */
3551 case 0x0d: /* CR */
3552 case 0x85: /* NEL */
3553 case 0x2028: /* LINE SEPARATOR */
3554 case 0x2029: /* PARAGRAPH SEPARATOR */
3555 RRETURN(MATCH_NOMATCH);
3556 }
3557 }
3558 break;
3559
3560 case OP_VSPACE:
3561 for (i = 1; i <= min; i++)
3562 {
3563 if (eptr >= md->end_subject)
3564 {
3565 SCHECK_PARTIAL();
3566 RRETURN(MATCH_NOMATCH);
3567 }
3568 GETCHARINC(c, eptr);
3569 switch(c)
3570 {
3571 default: RRETURN(MATCH_NOMATCH);
3572 case 0x0a: /* LF */
3573 case 0x0b: /* VT */
3574 case 0x0c: /* FF */
3575 case 0x0d: /* CR */
3576 case 0x85: /* NEL */
3577 case 0x2028: /* LINE SEPARATOR */
3578 case 0x2029: /* PARAGRAPH SEPARATOR */
3579 break;
3580 }
3581 }
3582 break;
3583
3584 case OP_NOT_DIGIT:
3585 for (i = 1; i <= min; i++)
3586 {
3587 if (eptr >= md->end_subject)
3588 {
3589 SCHECK_PARTIAL();
3590 RRETURN(MATCH_NOMATCH);
3591 }
3592 GETCHARINC(c, eptr);
3593 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3594 RRETURN(MATCH_NOMATCH);
3595 }
3596 break;
3597
3598 case OP_DIGIT:
3599 for (i = 1; i <= min; i++)
3600 {
3601 if (eptr >= md->end_subject)
3602 {
3603 SCHECK_PARTIAL();
3604 RRETURN(MATCH_NOMATCH);
3605 }
3606 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3607 RRETURN(MATCH_NOMATCH);
3608 /* No need to skip more bytes - we know it's a 1-byte character */
3609 }
3610 break;
3611
3612 case OP_NOT_WHITESPACE:
3613 for (i = 1; i <= min; i++)
3614 {
3615 if (eptr >= md->end_subject)
3616 {
3617 SCHECK_PARTIAL();
3618 RRETURN(MATCH_NOMATCH);
3619 }
3620 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3621 RRETURN(MATCH_NOMATCH);
3622 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3623 }
3624 break;
3625
3626 case OP_WHITESPACE:
3627 for (i = 1; i <= min; i++)
3628 {
3629 if (eptr >= md->end_subject)
3630 {
3631 SCHECK_PARTIAL();
3632 RRETURN(MATCH_NOMATCH);
3633 }
3634 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3635 RRETURN(MATCH_NOMATCH);
3636 /* No need to skip more bytes - we know it's a 1-byte character */
3637 }
3638 break;
3639
3640 case OP_NOT_WORDCHAR:
3641 for (i = 1; i <= min; i++)
3642 {
3643 if (eptr >= md->end_subject ||
3644 (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3645 RRETURN(MATCH_NOMATCH);
3646 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3647 }
3648 break;
3649
3650 case OP_WORDCHAR:
3651 for (i = 1; i <= min; i++)
3652 {
3653 if (eptr >= md->end_subject)
3654 {
3655 SCHECK_PARTIAL();
3656 RRETURN(MATCH_NOMATCH);
3657 }
3658 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3659 RRETURN(MATCH_NOMATCH);
3660 /* No need to skip more bytes - we know it's a 1-byte character */
3661 }
3662 break;
3663
3664 default:
3665 RRETURN(PCRE_ERROR_INTERNAL);
3666 } /* End switch(ctype) */
3667
3668 else
3669 #endif /* SUPPORT_UTF8 */
3670
3671 /* Code for the non-UTF-8 case for minimum matching of operators other
3672 than OP_PROP and OP_NOTPROP. */
3673
3674 switch(ctype)
3675 {
3676 case OP_ANY:
3677 for (i = 1; i <= min; i++)
3678 {
3679 if (eptr >= md->end_subject)
3680 {
3681 SCHECK_PARTIAL();
3682 RRETURN(MATCH_NOMATCH);
3683 }
3684 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3685 eptr++;
3686 }
3687 break;
3688
3689 case OP_ALLANY:
3690 if (eptr > md->end_subject - min)
3691 {
3692 SCHECK_PARTIAL();
3693 RRETURN(MATCH_NOMATCH);
3694 }
3695 eptr += min;
3696 break;
3697
3698 case OP_ANYBYTE:
3699 if (eptr > md->end_subject - min)
3700 {
3701 SCHECK_PARTIAL();
3702 RRETURN(MATCH_NOMATCH);
3703 }
3704 eptr += min;
3705 break;
3706
3707 case OP_ANYNL:
3708 for (i = 1; i <= min; i++)
3709 {
3710 if (eptr >= md->end_subject)
3711 {
3712 SCHECK_PARTIAL();
3713 RRETURN(MATCH_NOMATCH);
3714 }
3715 switch(*eptr++)
3716 {
3717 default: RRETURN(MATCH_NOMATCH);
3718 case 0x000d:
3719 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3720 break;
3721 case 0x000a:
3722 break;
3723
3724 case 0x000b:
3725 case 0x000c:
3726 case 0x0085:
3727 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3728 break;
3729 }
3730 }
3731 break;
3732
3733 case OP_NOT_HSPACE:
3734 for (i = 1; i <= min; i++)
3735 {
3736 if (eptr >= md->end_subject)
3737 {
3738 SCHECK_PARTIAL();
3739 RRETURN(MATCH_NOMATCH);
3740 }
3741 switch(*eptr++)
3742 {
3743 default: break;
3744 case 0x09: /* HT */
3745 case 0x20: /* SPACE */
3746 case 0xa0: /* NBSP */
3747 RRETURN(MATCH_NOMATCH);
3748 }
3749 }
3750 break;
3751
3752 case OP_HSPACE:
3753 for (i = 1; i <= min; i++)
3754 {
3755 if (eptr >= md->end_subject)
3756 {
3757 SCHECK_PARTIAL();
3758 RRETURN(MATCH_NOMATCH);
3759 }
3760 switch(*eptr++)
3761 {
3762 default: RRETURN(MATCH_NOMATCH);
3763 case 0x09: /* HT */
3764 case 0x20: /* SPACE */
3765 case 0xa0: /* NBSP */
3766 break;
3767 }
3768 }
3769 break;
3770
3771 case OP_NOT_VSPACE:
3772 for (i = 1; i <= min; i++)
3773 {
3774 if (eptr >= md->end_subject)
3775 {
3776 SCHECK_PARTIAL();
3777 RRETURN(MATCH_NOMATCH);
3778 }
3779 switch(*eptr++)
3780 {
3781 default: break;
3782 case 0x0a: /* LF */
3783 case 0x0b: /* VT */
3784 case 0x0c: /* FF */
3785 case 0x0d: /* CR */
3786 case 0x85: /* NEL */
3787 RRETURN(MATCH_NOMATCH);
3788 }
3789 }
3790 break;
3791
3792 case OP_VSPACE:
3793 for (i = 1; i <= min; i++)
3794 {
3795 if (eptr >= md->end_subject)
3796 {
3797 SCHECK_PARTIAL();
3798 RRETURN(MATCH_NOMATCH);
3799 }
3800 switch(*eptr++)
3801 {
3802 default: RRETURN(MATCH_NOMATCH);
3803 case 0x0a: /* LF */
3804 case 0x0b: /* VT */
3805 case 0x0c: /* FF */
3806 case 0x0d: /* CR */
3807 case 0x85: /* NEL */
3808 break;
3809 }
3810 }
3811 break;
3812
3813 case OP_NOT_DIGIT:
3814 for (i = 1; i <= min; i++)
3815 {
3816 if (eptr >= md->end_subject)
3817 {
3818 SCHECK_PARTIAL();
3819 RRETURN(MATCH_NOMATCH);
3820 }
3821 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3822 }
3823 break;
3824
3825 case OP_DIGIT:
3826 for (i = 1; i <= min; i++)
3827 {
3828 if (eptr >= md->end_subject)
3829 {
3830 SCHECK_PARTIAL();
3831 RRETURN(MATCH_NOMATCH);
3832 }
3833 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3834 }
3835 break;
3836
3837 case OP_NOT_WHITESPACE:
3838 for (i = 1; i <= min; i++)
3839 {
3840 if (eptr >= md->end_subject)
3841 {
3842 SCHECK_PARTIAL();
3843 RRETURN(MATCH_NOMATCH);
3844 }
3845 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3846 }
3847 break;
3848
3849 case OP_WHITESPACE:
3850 for (i = 1; i <= min; i++)
3851 {
3852 if (eptr >= md->end_subject)
3853 {
3854 SCHECK_PARTIAL();
3855 RRETURN(MATCH_NOMATCH);
3856 }
3857 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3858 }
3859 break;
3860
3861 case OP_NOT_WORDCHAR:
3862 for (i = 1; i <= min; i++)
3863 {
3864 if (eptr >= md->end_subject)
3865 {
3866 SCHECK_PARTIAL();
3867 RRETURN(MATCH_NOMATCH);
3868 }
3869 if ((md->ctypes[*eptr++] & ctype_word) != 0)
3870 RRETURN(MATCH_NOMATCH);
3871 }
3872 break;
3873
3874 case OP_WORDCHAR:
3875 for (i = 1; i <= min; i++)
3876 {
3877 if (eptr >= md->end_subject)
3878 {
3879 SCHECK_PARTIAL();
3880 RRETURN(MATCH_NOMATCH);
3881 }
3882 if ((md->ctypes[*eptr++] & ctype_word) == 0)
3883 RRETURN(MATCH_NOMATCH);
3884 }
3885 break;
3886
3887 default:
3888 RRETURN(PCRE_ERROR_INTERNAL);
3889 }
3890 }
3891
3892 /* If min = max, continue at the same level without recursing */
3893
3894 if (min == max) continue;
3895
3896 /* If minimizing, we have to test the rest of the pattern before each
3897 subsequent match. Again, separate the UTF-8 case for speed, and also
3898 separate the UCP cases. */
3899
3900 if (minimize)
3901 {
3902 #ifdef SUPPORT_UCP
3903 if (prop_type >= 0)
3904 {
3905 switch(prop_type)
3906 {
3907 case PT_ANY:
3908 for (fi = min;; fi++)
3909 {
3910 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3911 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3912 if (fi >= max) RRETURN(MATCH_NOMATCH);
3913 if (eptr >= md->end_subject)
3914 {
3915 SCHECK_PARTIAL();
3916 RRETURN(MATCH_NOMATCH);
3917 }
3918 GETCHARINC(c, eptr);
3919 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3920 }
3921 /* Control never gets here */
3922
3923 case PT_LAMP:
3924 for (fi = min;; fi++)
3925 {
3926 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3927 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3928 if (fi >= max) RRETURN(MATCH_NOMATCH);
3929 if (eptr >= md->end_subject)
3930 {
3931 SCHECK_PARTIAL();
3932 RRETURN(MATCH_NOMATCH);
3933 }
3934 GETCHARINC(c, eptr);
3935 prop_chartype = UCD_CHARTYPE(c);
3936 if ((prop_chartype == ucp_Lu ||
3937 prop_chartype == ucp_Ll ||
3938 prop_chartype == ucp_Lt) == prop_fail_result)
3939 RRETURN(MATCH_NOMATCH);
3940 }
3941 /* Control never gets here */
3942
3943 case PT_GC:
3944 for (fi = min;; fi++)
3945 {
3946 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3947 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3948 if (fi >= max) RRETURN(MATCH_NOMATCH);
3949 if (eptr >= md->end_subject)
3950 {
3951 SCHECK_PARTIAL();
3952 RRETURN(MATCH_NOMATCH);
3953 }
3954 GETCHARINC(c, eptr);
3955 prop_category = UCD_CATEGORY(c);
3956 if ((prop_category == prop_value) == prop_fail_result)
3957 RRETURN(MATCH_NOMATCH);
3958 }
3959 /* Control never gets here */
3960
3961 case PT_PC:
3962 for (fi = min;; fi++)
3963 {
3964 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3965 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3966 if (fi >= max) RRETURN(MATCH_NOMATCH);
3967 if (eptr >= md->end_subject)
3968 {
3969 SCHECK_PARTIAL();
3970 RRETURN(MATCH_NOMATCH);
3971 }
3972 GETCHARINC(c, eptr);
3973 prop_chartype = UCD_CHARTYPE(c);
3974 if ((prop_chartype == prop_value) == prop_fail_result)
3975 RRETURN(MATCH_NOMATCH);
3976 }
3977 /* Control never gets here */
3978
3979 case PT_SC:
3980 for (fi = min;; fi++)
3981 {
3982 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3983 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3984 if (fi >= max) RRETURN(MATCH_NOMATCH);
3985 if (eptr >= md->end_subject)
3986 {
3987 SCHECK_PARTIAL();
3988 RRETURN(MATCH_NOMATCH);
3989 }
3990 GETCHARINC(c, eptr);
3991 prop_script = UCD_SCRIPT(c);
3992 if ((prop_script == prop_value) == prop_fail_result)
3993 RRETURN(MATCH_NOMATCH);
3994 }
3995 /* Control never gets here */
3996
3997 default:
3998 RRETURN(PCRE_ERROR_INTERNAL);
3999 }
4000 }
4001
4002 /* Match extended Unicode sequences. We will get here only if the
4003 support is in the binary; otherwise a compile-time error occurs. */
4004
4005 else if (ctype == OP_EXTUNI)
4006 {
4007 for (fi = min;; fi++)
4008 {
4009 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4010 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4011 if (fi >= max) RRETURN(MATCH_NOMATCH);
4012 if (eptr >= md->end_subject)
4013 {
4014 SCHECK_PARTIAL();
4015 RRETURN(MATCH_NOMATCH);
4016 }
4017 GETCHARINCTEST(c, eptr);
4018 prop_category = UCD_CATEGORY(c);
4019 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
4020 while (eptr < md->end_subject)
4021 {
4022 int len = 1;
4023 if (!utf8) c = *eptr;
4024 else { GETCHARLEN(c, eptr, len); }
4025 prop_category = UCD_CATEGORY(c);
4026 if (prop_category != ucp_M) break;
4027 eptr += len;
4028 }
4029 }
4030 }
4031
4032 else
4033 #endif /* SUPPORT_UCP */
4034
4035 #ifdef SUPPORT_UTF8
4036 /* UTF-8 mode */
4037 if (utf8)
4038 {
4039 for (fi = min;; fi++)
4040 {
4041 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4042 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4043 if (fi >= max) RRETURN(MATCH_NOMATCH);
4044 if (eptr >= md->end_subject)
4045 {
4046 SCHECK_PARTIAL();
4047 RRETURN(MATCH_NOMATCH);
4048 }
4049 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4050 RRETURN(MATCH_NOMATCH);
4051 GETCHARINC(c, eptr);
4052 switch(ctype)
4053 {
4054 case OP_ANY: /* This is the non-NL case */
4055 case OP_ALLANY:
4056 case OP_ANYBYTE:
4057 break;
4058
4059 case OP_ANYNL:
4060 switch(c)
4061 {
4062 default: RRETURN(MATCH_NOMATCH);
4063 case 0x000d:
4064 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4065 break;
4066 case 0x000a:
4067 break;
4068
4069 case 0x000b:
4070 case 0x000c:
4071 case 0x0085:
4072 case 0x2028:
4073 case 0x2029:
4074 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4075 break;
4076 }
4077 break;
4078
4079 case OP_NOT_HSPACE:
4080 switch(c)
4081 {
4082 default: break;
4083 case 0x09: /* HT */
4084 case 0x20: /* SPACE */
4085 case 0xa0: /* NBSP */
4086 case 0x1680: /* OGHAM SPACE MARK */
4087 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4088 case 0x2000: /* EN QUAD */
4089 case 0x2001: /* EM QUAD */
4090 case 0x2002: /* EN SPACE */
4091 case 0x2003: /* EM SPACE */
4092 case 0x2004: /* THREE-PER-EM SPACE */
4093 case 0x2005: /* FOUR-PER-EM SPACE */
4094 case 0x2006: /* SIX-PER-EM SPACE */
4095 case 0x2007: /* FIGURE SPACE */
4096 case 0x2008: /* PUNCTUATION SPACE */
4097 case 0x2009: /* THIN SPACE */
4098 case 0x200A: /* HAIR SPACE */
4099 case 0x202f: /* NARROW NO-BREAK SPACE */
4100 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4101 case 0x3000: /* IDEOGRAPHIC SPACE */
4102 RRETURN(MATCH_NOMATCH);
4103 }
4104 break;
4105
4106 case OP_HSPACE:
4107 switch(c)
4108 {
4109 default: RRETURN(MATCH_NOMATCH);
4110 case 0x09: /* HT */
4111 case 0x20: /* SPACE */
4112 case 0xa0: /* NBSP */
4113 case 0x1680: /* OGHAM SPACE MARK */
4114 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4115 case 0x2000: /* EN QUAD */
4116 case 0x2001: /* EM QUAD */
4117 case 0x2002: /* EN SPACE */
4118 case 0x2003: /* EM SPACE */
4119 case 0x2004: /* THREE-PER-EM SPACE */
4120 case 0x2005: /* FOUR-PER-EM SPACE */
4121 case 0x2006: /* SIX-PER-EM SPACE */
4122 case 0x2007: /* FIGURE SPACE */
4123 case 0x2008: /* PUNCTUATION SPACE */
4124 case 0x2009: /* THIN SPACE */
4125 case 0x200A: /* HAIR SPACE */
4126 case 0x202f: /* NARROW NO-BREAK SPACE */
4127 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4128 case 0x3000: /* IDEOGRAPHIC SPACE */
4129 break;
4130 }
4131 break;
4132
4133 case OP_NOT_VSPACE:
4134 switch(c)
4135 {
4136 default: break;
4137 case 0x0a: /* LF */
4138 case 0x0b: /* VT */
4139 case 0x0c: /* FF */
4140 case 0x0d: /* CR */
4141 case 0x85: /* NEL */
4142 case 0x2028: /* LINE SEPARATOR */
4143 case 0x2029: /* PARAGRAPH SEPARATOR */
4144 RRETURN(MATCH_NOMATCH);
4145 }
4146 break;
4147
4148 case OP_VSPACE:
4149 switch(c)
4150 {
4151 default: RRETURN(MATCH_NOMATCH);
4152 case 0x0a: /* LF */
4153 case 0x0b: /* VT */
4154 case 0x0c: /* FF */
4155 case 0x0d: /* CR */
4156 case 0x85: /* NEL */
4157 case 0x2028: /* LINE SEPARATOR */
4158 case 0x2029: /* PARAGRAPH SEPARATOR */
4159 break;
4160 }
4161 break;
4162
4163 case OP_NOT_DIGIT:
4164 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4165 RRETURN(MATCH_NOMATCH);
4166 break;
4167
4168 case OP_DIGIT:
4169 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4170 RRETURN(MATCH_NOMATCH);
4171 break;
4172
4173 case OP_NOT_WHITESPACE:
4174 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4175 RRETURN(MATCH_NOMATCH);
4176 break;
4177
4178 case OP_WHITESPACE:
4179 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4180 RRETURN(MATCH_NOMATCH);
4181 break;
4182
4183 case OP_NOT_WORDCHAR:
4184 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4185 RRETURN(MATCH_NOMATCH);
4186 break;
4187
4188 case OP_WORDCHAR:
4189 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4190 RRETURN(MATCH_NOMATCH);
4191 break;
4192
4193 default:
4194 RRETURN(PCRE_ERROR_INTERNAL);
4195 }
4196 }
4197 }
4198 else
4199 #endif
4200 /* Not UTF-8 mode */
4201 {
4202 for (fi = min;; fi++)
4203 {
4204 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4205 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4206 if (fi >= max) RRETURN(MATCH_NOMATCH);
4207 if (eptr >= md->end_subject)
4208 {
4209 SCHECK_PARTIAL();
4210 RRETURN(MATCH_NOMATCH);
4211 }
4212 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4213 RRETURN(MATCH_NOMATCH);
4214 c = *eptr++;
4215 switch(ctype)
4216 {
4217 case OP_ANY: /* This is the non-NL case */
4218 case OP_ALLANY:
4219 case OP_ANYBYTE:
4220 break;
4221
4222 case OP_ANYNL:
4223 switch(c)
4224 {
4225 default: RRETURN(MATCH_NOMATCH);
4226 case 0x000d:
4227 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4228 break;
4229
4230 case 0x000a:
4231 break;
4232
4233 case 0x000b:
4234 case 0x000c:
4235 case 0x0085:
4236 if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4237 break;
4238 }
4239 break;
4240
4241 case OP_NOT_HSPACE:
4242 switch(c)
4243 {
4244 default: break;
4245 case 0x09: /* HT */
4246 case 0x20: /* SPACE */
4247 case 0xa0: /* NBSP */
4248 RRETURN(MATCH_NOMATCH);
4249 }
4250 break;
4251
4252 case OP_HSPACE:
4253 switch(c)
4254 {
4255 default: RRETURN(MATCH_NOMATCH);
4256 case 0x09: /* HT */
4257 case 0x20: /* SPACE */
4258 case 0xa0: /* NBSP */
4259 break;
4260 }
4261 break;
4262
4263 case OP_NOT_VSPACE:
4264 switch(c)
4265 {
4266 default: break;
4267 case 0x0a: /* LF */
4268 case 0x0b: /* VT */
4269 case 0x0c: /* FF */
4270 case 0x0d: /* CR */
4271 case 0x85: /* NEL */
4272 RRETURN(MATCH_NOMATCH);
4273 }
4274 break;
4275
4276 case OP_VSPACE:
4277 switch(c)
4278 {
4279 default: RRETURN(MATCH_NOMATCH);
4280 case 0x0a: /* LF */
4281 case 0x0b: /* VT */
4282 case 0x0c: /* FF */
4283 case 0x0d: /* CR */
4284 case 0x85: /* NEL */
4285 break;
4286 }
4287 break;
4288
4289 case OP_NOT_DIGIT:
4290 if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
4291 break;
4292
4293 case OP_DIGIT:
4294 if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
4295 break;
4296
4297 case OP_NOT_WHITESPACE:
4298 if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
4299 break;
4300
4301 case OP_WHITESPACE:
4302 if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
4303 break;
4304
4305 case OP_NOT_WORDCHAR:
4306 if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
4307 break;
4308
4309 case OP_WORDCHAR:
4310 if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
4311 break;
4312
4313 default:
4314 RRETURN(PCRE_ERROR_INTERNAL);
4315 }
4316 }
4317 }
4318 /* Control never gets here */
4319 }
4320
4321 /* If maximizing, it is worth using inline code for speed, doing the type
4322 test once at the start (i.e. keep it out of the loop). Again, keep the
4323 UTF-8 and UCP stuff separate. */
4324
4325 else
4326 {
4327 pp = eptr; /* Remember where we started */
4328
4329 #ifdef SUPPORT_UCP
4330 if (prop_type >= 0)
4331 {
4332 switch(prop_type)
4333 {
4334 case PT_ANY:
4335 for (i = min; i < max; i++)
4336 {
4337 int len = 1;
4338 if (eptr >= md->end_subject) break;
4339 GETCHARLEN(c, eptr, len);
4340 if (prop_fail_result) break;
4341 eptr+= len;
4342 }
4343 break;
4344
4345 case PT_LAMP:
4346 for (i = min; i < max; i++)
4347 {
4348 int len = 1;
4349 if (eptr >= md->end_subject) break;
4350 GETCHARLEN(c, eptr, len);
4351 prop_chartype = UCD_CHARTYPE(c);
4352 if ((prop_chartype == ucp_Lu ||
4353 prop_chartype == ucp_Ll ||
4354 prop_chartype == ucp_Lt) == prop_fail_result)
4355 break;
4356 eptr+= len;
4357 }
4358 break;
4359
4360 case PT_GC:
4361 for (i = min; i < max; i++)
4362 {
4363 int len = 1;
4364 if (eptr >= md->end_subject) break;
4365 GETCHARLEN(c, eptr, len);
4366 prop_category = UCD_CATEGORY(c);
4367 if ((prop_category == prop_value) == prop_fail_result)
4368 break;
4369 eptr+= len;
4370 }
4371 break;
4372
4373 case PT_PC:
4374 for (i = min; i < max; i++)
4375 {
4376 int len = 1;
4377 if (eptr >= md->end_subject) break;
4378 GETCHARLEN(c, eptr, len);
4379 prop_chartype = UCD_CHARTYPE(c);
4380 if ((prop_chartype == prop_value) == prop_fail_result)
4381 break;
4382 eptr+= len;
4383 }
4384 break;
4385
4386 case PT_SC:
4387 for (i = min; i < max; i++)
4388 {
4389 int len = 1;
4390 if (eptr >= md->end_subject) break;
4391 GETCHARLEN(c, eptr, len);
4392 prop_script = UCD_SCRIPT(c);
4393 if ((prop_script == prop_value) == prop_fail_result)
4394 break;
4395 eptr+= len;
4396 }
4397 break;
4398 }
4399
4400 /* eptr is now past the end of the maximum run */
4401
4402 if (possessive) continue;
4403 for(;;)
4404 {
4405 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4406 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4407 if (eptr-- == pp) break; /* Stop if tried at original pos */
4408 if (utf8) BACKCHAR(eptr);
4409 }
4410 }
4411
4412 /* Match extended Unicode sequences. We will get here only if the
4413 support is in the binary; otherwise a compile-time error occurs. */
4414
4415 else if (ctype == OP_EXTUNI)
4416 {
4417 for (i = min; i < max; i++)
4418 {
4419 if (eptr >= md->end_subject) break;
4420 GETCHARINCTEST(c, eptr);
4421 prop_category = UCD_CATEGORY(c);
4422 if (prop_category == ucp_M) break;
4423 while (eptr < md->end_subject)
4424 {
4425 int len = 1;
4426 if (!utf8) c = *eptr; else
4427 {
4428 GETCHARLEN(c, eptr, len);
4429 }
4430 prop_category = UCD_CATEGORY(c);
4431 if (prop_category != ucp_M) break;
4432 eptr += len;
4433 }
4434 }
4435
4436 /* eptr is now past the end of the maximum run */
4437
4438 if (possessive) continue;
4439 for(;;)
4440 {
4441 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4442 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4443 if (eptr-- == pp) break; /* Stop if tried at original pos */
4444 for (;;) /* Move back over one extended */
4445 {
4446 int len = 1;
4447 if (!utf8) c = *eptr; else
4448 {
4449 BACKCHAR(eptr);
4450 GETCHARLEN(c, eptr, len);
4451 }
4452 prop_category = UCD_CATEGORY(c);
4453 if (prop_category != ucp_M) break;
4454 eptr--;
4455 }
4456 }
4457 }
4458
4459 else
4460 #endif /* SUPPORT_UCP */
4461
4462 #ifdef SUPPORT_UTF8
4463 /* UTF-8 mode */
4464
4465 if (utf8)
4466 {
4467 switch(ctype)
4468 {
4469 case OP_ANY:
4470 if (max < INT_MAX)
4471 {
4472 for (i = min; i < max; i++)
4473 {
4474 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4475 eptr++;
4476 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4477 }
4478 }
4479
4480 /* Handle unlimited UTF-8 repeat */
4481
4482 else
4483 {
4484 for (i = min; i < max; i++)
4485 {
4486 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4487 eptr++;
4488 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4489 }
4490 }
4491 break;
4492
4493 case OP_ALLANY:
4494 if (max < INT_MAX)
4495 {
4496 for (i = min; i < max; i++)
4497 {
4498 if (eptr >= md->end_subject) break;
4499 eptr++;
4500 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4501 }
4502 }
4503 else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
4504 break;
4505
4506 /* The byte case is the same as non-UTF8 */
4507
4508 case OP_ANYBYTE:
4509 c = max - min;
4510 if (c > (unsigned int)(md->end_subject - eptr))
4511 c = md->end_subject - eptr;
4512 eptr += c;
4513 break;
4514
4515 case OP_ANYNL:
4516 for (i = min; i < max; i++)
4517 {
4518 int len = 1;
4519 if (eptr >= md->end_subject) break;
4520 GETCHARLEN(c, eptr, len);
4521 if (c == 0x000d)
4522 {
4523 if (++eptr >= md->end_subject) break;
4524 if (*eptr == 0x000a) eptr++;
4525 }
4526 else
4527 {
4528 if (c != 0x000a &&
4529 (md->bsr_anycrlf ||
4530 (c != 0x000b && c != 0x000c &&
4531 c != 0x0085 && c != 0x2028 && c != 0x2029)))
4532 break;
4533 eptr += len;
4534 }
4535 }
4536 break;
4537
4538 case OP_NOT_HSPACE:
4539 case OP_HSPACE:
4540 for (i = min; i < max; i++)
4541 {
4542 BOOL gotspace;
4543 int len = 1;
4544 if (eptr >= md->end_subject) break;
4545 GETCHARLEN(c, eptr, len);
4546 switch(c)
4547 {
4548 default: gotspace = FALSE; break;
4549 case 0x09: /* HT */
4550 case 0x20: /* SPACE */
4551 case 0xa0: /* NBSP */
4552 case 0x1680: /* OGHAM SPACE MARK */
4553 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4554 case 0x2000: /* EN QUAD */
4555 case 0x2001: /* EM QUAD */
4556 case 0x2002: /* EN SPACE */
4557 case 0x2003: /* EM SPACE */
4558 case 0x2004: /* THREE-PER-EM SPACE */
4559 case 0x2005: /* FOUR-PER-EM SPACE */
4560 case 0x2006: /* SIX-PER-EM SPACE */
4561 case 0x2007: /* FIGURE SPACE */
4562 case 0x2008: /* PUNCTUATION SPACE */
4563 case 0x2009: /* THIN SPACE */
4564 case 0x200A: /* HAIR SPACE */
4565 case 0x202f: /* NARROW NO-BREAK SPACE */
4566 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4567 case 0x3000: /* IDEOGRAPHIC SPACE */
4568 gotspace = TRUE;
4569 break;
4570 }
4571 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
4572 eptr += len;
4573 }
4574 break;
4575
4576 case OP_NOT_VSPACE:
4577 case OP_VSPACE:
4578 for (i = min; i < max; i++)
4579 {
4580 BOOL gotspace;
4581 int len = 1;
4582 if (eptr >= md->end_subject) break;
4583 GETCHARLEN(c, eptr, len);
4584 switch(c)
4585 {
4586 default: gotspace = FALSE; break;
4587 case 0x0a: /* LF */
4588 case 0x0b: /* VT */
4589 case 0x0c: /* FF */
4590 case 0x0d: /* CR */
4591 case 0x85: /* NEL */
4592 case 0x2028: /* LINE SEPARATOR */
4593 case 0x2029: /* PARAGRAPH SEPARATOR */
4594 gotspace = TRUE;
4595 break;
4596 }
4597 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
4598 eptr += len;
4599 }
4600 break;
4601
4602 case OP_NOT_DIGIT:
4603 for (i = min; i < max; i++)
4604 {
4605 int len = 1;
4606 if (eptr >= md->end_subject) break;
4607 GETCHARLEN(c, eptr, len);
4608 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
4609 eptr+= len;
4610 }
4611 break;
4612
4613 case OP_DIGIT:
4614 for (i = min; i < max; i++)
4615 {
4616 int len = 1;
4617 if (eptr >= md->end_subject) break;
4618 GETCHARLEN(c, eptr, len);
4619 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
4620 eptr+= len;
4621 }
4622 break;
4623
4624 case OP_NOT_WHITESPACE:
4625 for (i = min; i < max; i++)
4626 {
4627 int len = 1;
4628 if (eptr >= md->end_subject) break;
4629 GETCHARLEN(c, eptr, len);
4630 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
4631 eptr+= len;
4632 }
4633 break;
4634
4635 case OP_WHITESPACE:
4636 for (i = min; i < max; i++)
4637 {
4638 int len = 1;
4639 if (eptr >= md->end_subject) break;
4640 GETCHARLEN(c, eptr, len);
4641 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
4642 eptr+= len;
4643 }
4644 break;
4645
4646 case OP_NOT_WORDCHAR:
4647 for (i = min; i < max; i++)
4648 {
4649 int len = 1;
4650 if (eptr >= md->end_subject) break;
4651 GETCHARLEN(c, eptr, len);
4652 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
4653 eptr+= len;
4654 }
4655 break;
4656
4657 case OP_WORDCHAR:
4658 for (i = min; i < max; i++)
4659 {
4660 int len = 1;
4661 if (eptr >= md->end_subject) break;
4662 GETCHARLEN(c, eptr, len);
4663 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
4664 eptr+= len;
4665 }
4666 break;
4667
4668 default:
4669 RRETURN(PCRE_ERROR_INTERNAL);
4670 }
4671
4672 /* eptr is now past the end of the maximum run */
4673
4674 if (possessive) continue;
4675 for(;;)
4676 {
4677 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4678 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4679 if (eptr-- == pp) break; /* Stop if tried at original pos */
4680 BACKCHAR(eptr);
4681 }
4682 }
4683 else
4684 #endif /* SUPPORT_UTF8 */
4685
4686 /* Not UTF-8 mode */
4687 {
4688 switch(ctype)
4689 {
4690 case OP_ANY:
4691 for (i = min; i < max; i++)
4692 {
4693 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4694 eptr++;
4695 }
4696 break;
4697
4698 case OP_ALLANY:
4699 case OP_ANYBYTE:
4700 c = max - min;
4701 if (c > (unsigned int)(md->end_subject - eptr))
4702 c = md->end_subject - eptr;
4703 eptr += c;
4704 break;
4705
4706 case OP_ANYNL:
4707 for (i = min; i < max; i++)
4708 {
4709 if (eptr >= md->end_subject) break;
4710 c = *eptr;
4711 if (c == 0x000d)
4712 {
4713 if (++eptr >= md->end_subject) break;
4714 if (*eptr == 0x000a) eptr++;
4715 }
4716 else
4717 {
4718 if (c != 0x000a &&
4719 (md->bsr_anycrlf ||
4720 (c != 0x000b && c != 0x000c && c != 0x0085)))
4721 break;
4722 eptr++;
4723 }
4724 }
4725 break;
4726
4727 case OP_NOT_HSPACE:
4728 for (i = min; i < max; i++)
4729 {
4730 if (eptr >= md->end_subject) break;
4731 c = *eptr;
4732 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4733 eptr++;
4734 }
4735 break;
4736
4737 case OP_HSPACE:
4738 for (i = min; i < max; i++)
4739 {
4740 if (eptr >= md->end_subject) break;
4741 c = *eptr;
4742 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4743 eptr++;
4744 }
4745 break;
4746
4747 case OP_NOT_VSPACE:
4748 for (i = min; i < max; i++)
4749 {
4750 if (eptr >= md->end_subject) break;
4751 c = *eptr;
4752 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4753 break;
4754 eptr++;
4755 }
4756 break;
4757
4758 case OP_VSPACE:
4759 for (i = min; i < max; i++)
4760 {
4761 if (eptr >= md->end_subject) break;
4762 c = *eptr;
4763 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4764 break;
4765 eptr++;
4766 }
4767 break;
4768
4769 case OP_NOT_DIGIT:
4770 for (i = min; i < max; i++)
4771 {
4772 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4773 break;
4774 eptr++;
4775 }
4776 break;
4777
4778 case OP_DIGIT:
4779 for (i = min; i < max; i++)
4780 {
4781 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4782 break;
4783 eptr++;
4784 }
4785 break;
4786
4787 case OP_NOT_WHITESPACE:
4788 for (i = min; i < max; i++)
4789 {
4790 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4791 break;
4792 eptr++;
4793 }
4794 break;
4795
4796 case OP_WHITESPACE:
4797 for (i = min; i < max; i++)
4798 {
4799 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4800 break;
4801 eptr++;
4802 }
4803 break;
4804
4805 case OP_NOT_WORDCHAR:
4806 for (i = min; i < max; i++)
4807 {
4808 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4809 break;
4810 eptr++;
4811 }
4812 break;
4813
4814 case OP_WORDCHAR:
4815 for (i = min; i < max; i++)
4816 {
4817 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4818 break;
4819 eptr++;
4820 }
4821 break;
4822
4823 default:
4824 RRETURN(PCRE_ERROR_INTERNAL);
4825 }
4826
4827 /* eptr is now past the end of the maximum run */
4828
4829 if (possessive) continue;
4830 while (eptr >= pp)
4831 {
4832 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4833 eptr--;
4834 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4835 }
4836 }
4837
4838 /* Get here if we can't make it match with any permitted repetitions */
4839
4840 RRETURN(MATCH_NOMATCH);
4841 }
4842 /* Control never gets here */
4843
4844 /* There's been some horrible disaster. Arrival here can only mean there is
4845 something seriously wrong in the code above or the OP_xxx definitions. */
4846
4847 default:
4848 DPRINTF(("Unknown opcode %d\n", *ecode));
4849 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4850 }
4851
4852 /* Do not stick any code in here without much thought; it is assumed
4853 that "continue" in the code above comes out to here to repeat the main
4854 loop. */
4855
4856 } /* End of main loop */
4857 /* Control never reaches here */
4858
4859
4860 /* When compiling to use the heap rather than the stack for recursive calls to
4861 match(), the RRETURN() macro jumps here. The number that is saved in
4862 frame->Xwhere indicates which label we actually want to return to. */
4863
4864 #ifdef NO_RECURSE
4865 #define LBL(val) case val: goto L_RM##val;
4866 HEAP_RETURN:
4867 switch (frame->Xwhere)
4868 {
4869 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4870 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4871 LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4872 LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4873 LBL(53) LBL(54)
4874 #ifdef SUPPORT_UTF8
4875 LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4876 LBL(32) LBL(34) LBL(42) LBL(46)
4877 #ifdef SUPPORT_UCP
4878 LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4879 #endif /* SUPPORT_UCP */
4880 #endif /* SUPPORT_UTF8 */
4881 default:
4882 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4883 return PCRE_ERROR_INTERNAL;
4884 }
4885 #undef LBL
4886 #endif /* NO_RECURSE */
4887 }
4888
4889
4890 /***************************************************************************
4891 ****************************************************************************
4892 RECURSION IN THE match() FUNCTION
4893
4894 Undefine all the macros that were defined above to handle this. */
4895
4896 #ifdef NO_RECURSE
4897 #undef eptr
4898 #undef ecode
4899 #undef mstart
4900 #undef offset_top
4901 #undef ims
4902 #undef eptrb
4903 #undef flags
4904
4905 #undef callpat
4906 #undef charptr
4907 #undef data
4908 #undef next
4909 #undef pp
4910 #undef prev
4911 #undef saved_eptr
4912
4913 #undef new_recursive
4914
4915 #undef cur_is_word
4916 #undef condition
4917 #undef prev_is_word
4918
4919 #undef original_ims
4920
4921 #undef ctype
4922 #undef length
4923 #undef max
4924 #undef min
4925 #undef number
4926 #undef offset
4927 #undef op
4928 #undef save_capture_last
4929 #undef save_offset1
4930 #undef save_offset2
4931 #undef save_offset3
4932 #undef stacksave
4933
4934 #undef newptrb
4935
4936 #endif
4937
4938 /* These two are defined as macros in both cases */
4939
4940 #undef fc
4941 #undef fi
4942
4943 /***************************************************************************
4944 ***************************************************************************/
4945
4946
4947
4948 /*************************************************
4949 * Execute a Regular Expression *
4950 *************************************************/
4951
4952 /* This function applies a compiled re to a subject string and picks out
4953 portions of the string if it matches. Two elements in the vector are set for
4954 each substring: the offsets to the start and end of the substring.
4955
4956 Arguments:
4957 argument_re points to the compiled expression
4958 extra_data points to extra data or is NULL
4959 subject points to the subject string
4960 length length of subject string (may contain binary zeros)
4961 start_offset where to start in the subject string
4962 options option bits
4963 offsets points to a vector of ints to be filled in with offsets
4964 offsetcount the number of elements in the vector
4965
4966 Returns: > 0 => success; value is the number of elements filled in
4967 = 0 => success, but offsets is not big enough
4968 -1 => failed to match
4969 < -1 => some kind of unexpected problem
4970 */
4971
4972 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4973 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4974 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4975 int offsetcount)
4976 {
4977 int rc, resetcount, ocount;
4978 int first_byte = -1;
4979 int req_byte = -1;
4980 int req_byte2 = -1;
4981 int newline;
4982 unsigned long int ims;
4983 BOOL using_temporary_offsets = FALSE;
4984 BOOL anchored;
4985 BOOL startline;
4986 BOOL firstline;
4987 BOOL first_byte_caseless = FALSE;
4988 BOOL req_byte_caseless = FALSE;
4989 BOOL utf8;
4990 match_data match_block;
4991 match_data *md = &match_block;
4992 const uschar *tables;
4993 const uschar *start_bits = NULL;
4994 USPTR start_match = (USPTR)subject + start_offset;
4995 USPTR end_subject;
4996 USPTR start_partial = NULL;
4997 USPTR req_byte_ptr = start_match - 1;
4998
4999 pcre_study_data internal_study;
5000 const pcre_study_data *study;
5001
5002 real_pcre internal_re;
5003 const real_pcre *external_re = (const real_pcre *)argument_re;
5004 const real_pcre *re = external_re;
5005
5006 /* Plausibility checks */
5007
5008 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
5009 if (re == NULL || subject == NULL ||
5010 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5011 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5012
5013 /* This information is for finding all the numbers associated with a given
5014 name, for condition testing. */
5015
5016 md->name_table = (uschar *)re + re->name_table_offset;
5017 md->name_count = re->name_count;
5018 md->name_entry_size = re->name_entry_size;
5019
5020 /* Fish out the optional data from the extra_data structure, first setting
5021 the default values. */
5022
5023 study = NULL;
5024 md->match_limit = MATCH_LIMIT;
5025 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
5026 md->callout_data = NULL;
5027
5028 /* The table pointer is always in native byte order. */
5029
5030 tables = external_re->tables;
5031
5032 if (extra_data != NULL)
5033 {
5034 register unsigned int flags = extra_data->flags;
5035 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
5036 study = (const pcre_study_data *)extra_data->study_data;
5037 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
5038 md->match_limit = extra_data->match_limit;
5039 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
5040 md->match_limit_recursion = extra_data->match_limit_recursion;
5041 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
5042 md->callout_data = extra_data->callout_data;
5043 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
5044 }
5045
5046 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
5047 is a feature that makes it possible to save compiled regex and re-use them
5048 in other programs later. */
5049
5050 if (tables == NULL) tables = _pcre_default_tables;
5051
5052 /* Check that the first field in the block is the magic number. If it is not,
5053 test for a regex that was compiled on a host of opposite endianness. If this is
5054 the case, flipped values are put in internal_re and internal_study if there was
5055 study data too. */
5056
5057 if (re->magic_number != MAGIC_NUMBER)
5058 {
5059 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
5060 if (re == NULL) return PCRE_ERROR_BADMAGIC;
5061 if (study != NULL) study = &internal_study;
5062 }
5063
5064 /* Set up other data */
5065
5066 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
5067 startline = (re->flags & PCRE_STARTLINE) != 0;
5068 firstline = (re->options & PCRE_FIRSTLINE) != 0;
5069
5070 /* The code starts after the real_pcre block and the capture name table. */
5071
5072 md->start_code = (const uschar *)external_re + re->name_table_offset +
5073 re->name_count * re->name_entry_size;
5074
5075 md->start_subject = (USPTR)subject;
5076 md->start_offset = start_offset;
5077 md->end_subject = md->start_subject + length;
5078 end_subject = md->end_subject;
5079
5080 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5081 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5082 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5083
5084 md->notbol = (options & PCRE_NOTBOL) != 0;
5085 md->noteol = (options & PCRE_NOTEOL) != 0;
5086 md->notempty = (options & PCRE_NOTEMPTY) != 0;
5087 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5088 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5089 ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5090 md->hitend = FALSE;
5091
5092 md->recursive = NULL; /* No recursion at top level */
5093
5094 md->lcc = tables + lcc_offset;
5095 md->ctypes = tables + ctypes_offset;
5096
5097 /* Handle different \R options. */
5098
5099 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5100 {
5101 case 0:
5102 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
5103 md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
5104 else
5105 #ifdef BSR_ANYCRLF
5106 md->bsr_anycrlf = TRUE;
5107 #else
5108 md->bsr_anycrlf = FALSE;
5109 #endif
5110 break;
5111
5112 case PCRE_BSR_ANYCRLF:
5113 md->bsr_anycrlf = TRUE;
5114 break;
5115
5116 case PCRE_BSR_UNICODE:
5117 md->bsr_anycrlf = FALSE;
5118 break;
5119
5120 default: return PCRE_ERROR_BADNEWLINE;
5121 }
5122
5123 /* Handle different types of newline. The three bits give eight cases. If
5124 nothing is set at run time, whatever was used at compile time applies. */
5125
5126 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
5127 (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5128 {
5129 case 0: newline = NEWLINE; break; /* Compile-time default */
5130 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5131 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5132 case PCRE_NEWLINE_CR+
5133 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5134 case PCRE_NEWLINE_ANY: newline = -1; break;
5135 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5136 default: return PCRE_ERROR_BADNEWLINE;
5137 }
5138
5139 if (newline == -2)
5140 {
5141 md->nltype = NLTYPE_ANYCRLF;
5142 }
5143 else if (newline < 0)
5144 {
5145 md->nltype = NLTYPE_ANY;
5146 }
5147 else
5148 {
5149 md->nltype = NLTYPE_FIXED;
5150 if (newline > 255)
5151 {
5152 md->nllen = 2;
5153 md->nl[0] = (newline >> 8) & 255;
5154 md->nl[1] = newline & 255;
5155 }
5156 else
5157 {
5158 md->nllen = 1;
5159 md->nl[0] = newline;
5160 }
5161 }
5162
5163 /* Partial matching was originally supported only for a restricted set of
5164 regexes; from release 8.00 there are no restrictions, but the bits are still
5165 defined (though never set). So there's no harm in leaving this code. */
5166
5167 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5168 return PCRE_ERROR_BADPARTIAL;
5169
5170 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
5171 back the character offset. */
5172
5173 #ifdef SUPPORT_UTF8
5174 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5175 {
5176 if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
5177 return PCRE_ERROR_BADUTF8;
5178 if (start_offset > 0 && start_offset < length)
5179 {
5180 int tb = ((USPTR)subject)[start_offset];
5181 if (tb > 127)
5182 {
5183 tb &= 0xc0;
5184 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
5185 }
5186 }
5187 }
5188 #endif
5189
5190 /* The ims options can vary during the matching as a result of the presence
5191 of (?ims) items in the pattern. They are kept in a local variable so that
5192 restoring at the exit of a group is easy. */
5193
5194 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
5195
5196 /* If the expression has got more back references than the offsets supplied can
5197 hold, we get a temporary chunk of working store to use during the matching.
5198 Otherwise, we can use the vector supplied, rounding down its size to a multiple
5199 of 3. */
5200
5201 ocount = offsetcount - (offsetcount % 3);
5202
5203 if (re->top_backref > 0 && re->top_backref >= ocount/3)
5204 {
5205 ocount = re->top_backref * 3 + 3;
5206 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
5207 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
5208 using_temporary_offsets = TRUE;
5209 DPRINTF(("Got memory to hold back references\n"));
5210 }
5211 else md->offset_vector = offsets;
5212
5213 md->offset_end = ocount;
5214 md->offset_max = (2*ocount)/3;
5215 md->offset_overflow = FALSE;
5216 md->capture_last = -1;
5217
5218 /* Compute the minimum number of offsets that we need to reset each time. Doing
5219 this makes a huge difference to execution time when there aren't many brackets
5220 in the pattern. */
5221
5222 resetcount = 2 + re->top_bracket * 2;
5223 if (resetcount > offsetcount) resetcount = ocount;
5224
5225 /* Reset the working variable associated with each extraction. These should
5226 never be used unless previously set, but they get saved and restored, and so we
5227 initialize them to avoid reading uninitialized locations. */
5228
5229 if (md->offset_vector != NULL)
5230 {
5231 register int *iptr = md->offset_vector + ocount;
5232 register int *iend = iptr - resetcount/2 + 1;
5233 while (--iptr >= iend) *iptr = -1;
5234 }
5235
5236 /* Set up the first character to match, if available. The first_byte value is
5237 never set for an anchored regular expression, but the anchoring may be forced
5238 at run time, so we have to test for anchoring. The first char may be unset for
5239 an unanchored pattern, of course. If there's no first char and the pattern was
5240 studied, there may be a bitmap of possible first characters. */
5241
5242 if (!anchored)
5243 {
5244 if ((re->flags & PCRE_FIRSTSET) != 0)
5245 {
5246 first_byte = re->first_byte & 255;
5247 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
5248 first_byte = md->lcc[first_byte];
5249 }
5250 else
5251 if (!startline && study != NULL &&
5252 (study->flags & PCRE_STUDY_MAPPED) != 0)
5253 start_bits = study->start_bits;
5254 }
5255
5256 /* For anchored or unanchored matches, there may be a "last known required
5257 character" set. */
5258
5259 if ((re->flags & PCRE_REQCHSET) != 0)
5260 {
5261 req_byte = re->req_byte & 255;
5262 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
5263 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
5264 }
5265
5266
5267 /* ==========================================================================*/
5268
5269 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
5270 the loop runs just once. */
5271
5272 for(;;)
5273 {
5274 USPTR save_end_subject = end_subject;
5275 USPTR new_start_match;
5276
5277 /* Reset the maximum number of extractions we might see. */
5278
5279 if (md->offset_vector != NULL)
5280 {
5281 register int *iptr = md->offset_vector;
5282 register int *iend = iptr + resetcount;
5283 while (iptr < iend) *iptr++ = -1;
5284 }
5285
5286 /* If firstline is TRUE, the start of the match is constrained to the first
5287 line of a multiline string. That is, the match must be before or at the first
5288 newline. Implement this by temporarily adjusting end_subject so that we stop
5289 scanning at a newline. If the match fails at the newline, later code breaks
5290 this loop. */
5291
5292 if (firstline)
5293 {
5294 USPTR t = start_match;
5295 #ifdef SUPPORT_UTF8
5296 if (utf8)
5297 {
5298 while (t < md->end_subject && !IS_NEWLINE(t))
5299 {
5300 t++;
5301 while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5302 }
5303 }
5304 else
5305 #endif
5306 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5307 end_subject = t;
5308 }
5309
5310 /* There are some optimizations that avoid running the match if a known
5311 starting point is not found, or if a known later character is not present.
5312 However, there is an option that disables these, for testing and for ensuring
5313 that all callouts do actually occur. */
5314
5315 if ((options & PCRE_NO_START_OPTIMIZE) == 0)
5316 {
5317 /* Advance to a unique first byte if there is one. */
5318
5319 if (first_byte >= 0)
5320 {
5321 if (first_byte_caseless)
5322 while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5323 start_match++;
5324 else
5325 while (start_match < end_subject && *start_match != first_byte)
5326 start_match++;
5327 }
5328
5329 /* Or to just after a linebreak for a multiline match */
5330
5331 else if (startline)
5332 {
5333 if (start_match > md->start_subject + start_offset)
5334 {
5335 #ifdef SUPPORT_UTF8
5336 if (utf8)
5337 {
5338 while (start_match < end_subject && !WAS_NEWLINE(start_match))
5339 {
5340 start_match++;
5341 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5342 start_match++;
5343 }
5344 }
5345 else
5346 #endif
5347 while (start_match < end_subject && !WAS_NEWLINE(start_match))
5348 start_match++;
5349
5350 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5351 and we are now at a LF, advance the match position by one more character.
5352 */
5353
5354 if (start_match[-1] == CHAR_CR &&
5355 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5356 start_match < end_subject &&
5357 *start_match == CHAR_NL)
5358 start_match++;
5359 }
5360 }
5361
5362 /* Or to a non-unique first byte after study */
5363
5364 else if (start_bits != NULL)
5365 {
5366 while (start_match < end_subject)
5367 {
5368 register unsigned int c = *start_match;
5369 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
5370 else break;
5371 }
5372 }
5373 } /* Starting optimizations */
5374
5375 /* Restore fudged end_subject */
5376
5377 end_subject = save_end_subject;
5378
5379 /* The following two optimizations are disabled for partial matching or if
5380 disabling is explicitly requested. */
5381
5382 if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
5383 {
5384 /* If the pattern was studied, a minimum subject length may be set. This is
5385 a lower bound; no actual string of that length may actually match the
5386 pattern. Although the value is, strictly, in characters, we treat it as
5387 bytes to avoid spending too much time in this optimization. */
5388
5389 if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
5390 end_subject - start_match < study->minlength)
5391 {
5392 rc = MATCH_NOMATCH;
5393 break;
5394 }
5395
5396 /* If req_byte is set, we know that that character must appear in the
5397 subject for the match to succeed. If the first character is set, req_byte
5398 must be later in the subject; otherwise the test starts at the match point.
5399 This optimization can save a huge amount of backtracking in patterns with
5400 nested unlimited repeats that aren't going to match. Writing separate code
5401 for cased/caseless versions makes it go faster, as does using an
5402 autoincrement and backing off on a match.
5403
5404 HOWEVER: when the subject string is very, very long, searching to its end
5405 can take a long time, and give bad performance on quite ordinary patterns.
5406 This showed up when somebody was matching something like /^\d+C/ on a
5407 32-megabyte string... so we don't do this when the string is sufficiently
5408 long. */
5409
5410 if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
5411 {
5412 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
5413
5414 /* We don't need to repeat the search if we haven't yet reached the
5415 place we found it at last time. */
5416
5417 if (p > req_byte_ptr)
5418 {
5419 if (req_byte_caseless)
5420 {
5421 while (p < end_subject)
5422 {
5423 register int pp = *p++;
5424 if (pp == req_byte || pp == req_byte2) { p--; break; }
5425 }
5426 }
5427 else
5428 {
5429 while (p < end_subject)
5430 {
5431 if (*p++ == req_byte) { p--; break; }
5432 }
5433 }
5434
5435 /* If we can't find the required character, break the matching loop,
5436 forcing a match failure. */
5437
5438 if (p >= end_subject)
5439 {
5440 rc = MATCH_NOMATCH;
5441 break;
5442 }
5443
5444 /* If we have found the required character, save the point where we
5445 found it, so that we don't search again next time round the loop if
5446 the start hasn't passed this character yet. */
5447
5448 req_byte_ptr = p;
5449 }
5450 }
5451 }
5452
5453 #ifdef DEBUG /* Sigh. Some compilers never learn. */
5454 printf(">>>> Match against: ");
5455 pchars(start_match, end_subject - start_match, TRUE, md);
5456 printf("\n");
5457 #endif
5458
5459 /* OK, we can now run the match. If "hitend" is set afterwards, remember the
5460 first starting point for which a partial match was found. */
5461
5462 md->start_match_ptr = start_match;
5463 md->start_used_ptr = start_match;
5464 md->match_call_count = 0;
5465 rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
5466 if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
5467
5468 switch(rc)
5469 {
5470 /* NOMATCH and PRUNE advance by one character. THEN at this level acts
5471 exactly like PRUNE. */
5472
5473 case MATCH_NOMATCH:
5474 case MATCH_PRUNE:
5475 case MATCH_THEN:
5476 new_start_match = start_match + 1;
5477 #ifdef SUPPORT_UTF8
5478 if (utf8)
5479 while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
5480 new_start_match++;
5481 #endif
5482 break;
5483
5484 /* SKIP passes back the next starting point explicitly. */
5485
5486 case MATCH_SKIP:
5487 new_start_match = md->start_match_ptr;
5488 break;
5489
5490 /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
5491
5492 case MATCH_COMMIT:
5493 rc = MATCH_NOMATCH;
5494 goto ENDLOOP;
5495
5496 /* Any other return is either a match, or some kind of error. */
5497
5498 default:
5499 goto ENDLOOP;
5500 }
5501
5502 /* Control reaches here for the various types of "no match at this point"
5503 result. Reset the code to MATCH_NOMATCH for subsequent checking. */
5504
5505 rc = MATCH_NOMATCH;
5506
5507 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
5508 newline in the subject (though it may continue over the newline). Therefore,
5509 if we have just failed to match, starting at a newline, do not continue. */
5510
5511 if (firstline && IS_NEWLINE(start_match)) break;
5512
5513 /* Advance to new matching position */
5514
5515 start_match = new_start_match;
5516
5517 /* Break the loop if the pattern is anchored or if we have passed the end of
5518 the subject. */
5519
5520 if (anchored || start_match > end_subject) break;
5521
5522 /* If we have just passed a CR and we are now at a LF, and the pattern does
5523 not contain any explicit matches for \r or \n, and the newline option is CRLF
5524 or ANY or ANYCRLF, advance the match position by one more character. */
5525
5526 if (start_match[-1] == CHAR_CR &&
5527 start_match < end_subject &&
5528 *start_match == CHAR_NL &&
5529 (re->flags & PCRE_HASCRORLF) == 0 &&
5530 (md->nltype == NLTYPE_ANY ||
5531 md->nltype == NLTYPE_ANYCRLF ||
5532 md->nllen == 2))
5533 start_match++;
5534
5535 } /* End of for(;;) "bumpalong" loop */
5536
5537 /* ==========================================================================*/
5538
5539 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
5540 conditions is true:
5541
5542 (1) The pattern is anchored or the match was failed by (*COMMIT);
5543
5544 (2) We are past the end of the subject;
5545
5546 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
5547 this option requests that a match occur at or before the first newline in
5548 the subject.
5549
5550 When we have a match and the offset vector is big enough to deal with any
5551 backreferences, captured substring offsets will already be set up. In the case
5552 where we had to get some local store to hold offsets for backreference
5553 processing, copy those that we can. In this case there need not be overflow if
5554 certain parts of the pattern were not used, even though there are more
5555 capturing parentheses than vector slots. */
5556
5557 ENDLOOP:
5558
5559 if (rc == MATCH_MATCH)
5560 {
5561 if (using_temporary_offsets)
5562 {
5563 if (offsetcount >= 4)
5564 {
5565 memcpy(offsets + 2, md->offset_vector + 2,
5566 (offsetcount - 2) * sizeof(int));
5567 DPRINTF(("Copied offsets from temporary memory\n"));
5568 }
5569 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
5570 DPRINTF(("Freeing temporary memory\n"));
5571 (pcre_free)(md->offset_vector);
5572 }
5573
5574 /* Set the return code to the number of captured strings, or 0 if there are
5575 too many to fit into the vector. */
5576
5577 rc = md->offset_overflow? 0 : md->end_offset_top/2;
5578
5579 /* If there is space, set up the whole thing as substring 0. The value of
5580 md->start_match_ptr might be modified if \K was encountered on the success
5581 matching path. */
5582
5583 if (offsetcount < 2) rc = 0; else
5584 {
5585 offsets[0] = md->start_match_ptr - md->start_subject;
5586 offsets[1] = md->end_match_ptr - md->start_subject;
5587 }
5588
5589 DPRINTF((">>>> returning %d\n", rc));
5590 return rc;
5591 }
5592
5593 /* Control gets here if there has been an error, or if the overall match
5594 attempt has failed at all permitted starting positions. */
5595
5596 if (using_temporary_offsets)
5597 {
5598 DPRINTF(("Freeing temporary memory\n"));
5599 (pcre_free)(md->offset_vector);
5600 }
5601
5602 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
5603 {
5604 DPRINTF((">>>> error: returning %d\n", rc));
5605 return rc;
5606 }
5607 else if (start_partial != NULL)
5608 {
5609 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
5610 if (offsetcount > 1)
5611 {
5612 offsets[0] = start_partial - (USPTR)subject;
5613 offsets[1] = end_subject - (USPTR)subject;
5614 }
5615 return PCRE_ERROR_PARTIAL;
5616 }
5617 else
5618 {
5619 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
5620 return PCRE_ERROR_NOMATCH;
5621 }
5622 }
5623
5624 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12