/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 123 - (show annotations) (download)
Mon Mar 12 15:19:06 2007 UTC (7 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 127127 byte(s)
Removal of trailing spaces.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #define NLBLOCK md /* Block containing newline information */
46 #define PSSTART start_subject /* Field containing processed string start */
47 #define PSEND end_subject /* Field containing processed string end */
48
49 #include "pcre_internal.h"
50
51 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
52 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
53
54 #define EPTR_WORK_SIZE (1000)
55
/* Bits that may be set in the "flags" argument of match(). */

#define match_condassert     (1 << 0)  /* Checking the assertion of a condition */
#define match_cbegroup       (1 << 1)  /* Unlimited repeat group that could be empty */
#define match_tail_recursed  (1 << 2)  /* Entered by tail recursion */

/* Results that match() hands back when nothing went wrong. Errors are reported
with the externally defined PCRE_ERROR_xxx codes, all of which are negative. */

#define MATCH_NOMATCH  0
#define MATCH_MATCH    1

/* Largest number of offset-vector ints that is saved on the stack for a
recursive call; a bigger offset vector is saved with malloc instead. Keep this
a multiple of 3, because the length of the offset vector always is one. */

#define REC_STACK_SAVE_MAX  30
73
/* Minimum and maximum counts for the common repeats. The two tables are
indexed in parallel; a maximum of 0 stands for "no upper limit". */

static const char rep_min[] = {
  0, 0,    /* paired with max 0 (unlimited) */
  1, 1,    /* paired with max 0 (unlimited) */
  0, 0     /* paired with max 1 */
};

static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1
};
78
79
80
81 #ifdef DEBUG
82 /*************************************************
83 * Debugging function to print chars *
84 *************************************************/
85
86 /* Print a sequence of chars in printable format, stopping at the end of the
87 subject if the requested.
88
89 Arguments:
90 p points to characters
91 length number to print
92 is_subject TRUE if printing from within md->start_subject
93 md pointer to matching data block, if is_subject is TRUE
94
95 Returns: nothing
96 */
97
98 static void
99 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
100 {
101 unsigned int c;
102 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
103 while (length-- > 0)
104 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
105 }
106 #endif
107
108
109
110 /*************************************************
111 * Match a back-reference *
112 *************************************************/
113
114 /* If a back reference hasn't been set, the length that is passed is greater
115 than the number of characters left in the string, so the match fails.
116
117 Arguments:
118 offset index into the offset vector
119 eptr points into the subject
120 length length to be matched
121 md points to match data block
122 ims the ims flags
123
124 Returns: TRUE if matched
125 */
126
127 static BOOL
128 match_ref(int offset, register USPTR eptr, int length, match_data *md,
129 unsigned long int ims)
130 {
131 USPTR p = md->start_subject + md->offset_vector[offset];
132
133 #ifdef DEBUG
134 if (eptr >= md->end_subject)
135 printf("matching subject <null>");
136 else
137 {
138 printf("matching subject ");
139 pchars(eptr, length, TRUE, md);
140 }
141 printf(" against backref ");
142 pchars(p, length, FALSE, md);
143 printf("\n");
144 #endif
145
146 /* Always fail if not enough characters left */
147
148 if (length > md->end_subject - eptr) return FALSE;
149
150 /* Separate the caselesss case for speed */
151
152 if ((ims & PCRE_CASELESS) != 0)
153 {
154 while (length-- > 0)
155 if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
156 }
157 else
158 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
159
160 return TRUE;
161 }
162
163
164
165 /***************************************************************************
166 ****************************************************************************
167 RECURSION IN THE match() FUNCTION
168
169 The match() function is highly recursive, though not every recursive call
170 increases the recursive depth. Nevertheless, some regular expressions can cause
171 it to recurse to a great depth. I was writing for Unix, so I just let it call
172 itself recursively. This uses the stack for saving everything that has to be
173 saved for a recursive call. On Unix, the stack can be large, and this works
174 fine.
175
176 It turns out that on some non-Unix-like systems there are problems with
177 programs that use a lot of stack. (This despite the fact that every last chip
178 has oodles of memory these days, and techniques for extending the stack have
179 been known for decades.) So....
180
181 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
182 calls by keeping local variables that need to be preserved in blocks of memory
183 obtained from malloc() instead of on the stack. Macros are used to
184 achieve this so that the actual code doesn't look very different to what it
185 always used to.
186 ****************************************************************************
187 ***************************************************************************/
188
189
190 /* These versions of the macros use the stack, as normal. There are debugging
191 versions and production versions. */
192
193 #ifndef NO_RECURSE
194 #define REGISTER register
#ifdef DEBUG

/* Debugging versions. RMATCH traces the line a recursive call to match() is
made from and the line it comes back to; RRETURN traces the value returned.
Both are wrapped in do { } while (0) so that "RMATCH(...);" and "RRETURN(x);"
are single statements and stay valid as the unbraced body of an if that has an
else. RRETURN evaluates its argument once only, into a temporary, so that an
argument with side effects behaves the same as in the production version. */

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  do \
    { \
    printf("match() called in line %d\n", __LINE__); \
    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
    printf("to line %d\n", __LINE__); \
    } \
  while (0)

#define RRETURN(ra) \
  do \
    { \
    int rreturn_value = (ra); \
    printf("match() returned %d from line %d ", rreturn_value, __LINE__); \
    return rreturn_value; \
    } \
  while (0)

#else

/* Production versions: a plain recursive call to match() with the recursion
depth incremented, and a plain return. */

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
#define RRETURN(ra) return (ra)

#endif
212
213 #else
214
215
216 /* These versions of the macros manage a private stack on the heap. Note
217 that the rd argument of RMATCH isn't actually used. It's the md argument of
218 match(), which never changes. */
219
220 #define REGISTER
221
/* RMATCH "calls" match() without using the C stack. It gets a new frame from
pcre_stack_malloc, records the return point in the current frame with setjmp(),
fills in the new frame's argument fields, makes it the current frame and jumps
to HEAP_RECURSE. When RRETURN later longjmps back, the current frame is
recovered from md->thisframe and the result is taken from its Xresult field.

If the new frame cannot be obtained, the current "call" gives up with
PCRE_ERROR_NOMEMORY instead of writing through a NULL pointer. */

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL)\
    {\
    RRETURN(PCRE_ERROR_NOMEMORY);\
    }\
  if (setjmp(frame->Xwhere) == 0)\
    {\
    newframe->Xeptr = ra;\
    newframe->Xecode = rb;\
    newframe->Xoffset_top = rc;\
    newframe->Xims = re;\
    newframe->Xeptrb = rf;\
    newframe->Xflags = rg;\
    newframe->Xrdepth = frame->Xrdepth + 1;\
    newframe->Xprevframe = frame;\
    frame = newframe;\
    DPRINTF(("restarting from line %d\n", __LINE__));\
    goto HEAP_RECURSE;\
    }\
  else\
    {\
    DPRINTF(("longjumped back to line %d\n", __LINE__));\
    frame = md->thisframe;\
    rx = frame->Xresult;\
    }\
  }

/* RRETURN "returns" from the current frame. The value is evaluated first,
while the current frame is still in place, because the frame is then unlinked
and freed. If there is a previous frame, the value is stored in its Xresult
field and control longjmps to the point saved in it by RMATCH; otherwise this
was the top-level frame and match() really returns. */

#define RRETURN(ra)\
  {\
  int rreturn_value = (ra);\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    frame->Xresult = rreturn_value;\
    md->thisframe = frame;\
    longjmp(frame->Xwhere, 1);\
    }\
  return rreturn_value;\
  }
260
261
262 /* Structure for remembering the local variables in a private frame */
263
264 typedef struct heapframe {
265 struct heapframe *Xprevframe;
266
267 /* Function arguments that may change */
268
269 const uschar *Xeptr;
270 const uschar *Xecode;
271 int Xoffset_top;
272 long int Xims;
273 eptrblock *Xeptrb;
274 int Xflags;
275 unsigned int Xrdepth;
276
277 /* Function local variables */
278
279 const uschar *Xcallpat;
280 const uschar *Xcharptr;
281 const uschar *Xdata;
282 const uschar *Xnext;
283 const uschar *Xpp;
284 const uschar *Xprev;
285 const uschar *Xsaved_eptr;
286
287 recursion_info Xnew_recursive;
288
289 BOOL Xcur_is_word;
290 BOOL Xcondition;
291 BOOL Xprev_is_word;
292
293 unsigned long int Xoriginal_ims;
294
295 #ifdef SUPPORT_UCP
296 int Xprop_type;
297 int Xprop_value;
298 int Xprop_fail_result;
299 int Xprop_category;
300 int Xprop_chartype;
301 int Xprop_script;
302 int Xoclength;
303 uschar Xocchars[8];
304 #endif
305
306 int Xctype;
307 unsigned int Xfc;
308 int Xfi;
309 int Xlength;
310 int Xmax;
311 int Xmin;
312 int Xnumber;
313 int Xoffset;
314 int Xop;
315 int Xsave_capture_last;
316 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
317 int Xstacksave[REC_STACK_SAVE_MAX];
318
319 eptrblock Xnewptrb;
320
321 /* Place to pass back result, and where to jump back to */
322
323 int Xresult;
324 jmp_buf Xwhere;
325
326 } heapframe;
327
328 #endif
329
330
331 /***************************************************************************
332 ***************************************************************************/
333
334
335
336 /*************************************************
337 * Match from current position *
338 *************************************************/
339
340 /* This function is called recursively in many circumstances. Whenever it
341 returns a negative (error) response, the outer incarnation must also return the
342 same response.
343
344 Performance note: It might be tempting to extract commonly used fields from the
345 md structure (e.g. utf8, end_subject) into individual variables to improve
346 performance. Tests using gcc on a SPARC disproved this; in the first case, it
347 made performance worse.
348
349 Arguments:
350 eptr pointer to current character in subject
351 ecode pointer to current position in compiled code
352 offset_top current top pointer
353 md pointer to "static" info for the match
354 ims current /i, /m, and /s options
355 eptrb pointer to chain of blocks containing eptr at start of
356 brackets - for testing for empty matches
357 flags can contain
358 match_condassert - this is an assertion condition
359 match_cbegroup - this is the start of an unlimited repeat
360 group that can match an empty string
361 match_tail_recursed - this is a tail_recursed group
362 rdepth the recursion depth
363
364 Returns: MATCH_MATCH if matched ) these values are >= 0
365 MATCH_NOMATCH if failed to match )
366 a negative PCRE_ERROR_xxx value if aborted by an error condition
367 (e.g. stopped by repeated call or recursion limit)
368 */
369
370 static int
371 match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
372 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
373 int flags, unsigned int rdepth)
374 {
375 /* These variables do not need to be preserved over recursion in this function,
376 so they can be ordinary variables in all cases. Mark some of them with
377 "register" because they are used a lot in loops. */
378
379 register int rrc; /* Returns from recursive calls */
380 register int i; /* Used for loops not involving calls to RMATCH() */
381 register unsigned int c; /* Character values not kept over RMATCH() calls */
382 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
383
384 BOOL minimize, possessive; /* Quantifier options */
385
386 /* When recursion is not being used, all "local" variables that have to be
387 preserved over calls to RMATCH() are part of a "frame" which is obtained from
388 heap storage. Set up the top-level frame here; others are obtained from the
389 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
390
391 #ifdef NO_RECURSE
392 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
393 frame->Xprevframe = NULL; /* Marks the top level */
394
395 /* Copy in the original argument variables */
396
397 frame->Xeptr = eptr;
398 frame->Xecode = ecode;
399 frame->Xoffset_top = offset_top;
400 frame->Xims = ims;
401 frame->Xeptrb = eptrb;
402 frame->Xflags = flags;
403 frame->Xrdepth = rdepth;
404
405 /* This is where control jumps back to to effect "recursion" */
406
407 HEAP_RECURSE:
408
409 /* Macros make the argument variables come from the current frame */
410
411 #define eptr frame->Xeptr
412 #define ecode frame->Xecode
413 #define offset_top frame->Xoffset_top
414 #define ims frame->Xims
415 #define eptrb frame->Xeptrb
416 #define flags frame->Xflags
417 #define rdepth frame->Xrdepth
418
419 /* Ditto for the local variables */
420
421 #ifdef SUPPORT_UTF8
422 #define charptr frame->Xcharptr
423 #endif
424 #define callpat frame->Xcallpat
425 #define data frame->Xdata
426 #define next frame->Xnext
427 #define pp frame->Xpp
428 #define prev frame->Xprev
429 #define saved_eptr frame->Xsaved_eptr
430
431 #define new_recursive frame->Xnew_recursive
432
433 #define cur_is_word frame->Xcur_is_word
434 #define condition frame->Xcondition
435 #define prev_is_word frame->Xprev_is_word
436
437 #define original_ims frame->Xoriginal_ims
438
439 #ifdef SUPPORT_UCP
440 #define prop_type frame->Xprop_type
441 #define prop_value frame->Xprop_value
442 #define prop_fail_result frame->Xprop_fail_result
443 #define prop_category frame->Xprop_category
444 #define prop_chartype frame->Xprop_chartype
445 #define prop_script frame->Xprop_script
446 #define oclength frame->Xoclength
447 #define occhars frame->Xocchars
448 #endif
449
450 #define ctype frame->Xctype
451 #define fc frame->Xfc
452 #define fi frame->Xfi
453 #define length frame->Xlength
454 #define max frame->Xmax
455 #define min frame->Xmin
456 #define number frame->Xnumber
457 #define offset frame->Xoffset
458 #define op frame->Xop
459 #define save_capture_last frame->Xsave_capture_last
460 #define save_offset1 frame->Xsave_offset1
461 #define save_offset2 frame->Xsave_offset2
462 #define save_offset3 frame->Xsave_offset3
463 #define stacksave frame->Xstacksave
464
465 #define newptrb frame->Xnewptrb
466
467 /* When recursion is being used, local variables are allocated on the stack and
468 get preserved during recursion in the normal way. In this environment, fi and
469 i, and fc and c, can be the same variables. */
470
471 #else /* NO_RECURSE not defined */
472 #define fi i
473 #define fc c
474
475
476 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
477 const uschar *charptr; /* in small blocks of the code. My normal */
478 #endif /* style of coding would have declared */
479 const uschar *callpat; /* them within each of those blocks. */
480 const uschar *data; /* However, in order to accommodate the */
481 const uschar *next; /* version of this code that uses an */
482 USPTR pp; /* external "stack" implemented on the */
483 const uschar *prev; /* heap, it is easier to declare them all */
484 USPTR saved_eptr; /* here, so the declarations can be cut */
485 /* out in a block. The only declarations */
486 recursion_info new_recursive; /* within blocks below are for variables */
487 /* that do not have to be preserved over */
488 BOOL cur_is_word; /* a recursive call to RMATCH(). */
489 BOOL condition;
490 BOOL prev_is_word;
491
492 unsigned long int original_ims;
493
494 #ifdef SUPPORT_UCP
495 int prop_type;
496 int prop_value;
497 int prop_fail_result;
498 int prop_category;
499 int prop_chartype;
500 int prop_script;
501 int oclength;
502 uschar occhars[8];
503 #endif
504
505 int ctype;
506 int length;
507 int max;
508 int min;
509 int number;
510 int offset;
511 int op;
512 int save_capture_last;
513 int save_offset1, save_offset2, save_offset3;
514 int stacksave[REC_STACK_SAVE_MAX];
515
516 eptrblock newptrb;
517 #endif /* NO_RECURSE */
518
519 /* These statements are here to stop the compiler complaining about uninitialized
520 variables. */
521
522 #ifdef SUPPORT_UCP
523 prop_value = 0;
524 prop_fail_result = 0;
525 #endif
526
527
528 /* This label is used for tail recursion, which is used in a few cases even
529 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
530 used. Thanks to Ian Taylor for noticing this possibility and sending the
531 original patch. */
532
533 TAIL_RECURSE:
534
535 /* OK, now we can get on with the real code of the function. Recursive calls
536 are specified by the macro RMATCH and RRETURN is used to return. When
537 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
538 and a "return", respectively (possibly with some debugging if DEBUG is
539 defined). However, RMATCH isn't like a function call because it's quite a
540 complicated macro. It has to be used in one particular way. This shouldn't,
541 however, impact performance when true recursion is being used. */
542
543 /* First check that we haven't called match() too many times, or that we
544 haven't exceeded the recursive call limit. */
545
546 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
547 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
548
549 original_ims = ims; /* Save for resetting on ')' */
550
551 #ifdef SUPPORT_UTF8
552 utf8 = md->utf8; /* Local copy of the flag */
553 #else
554 utf8 = FALSE;
555 #endif
556
557 /* At the start of a group with an unlimited repeat that may match an empty
558 string, the match_cbegroup flag is set. When this is the case, add the current
559 subject pointer to the chain of such remembered pointers, to be checked when we
560 hit the closing ket, in order to break infinite loops that match no characters.
561 When match() is called in other circumstances, don't add to the chain. If this
562 is a tail recursion, use a block from the workspace, as the one on the stack is
563 already used. */
564
565 if ((flags & match_cbegroup) != 0)
566 {
567 eptrblock *p;
568 if ((flags & match_tail_recursed) != 0)
569 {
570 if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
571 p = md->eptrchain + md->eptrn++;
572 }
573 else p = &newptrb;
574 p->epb_saved_eptr = eptr;
575 p->epb_prev = eptrb;
576 eptrb = p;
577 }
578
579 /* Now start processing the opcodes. */
580
581 for (;;)
582 {
583 minimize = possessive = FALSE;
584 op = *ecode;
585
586 /* For partial matching, remember if we ever hit the end of the subject after
587 matching at least one subject character. */
588
589 if (md->partial &&
590 eptr >= md->end_subject &&
591 eptr > md->start_match)
592 md->hitend = TRUE;
593
594 switch(op)
595 {
596 /* Handle a capturing bracket. If there is space in the offset vector, save
597 the current subject position in the working slot at the top of the vector.
598 We mustn't change the current values of the data slot, because they may be
599 set from a previous iteration of this group, and be referred to by a
600 reference inside the group.
601
602 If the bracket fails to match, we need to restore this value and also the
603 values of the final offsets, in case they were set by a previous iteration
604 of the same bracket.
605
606 If there isn't enough space in the offset vector, treat this as if it were
607 a non-capturing bracket. Don't worry about setting the flag for the error
608 case here; that is handled in the code for KET. */
609
610 case OP_CBRA:
611 case OP_SCBRA:
612 number = GET2(ecode, 1+LINK_SIZE);
613 offset = number << 1;
614
615 #ifdef DEBUG
616 printf("start bracket %d\n", number);
617 printf("subject=");
618 pchars(eptr, 16, TRUE, md);
619 printf("\n");
620 #endif
621
622 if (offset < md->offset_max)
623 {
624 save_offset1 = md->offset_vector[offset];
625 save_offset2 = md->offset_vector[offset+1];
626 save_offset3 = md->offset_vector[md->offset_end - number];
627 save_capture_last = md->capture_last;
628
629 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
630 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
631
632 flags = (op == OP_SCBRA)? match_cbegroup : 0;
633 do
634 {
635 RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
636 ims, eptrb, flags);
637 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
638 md->capture_last = save_capture_last;
639 ecode += GET(ecode, 1);
640 }
641 while (*ecode == OP_ALT);
642
643 DPRINTF(("bracket %d failed\n", number));
644
645 md->offset_vector[offset] = save_offset1;
646 md->offset_vector[offset+1] = save_offset2;
647 md->offset_vector[md->offset_end - number] = save_offset3;
648
649 RRETURN(MATCH_NOMATCH);
650 }
651
652 /* Insufficient room for saving captured contents. Treat as a non-capturing
653 bracket. */
654
655 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
656
657 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
658 final alternative within the brackets, we would return the result of a
659 recursive call to match() whatever happened. We can reduce stack usage by
660 turning this into a tail recursion. */
661
662 case OP_BRA:
663 case OP_SBRA:
664 DPRINTF(("start non-capturing bracket\n"));
665 flags = (op >= OP_SBRA)? match_cbegroup : 0;
666 for (;;)
667 {
668 if (ecode[GET(ecode, 1)] != OP_ALT)
669 {
670 ecode += _pcre_OP_lengths[*ecode];
671 flags |= match_tail_recursed;
672 DPRINTF(("bracket 0 tail recursion\n"));
673 goto TAIL_RECURSE;
674 }
675
676 /* For non-final alternatives, continue the loop for a NOMATCH result;
677 otherwise return. */
678
679 RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
680 eptrb, flags);
681 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
682 ecode += GET(ecode, 1);
683 }
684 /* Control never reaches here. */
685
686 /* Conditional group: compilation checked that there are no more than
687 two branches. If the condition is false, skipping the first branch takes us
688 past the end if there is only one branch, but that's OK because that is
689 exactly what going to the ket would do. As there is only one branch to be
690 obeyed, we can use tail recursion to avoid using another stack frame. */
691
692 case OP_COND:
693 case OP_SCOND:
694 if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
695 {
696 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
697 condition = md->recursive != NULL &&
698 (offset == RREF_ANY || offset == md->recursive->group_num);
699 ecode += condition? 3 : GET(ecode, 1);
700 }
701
702 else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */
703 {
704 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
705 condition = offset < offset_top && md->offset_vector[offset] >= 0;
706 ecode += condition? 3 : GET(ecode, 1);
707 }
708
709 else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */
710 {
711 condition = FALSE;
712 ecode += GET(ecode, 1);
713 }
714
715 /* The condition is an assertion. Call match() to evaluate it - setting
716 the final argument match_condassert causes it to stop at the end of an
717 assertion. */
718
719 else
720 {
721 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
722 match_condassert);
723 if (rrc == MATCH_MATCH)
724 {
725 condition = TRUE;
726 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
727 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
728 }
729 else if (rrc != MATCH_NOMATCH)
730 {
731 RRETURN(rrc); /* Need braces because of following else */
732 }
733 else
734 {
735 condition = FALSE;
736 ecode += GET(ecode, 1);
737 }
738 }
739
740 /* We are now at the branch that is to be obeyed. As there is only one,
741 we can use tail recursion to avoid using another stack frame. If the second
742 alternative doesn't exist, we can just plough on. */
743
744 if (condition || *ecode == OP_ALT)
745 {
746 ecode += 1 + LINK_SIZE;
747 flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
748 goto TAIL_RECURSE;
749 }
750 else
751 {
752 ecode += 1 + LINK_SIZE;
753 }
754 break;
755
756
757 /* End of the pattern. If we are in a top-level recursion, we should
758 restore the offsets appropriately and continue from after the call. */
759
760 case OP_END:
761 if (md->recursive != NULL && md->recursive->group_num == 0)
762 {
763 recursion_info *rec = md->recursive;
764 DPRINTF(("End of pattern in a (?0) recursion\n"));
765 md->recursive = rec->prevrec;
766 memmove(md->offset_vector, rec->offset_save,
767 rec->saved_max * sizeof(int));
768 md->start_match = rec->save_start;
769 ims = original_ims;
770 ecode = rec->after_call;
771 break;
772 }
773
774 /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
775 string - backtracking will then try other alternatives, if any. */
776
777 if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);
778 md->end_match_ptr = eptr; /* Record where we ended */
779 md->end_offset_top = offset_top; /* and how many extracts were taken */
780 RRETURN(MATCH_MATCH);
781
782 /* Change option settings */
783
784 case OP_OPT:
785 ims = ecode[1];
786 ecode += 2;
787 DPRINTF(("ims set to %02lx\n", ims));
788 break;
789
790 /* Assertion brackets. Check the alternative branches in turn - the
791 matching won't pass the KET for an assertion. If any one branch matches,
792 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
793 start of each branch to move the current point backwards, so the code at
794 this level is identical to the lookahead case. */
795
796 case OP_ASSERT:
797 case OP_ASSERTBACK:
798 do
799 {
800 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
801 if (rrc == MATCH_MATCH) break;
802 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
803 ecode += GET(ecode, 1);
804 }
805 while (*ecode == OP_ALT);
806 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
807
808 /* If checking an assertion for a condition, return MATCH_MATCH. */
809
810 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
811
812 /* Continue from after the assertion, updating the offsets high water
813 mark, since extracts may have been taken during the assertion. */
814
815 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
816 ecode += 1 + LINK_SIZE;
817 offset_top = md->end_offset_top;
818 continue;
819
820 /* Negative assertion: all branches must fail to match */
821
822 case OP_ASSERT_NOT:
823 case OP_ASSERTBACK_NOT:
824 do
825 {
826 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
827 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
828 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
829 ecode += GET(ecode,1);
830 }
831 while (*ecode == OP_ALT);
832
833 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
834
835 ecode += 1 + LINK_SIZE;
836 continue;
837
838 /* Move the subject pointer back. This occurs only at the start of
839 each branch of a lookbehind assertion. If we are too close to the start to
840 move back, this match function fails. When working with UTF-8 we move
841 back a number of characters, not bytes. */
842
843 case OP_REVERSE:
844 #ifdef SUPPORT_UTF8
845 if (utf8)
846 {
847 i = GET(ecode, 1);
848 while (i-- > 0)
849 {
850 eptr--;
851 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
852 BACKCHAR(eptr)
853 }
854 }
855 else
856 #endif
857
858 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
859
860 {
861 eptr -= GET(ecode, 1);
862 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
863 }
864
865 /* Skip to next op code */
866
867 ecode += 1 + LINK_SIZE;
868 break;
869
870 /* The callout item calls an external function, if one is provided, passing
871 details of the match so far. This is mainly for debugging, though the
872 function is able to force a failure. */
873
874 case OP_CALLOUT:
875 if (pcre_callout != NULL)
876 {
877 pcre_callout_block cb;
878 cb.version = 1; /* Version 1 of the callout block */
879 cb.callout_number = ecode[1];
880 cb.offset_vector = md->offset_vector;
881 cb.subject = (PCRE_SPTR)md->start_subject;
882 cb.subject_length = md->end_subject - md->start_subject;
883 cb.start_match = md->start_match - md->start_subject;
884 cb.current_position = eptr - md->start_subject;
885 cb.pattern_position = GET(ecode, 2);
886 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
887 cb.capture_top = offset_top/2;
888 cb.capture_last = md->capture_last;
889 cb.callout_data = md->callout_data;
890 if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
891 if (rrc < 0) RRETURN(rrc);
892 }
893 ecode += 2 + 2*LINK_SIZE;
894 break;
895
896 /* Recursion either matches the current regex, or some subexpression. The
897 offset data is the offset to the starting bracket from the start of the
898 whole pattern. (This is so that it works from duplicated subpatterns.)
899
900 If there are any capturing brackets started but not finished, we have to
901 save their starting points and reinstate them after the recursion. However,
902 we don't know how many such there are (offset_top records the completed
903 total) so we just have to save all the potential data. There may be up to
904 65535 such values, which is too large to put on the stack, but using malloc
905 for small numbers seems expensive. As a compromise, the stack is used when
906 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
907 is used. A problem is what to do if the malloc fails ... there is no way of
908 returning to the top level with an error. Save the top REC_STACK_SAVE_MAX
909 values on the stack, and accept that the rest may be wrong.
910
911 There are also other values that have to be saved. We use a chained
912 sequence of blocks that actually live on the stack. Thanks to Robin Houston
913 for the original version of this logic. */
914
915 case OP_RECURSE:
916 {
917 callpat = md->start_code + GET(ecode, 1);
918 new_recursive.group_num = (callpat == md->start_code)? 0 :
919 GET2(callpat, 1 + LINK_SIZE);
920
921 /* Add to "recursing stack" */
922
923 new_recursive.prevrec = md->recursive;
924 md->recursive = &new_recursive;
925
926 /* Find where to continue from afterwards */
927
928 ecode += 1 + LINK_SIZE;
929 new_recursive.after_call = ecode;
930
931 /* Now save the offset data. */
932
933 new_recursive.saved_max = md->offset_end;
934 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
935 new_recursive.offset_save = stacksave;
936 else
937 {
938 new_recursive.offset_save =
939 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
940 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
941 }
942
943 memcpy(new_recursive.offset_save, md->offset_vector,
944 new_recursive.saved_max * sizeof(int));
945 new_recursive.save_start = md->start_match;
946 md->start_match = eptr;
947
948 /* OK, now we can do the recursion. For each top-level alternative we
949 restore the offset and recursion data. */
950
951 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
952 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
953 do
954 {
955 RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
956 md, ims, eptrb, flags);
957 if (rrc == MATCH_MATCH)
958 {
959 DPRINTF(("Recursion matched\n"));
960 md->recursive = new_recursive.prevrec;
961 if (new_recursive.offset_save != stacksave)
962 (pcre_free)(new_recursive.offset_save);
963 RRETURN(MATCH_MATCH);
964 }
965 else if (rrc != MATCH_NOMATCH)
966 {
967 DPRINTF(("Recursion gave error %d\n", rrc));
968 RRETURN(rrc);
969 }
970
971 md->recursive = &new_recursive;
972 memcpy(md->offset_vector, new_recursive.offset_save,
973 new_recursive.saved_max * sizeof(int));
974 callpat += GET(callpat, 1);
975 }
976 while (*callpat == OP_ALT);
977
978 DPRINTF(("Recursion didn't match\n"));
979 md->recursive = new_recursive.prevrec;
980 if (new_recursive.offset_save != stacksave)
981 (pcre_free)(new_recursive.offset_save);
982 RRETURN(MATCH_NOMATCH);
983 }
984 /* Control never reaches here */
985
986 /* "Once" brackets are like assertion brackets except that after a match,
987 the point in the subject string is not moved back. Thus there can never be
988 a move back into the brackets. Friedl calls these "atomic" subpatterns.
989 Check the alternative branches in turn - the matching won't pass the KET
990 for this kind of subpattern. If any one branch matches, we carry on as at
991 the end of a normal bracket, leaving the subject pointer. */
992
993 case OP_ONCE:
994 prev = ecode;
995 saved_eptr = eptr;
996
997 do
998 {
999 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1000 eptrb, 0);
1001 if (rrc == MATCH_MATCH) break;
1002 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1003 ecode += GET(ecode,1);
1004 }
1005 while (*ecode == OP_ALT);
1006
1007 /* If we hit the end of the group (which could be repeated), fail */
1008
1009 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1010
1011 /* Continue from after the atomic group, updating the offsets high water
1012 mark, since extracts may have been taken. */
1013
1014 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1015
1016 offset_top = md->end_offset_top;
1017 eptr = md->end_match_ptr;
1018
1019 /* For a non-repeating ket, just continue at this level. This also
1020 happens for a repeating ket if no characters were matched in the group.
1021 This is the forcible breaking of infinite loops as implemented in Perl
1022 5.005. If there is an options reset, it will get obeyed in the normal
1023 course of events. */
1024
1025 if (*ecode == OP_KET || eptr == saved_eptr)
1026 {
1027 ecode += 1+LINK_SIZE;
1028 break;
1029 }
1030
1031 /* The repeating kets try the rest of the pattern or restart from the
1032 preceding bracket, in the appropriate order. The second "call" of match()
1033 uses tail recursion, to avoid using another stack frame. We need to reset
1034 any options that changed within the bracket before re-running it, so
1035 check the next opcode. */
1036
1037 if (ecode[1+LINK_SIZE] == OP_OPT)
1038 {
1039 ims = (ims & ~PCRE_IMS) | ecode[4];
1040 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1041 }
1042
1043 if (*ecode == OP_KETRMIN)
1044 {
1045 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1046 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1047 ecode = prev;
1048 flags = match_tail_recursed;
1049 goto TAIL_RECURSE;
1050 }
1051 else /* OP_KETRMAX */
1052 {
1053 RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
1054 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1055 ecode += 1 + LINK_SIZE;
1056 flags = match_tail_recursed;
1057 goto TAIL_RECURSE;
1058 }
1059 /* Control never gets here */
1060
1061 /* An alternation is the end of a branch; scan along to find the end of the
1062 bracketed group and go to there. */
1063
1064 case OP_ALT:
1065 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1066 break;
1067
1068 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1069 that it may occur zero times. It may repeat infinitely, or not at all -
1070 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1071 repeat limits are compiled as a number of copies, with the optional ones
1072 preceded by BRAZERO or BRAMINZERO. */
1073
1074 case OP_BRAZERO:
1075 {
1076 next = ecode+1;
1077 RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
1078 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1079 do next += GET(next,1); while (*next == OP_ALT);
1080 ecode = next + 1 + LINK_SIZE;
1081 }
1082 break;
1083
1084 case OP_BRAMINZERO:
1085 {
1086 next = ecode+1;
1087 do next += GET(next, 1); while (*next == OP_ALT);
1088 RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1089 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1090 ecode++;
1091 }
1092 break;
1093
1094 /* End of a group, repeated or non-repeating. */
1095
1096 case OP_KET:
1097 case OP_KETRMIN:
1098 case OP_KETRMAX:
1099 prev = ecode - GET(ecode, 1);
1100
1101 /* If this was a group that remembered the subject start, in order to break
1102 infinite repeats of empty string matches, retrieve the subject start from
1103 the chain. Otherwise, set it NULL. */
1104
1105 if (*prev >= OP_SBRA)
1106 {
1107 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1108 eptrb = eptrb->epb_prev; /* Backup to previous group */
1109 }
1110 else saved_eptr = NULL;
1111
1112 /* If we are at the end of an assertion group, stop matching and return
1113 MATCH_MATCH, but record the current high water mark for use by positive
1114 assertions. Do this also for the "once" (atomic) groups. */
1115
1116 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1117 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1118 *prev == OP_ONCE)
1119 {
1120 md->end_match_ptr = eptr; /* For ONCE */
1121 md->end_offset_top = offset_top;
1122 RRETURN(MATCH_MATCH);
1123 }
1124
1125 /* For capturing groups we have to check the group number back at the start
1126 and if necessary complete handling an extraction by setting the offsets and
1127 bumping the high water mark. Note that whole-pattern recursion is coded as
1128 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1129 when the OP_END is reached. Other recursion is handled here. */
1130
1131 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1132 {
1133 number = GET2(prev, 1+LINK_SIZE);
1134 offset = number << 1;
1135
1136 #ifdef DEBUG
1137 printf("end bracket %d", number);
1138 printf("\n");
1139 #endif
1140
1141 md->capture_last = number;
1142 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1143 {
1144 md->offset_vector[offset] =
1145 md->offset_vector[md->offset_end - number];
1146 md->offset_vector[offset+1] = eptr - md->start_subject;
1147 if (offset_top <= offset) offset_top = offset + 2;
1148 }
1149
1150 /* Handle a recursively called group. Restore the offsets
1151 appropriately and continue from after the call. */
1152
1153 if (md->recursive != NULL && md->recursive->group_num == number)
1154 {
1155 recursion_info *rec = md->recursive;
1156 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1157 md->recursive = rec->prevrec;
1158 md->start_match = rec->save_start;
1159 memcpy(md->offset_vector, rec->offset_save,
1160 rec->saved_max * sizeof(int));
1161 ecode = rec->after_call;
1162 ims = original_ims;
1163 break;
1164 }
1165 }
1166
1167 /* For both capturing and non-capturing groups, reset the value of the ims
1168 flags, in case they got changed during the group. */
1169
1170 ims = original_ims;
1171 DPRINTF(("ims reset to %02lx\n", ims));
1172
1173 /* For a non-repeating ket, just continue at this level. This also
1174 happens for a repeating ket if no characters were matched in the group.
1175 This is the forcible breaking of infinite loops as implemented in Perl
1176 5.005. If there is an options reset, it will get obeyed in the normal
1177 course of events. */
1178
1179 if (*ecode == OP_KET || eptr == saved_eptr)
1180 {
1181 ecode += 1 + LINK_SIZE;
1182 break;
1183 }
1184
1185 /* The repeating kets try the rest of the pattern or restart from the
1186 preceding bracket, in the appropriate order. Whichever of the two is tried
1187 second is done by tail recursion, to avoid using another stack frame. */
1188
1189 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1190
1191 if (*ecode == OP_KETRMIN)
1192 {
1193 RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1194 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1195 ecode = prev;
1196 flags |= match_tail_recursed;
1197 goto TAIL_RECURSE;
1198 }
1199 else /* OP_KETRMAX */
1200 {
1201 RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
1202 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1203 ecode += 1 + LINK_SIZE;
1204 flags = match_tail_recursed;
1205 goto TAIL_RECURSE;
1206 }
1207 /* Control never gets here */
1208
1209 /* Start of subject unless notbol, or after internal newline if multiline */
1210
1211 case OP_CIRC:
1212 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1213 if ((ims & PCRE_MULTILINE) != 0)
1214 {
1215 if (eptr != md->start_subject &&
1216 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1217 RRETURN(MATCH_NOMATCH);
1218 ecode++;
1219 break;
1220 }
1221 /* ... else fall through */
1222
1223 /* Start of subject assertion */
1224
1225 case OP_SOD:
1226 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1227 ecode++;
1228 break;
1229
1230 /* Start of match assertion */
1231
1232 case OP_SOM:
1233 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1234 ecode++;
1235 break;
1236
1237 /* Assert before internal newline if multiline, or before a terminating
1238 newline unless endonly is set, else end of subject unless noteol is set. */
1239
1240 case OP_DOLL:
1241 if ((ims & PCRE_MULTILINE) != 0)
1242 {
1243 if (eptr < md->end_subject)
1244 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1245 else
1246 { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1247 ecode++;
1248 break;
1249 }
1250 else
1251 {
1252 if (md->noteol) RRETURN(MATCH_NOMATCH);
1253 if (!md->endonly)
1254 {
1255 if (eptr != md->end_subject &&
1256 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1257 RRETURN(MATCH_NOMATCH);
1258 ecode++;
1259 break;
1260 }
1261 }
1262 /* ... else fall through for endonly */
1263
1264 /* End of subject assertion (\z) */
1265
1266 case OP_EOD:
1267 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1268 ecode++;
1269 break;
1270
1271 /* End of subject or ending \n assertion (\Z) */
1272
1273 case OP_EODN:
1274 if (eptr != md->end_subject &&
1275 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1276 RRETURN(MATCH_NOMATCH);
1277 ecode++;
1278 break;
1279
1280 /* Word boundary assertions */
1281
1282 case OP_NOT_WORD_BOUNDARY:
1283 case OP_WORD_BOUNDARY:
1284 {
1285
1286 /* Find out if the previous and current characters are "word" characters.
1287 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1288 be "non-word" characters. */
1289
1290 #ifdef SUPPORT_UTF8
1291 if (utf8)
1292 {
1293 if (eptr == md->start_subject) prev_is_word = FALSE; else
1294 {
1295 const uschar *lastptr = eptr - 1;
1296 while((*lastptr & 0xc0) == 0x80) lastptr--;
1297 GETCHAR(c, lastptr);
1298 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1299 }
1300 if (eptr >= md->end_subject) cur_is_word = FALSE; else
1301 {
1302 GETCHAR(c, eptr);
1303 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1304 }
1305 }
1306 else
1307 #endif
1308
1309 /* More streamlined when not in UTF-8 mode */
1310
1311 {
1312 prev_is_word = (eptr != md->start_subject) &&
1313 ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1314 cur_is_word = (eptr < md->end_subject) &&
1315 ((md->ctypes[*eptr] & ctype_word) != 0);
1316 }
1317
1318 /* Now see if the situation is what we want */
1319
1320 if ((*ecode++ == OP_WORD_BOUNDARY)?
1321 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1322 RRETURN(MATCH_NOMATCH);
1323 }
1324 break;
1325
1326 /* Match a single character type; inline for speed */
1327
1328 case OP_ANY:
1329 if ((ims & PCRE_DOTALL) == 0)
1330 {
1331 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1332 }
1333 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1334 if (utf8)
1335 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1336 ecode++;
1337 break;
1338
1339 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1340 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1341
1342 case OP_ANYBYTE:
1343 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1344 ecode++;
1345 break;
1346
1347 case OP_NOT_DIGIT:
1348 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1349 GETCHARINCTEST(c, eptr);
1350 if (
1351 #ifdef SUPPORT_UTF8
1352 c < 256 &&
1353 #endif
1354 (md->ctypes[c] & ctype_digit) != 0
1355 )
1356 RRETURN(MATCH_NOMATCH);
1357 ecode++;
1358 break;
1359
1360 case OP_DIGIT:
1361 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1362 GETCHARINCTEST(c, eptr);
1363 if (
1364 #ifdef SUPPORT_UTF8
1365 c >= 256 ||
1366 #endif
1367 (md->ctypes[c] & ctype_digit) == 0
1368 )
1369 RRETURN(MATCH_NOMATCH);
1370 ecode++;
1371 break;
1372
1373 case OP_NOT_WHITESPACE:
1374 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1375 GETCHARINCTEST(c, eptr);
1376 if (
1377 #ifdef SUPPORT_UTF8
1378 c < 256 &&
1379 #endif
1380 (md->ctypes[c] & ctype_space) != 0
1381 )
1382 RRETURN(MATCH_NOMATCH);
1383 ecode++;
1384 break;
1385
1386 case OP_WHITESPACE:
1387 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1388 GETCHARINCTEST(c, eptr);
1389 if (
1390 #ifdef SUPPORT_UTF8
1391 c >= 256 ||
1392 #endif
1393 (md->ctypes[c] & ctype_space) == 0
1394 )
1395 RRETURN(MATCH_NOMATCH);
1396 ecode++;
1397 break;
1398
1399 case OP_NOT_WORDCHAR:
1400 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1401 GETCHARINCTEST(c, eptr);
1402 if (
1403 #ifdef SUPPORT_UTF8
1404 c < 256 &&
1405 #endif
1406 (md->ctypes[c] & ctype_word) != 0
1407 )
1408 RRETURN(MATCH_NOMATCH);
1409 ecode++;
1410 break;
1411
1412 case OP_WORDCHAR:
1413 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1414 GETCHARINCTEST(c, eptr);
1415 if (
1416 #ifdef SUPPORT_UTF8
1417 c >= 256 ||
1418 #endif
1419 (md->ctypes[c] & ctype_word) == 0
1420 )
1421 RRETURN(MATCH_NOMATCH);
1422 ecode++;
1423 break;
1424
1425 case OP_ANYNL:
1426 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1427 GETCHARINCTEST(c, eptr);
1428 switch(c)
1429 {
1430 default: RRETURN(MATCH_NOMATCH);
1431 case 0x000d:
1432 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1433 break;
1434 case 0x000a:
1435 case 0x000b:
1436 case 0x000c:
1437 case 0x0085:
1438 case 0x2028:
1439 case 0x2029:
1440 break;
1441 }
1442 ecode++;
1443 break;
1444
1445 #ifdef SUPPORT_UCP
1446 /* Check the next character by Unicode property. We will get here only
1447 if the support is in the binary; otherwise a compile-time error occurs. */
1448
1449 case OP_PROP:
1450 case OP_NOTPROP:
1451 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1452 GETCHARINCTEST(c, eptr);
1453 {
1454 int chartype, script;
1455 int category = _pcre_ucp_findprop(c, &chartype, &script);
1456
1457 switch(ecode[1])
1458 {
1459 case PT_ANY:
1460 if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1461 break;
1462
1463 case PT_LAMP:
1464 if ((chartype == ucp_Lu ||
1465 chartype == ucp_Ll ||
1466 chartype == ucp_Lt) == (op == OP_NOTPROP))
1467 RRETURN(MATCH_NOMATCH);
1468 break;
1469
1470 case PT_GC:
1471 if ((ecode[2] != category) == (op == OP_PROP))
1472 RRETURN(MATCH_NOMATCH);
1473 break;
1474
1475 case PT_PC:
1476 if ((ecode[2] != chartype) == (op == OP_PROP))
1477 RRETURN(MATCH_NOMATCH);
1478 break;
1479
1480 case PT_SC:
1481 if ((ecode[2] != script) == (op == OP_PROP))
1482 RRETURN(MATCH_NOMATCH);
1483 break;
1484
1485 default:
1486 RRETURN(PCRE_ERROR_INTERNAL);
1487 }
1488
1489 ecode += 3;
1490 }
1491 break;
1492
1493 /* Match an extended Unicode sequence. We will get here only if the support
1494 is in the binary; otherwise a compile-time error occurs. */
1495
1496 case OP_EXTUNI:
1497 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1498 GETCHARINCTEST(c, eptr);
1499 {
1500 int chartype, script;
1501 int category = _pcre_ucp_findprop(c, &chartype, &script);
1502 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1503 while (eptr < md->end_subject)
1504 {
1505 int len = 1;
1506 if (!utf8) c = *eptr; else
1507 {
1508 GETCHARLEN(c, eptr, len);
1509 }
1510 category = _pcre_ucp_findprop(c, &chartype, &script);
1511 if (category != ucp_M) break;
1512 eptr += len;
1513 }
1514 }
1515 ecode++;
1516 break;
1517 #endif
1518
1519
1520 /* Match a back reference, possibly repeatedly. Look past the end of the
1521 item to see if there is repeat information following. The code is similar
1522 to that for character classes, but repeated for efficiency. Then obey
1523 similar code to character type repeats - written out again for speed.
1524 However, if the referenced string is the empty string, always treat
1525 it as matched, any number of times (otherwise there could be infinite
1526 loops). */
1527
1528 case OP_REF:
1529 {
1530 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1531 ecode += 3; /* Advance past item */
1532
1533 /* If the reference is unset, set the length to be longer than the amount
1534 of subject left; this ensures that every attempt at a match fails. We
1535 can't just fail here, because of the possibility of quantifiers with zero
1536 minima. */
1537
1538 length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1539 md->end_subject - eptr + 1 :
1540 md->offset_vector[offset+1] - md->offset_vector[offset];
1541
1542 /* Set up for repetition, or handle the non-repeated case */
1543
1544 switch (*ecode)
1545 {
1546 case OP_CRSTAR:
1547 case OP_CRMINSTAR:
1548 case OP_CRPLUS:
1549 case OP_CRMINPLUS:
1550 case OP_CRQUERY:
1551 case OP_CRMINQUERY:
1552 c = *ecode++ - OP_CRSTAR;
1553 minimize = (c & 1) != 0;
1554 min = rep_min[c]; /* Pick up values from tables; */
1555 max = rep_max[c]; /* zero for max => infinity */
1556 if (max == 0) max = INT_MAX;
1557 break;
1558
1559 case OP_CRRANGE:
1560 case OP_CRMINRANGE:
1561 minimize = (*ecode == OP_CRMINRANGE);
1562 min = GET2(ecode, 1);
1563 max = GET2(ecode, 3);
1564 if (max == 0) max = INT_MAX;
1565 ecode += 5;
1566 break;
1567
1568 default: /* No repeat follows */
1569 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1570 eptr += length;
1571 continue; /* With the main loop */
1572 }
1573
1574 /* If the length of the reference is zero, just continue with the
1575 main loop. */
1576
1577 if (length == 0) continue;
1578
1579 /* First, ensure the minimum number of matches are present. We get back
1580 the length of the reference string explicitly rather than passing the
1581 address of eptr, so that eptr can be a register variable. */
1582
1583 for (i = 1; i <= min; i++)
1584 {
1585 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1586 eptr += length;
1587 }
1588
1589 /* If min = max, continue at the same level without recursion.
1590 They are not both allowed to be zero. */
1591
1592 if (min == max) continue;
1593
1594 /* If minimizing, keep trying and advancing the pointer */
1595
1596 if (minimize)
1597 {
1598 for (fi = min;; fi++)
1599 {
1600 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1601 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1602 if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1603 RRETURN(MATCH_NOMATCH);
1604 eptr += length;
1605 }
1606 /* Control never gets here */
1607 }
1608
1609 /* If maximizing, find the longest string and work backwards */
1610
1611 else
1612 {
1613 pp = eptr;
1614 for (i = min; i < max; i++)
1615 {
1616 if (!match_ref(offset, eptr, length, md, ims)) break;
1617 eptr += length;
1618 }
1619 while (eptr >= pp)
1620 {
1621 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1622 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1623 eptr -= length;
1624 }
1625 RRETURN(MATCH_NOMATCH);
1626 }
1627 }
1628 /* Control never gets here */
1629
1630
1631
1632 /* Match a bit-mapped character class, possibly repeatedly. This op code is
1633 used when all the characters in the class have values in the range 0-255,
1634 and either the matching is caseful, or the characters are in the range
1635 0-127 when UTF-8 processing is enabled. The only difference between
1636 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1637 encountered.
1638
1639 First, look past the end of the item to see if there is repeat information
1640 following. Then obey similar code to character type repeats - written out
1641 again for speed. */
1642
1643 case OP_NCLASS:
1644 case OP_CLASS:
1645 {
1646 data = ecode + 1; /* Save for matching */
1647 ecode += 33; /* Advance past the item */
1648
1649 switch (*ecode)
1650 {
1651 case OP_CRSTAR:
1652 case OP_CRMINSTAR:
1653 case OP_CRPLUS:
1654 case OP_CRMINPLUS:
1655 case OP_CRQUERY:
1656 case OP_CRMINQUERY:
1657 c = *ecode++ - OP_CRSTAR;
1658 minimize = (c & 1) != 0;
1659 min = rep_min[c]; /* Pick up values from tables; */
1660 max = rep_max[c]; /* zero for max => infinity */
1661 if (max == 0) max = INT_MAX;
1662 break;
1663
1664 case OP_CRRANGE:
1665 case OP_CRMINRANGE:
1666 minimize = (*ecode == OP_CRMINRANGE);
1667 min = GET2(ecode, 1);
1668 max = GET2(ecode, 3);
1669 if (max == 0) max = INT_MAX;
1670 ecode += 5;
1671 break;
1672
1673 default: /* No repeat follows */
1674 min = max = 1;
1675 break;
1676 }
1677
1678 /* First, ensure the minimum number of matches are present. */
1679
1680 #ifdef SUPPORT_UTF8
1681 /* UTF-8 mode */
1682 if (utf8)
1683 {
1684 for (i = 1; i <= min; i++)
1685 {
1686 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1687 GETCHARINC(c, eptr);
1688 if (c > 255)
1689 {
1690 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1691 }
1692 else
1693 {
1694 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1695 }
1696 }
1697 }
1698 else
1699 #endif
1700 /* Not UTF-8 mode */
1701 {
1702 for (i = 1; i <= min; i++)
1703 {
1704 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1705 c = *eptr++;
1706 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1707 }
1708 }
1709
1710 /* If max == min we can continue with the main loop without the
1711 need to recurse. */
1712
1713 if (min == max) continue;
1714
1715 /* If minimizing, keep testing the rest of the expression and advancing
1716 the pointer while it matches the class. */
1717
1718 if (minimize)
1719 {
1720 #ifdef SUPPORT_UTF8
1721 /* UTF-8 mode */
1722 if (utf8)
1723 {
1724 for (fi = min;; fi++)
1725 {
1726 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1727 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1728 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1729 GETCHARINC(c, eptr);
1730 if (c > 255)
1731 {
1732 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1733 }
1734 else
1735 {
1736 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1737 }
1738 }
1739 }
1740 else
1741 #endif
1742 /* Not UTF-8 mode */
1743 {
1744 for (fi = min;; fi++)
1745 {
1746 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1747 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1748 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1749 c = *eptr++;
1750 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1751 }
1752 }
1753 /* Control never gets here */
1754 }
1755
1756 /* If maximizing, find the longest possible run, then work backwards. */
1757
1758 else
1759 {
1760 pp = eptr;
1761
1762 #ifdef SUPPORT_UTF8
1763 /* UTF-8 mode */
1764 if (utf8)
1765 {
1766 for (i = min; i < max; i++)
1767 {
1768 int len = 1;
1769 if (eptr >= md->end_subject) break;
1770 GETCHARLEN(c, eptr, len);
1771 if (c > 255)
1772 {
1773 if (op == OP_CLASS) break;
1774 }
1775 else
1776 {
1777 if ((data[c/8] & (1 << (c&7))) == 0) break;
1778 }
1779 eptr += len;
1780 }
1781 for (;;)
1782 {
1783 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1784 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1785 if (eptr-- == pp) break; /* Stop if tried at original pos */
1786 BACKCHAR(eptr);
1787 }
1788 }
1789 else
1790 #endif
1791 /* Not UTF-8 mode */
1792 {
1793 for (i = min; i < max; i++)
1794 {
1795 if (eptr >= md->end_subject) break;
1796 c = *eptr;
1797 if ((data[c/8] & (1 << (c&7))) == 0) break;
1798 eptr++;
1799 }
1800 while (eptr >= pp)
1801 {
1802 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1803 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1804 eptr--;
1805 }
1806 }
1807
1808 RRETURN(MATCH_NOMATCH);
1809 }
1810 }
1811 /* Control never gets here */
1812
1813
1814 /* Match an extended character class. This opcode is encountered only
1815 in UTF-8 mode, because that's the only time it is compiled. */
1816
1817 #ifdef SUPPORT_UTF8
1818 case OP_XCLASS:
1819 {
1820 data = ecode + 1 + LINK_SIZE; /* Save for matching */
1821 ecode += GET(ecode, 1); /* Advance past the item */
1822
1823 switch (*ecode)
1824 {
1825 case OP_CRSTAR:
1826 case OP_CRMINSTAR:
1827 case OP_CRPLUS:
1828 case OP_CRMINPLUS:
1829 case OP_CRQUERY:
1830 case OP_CRMINQUERY:
1831 c = *ecode++ - OP_CRSTAR;
1832 minimize = (c & 1) != 0;
1833 min = rep_min[c]; /* Pick up values from tables; */
1834 max = rep_max[c]; /* zero for max => infinity */
1835 if (max == 0) max = INT_MAX;
1836 break;
1837
1838 case OP_CRRANGE:
1839 case OP_CRMINRANGE:
1840 minimize = (*ecode == OP_CRMINRANGE);
1841 min = GET2(ecode, 1);
1842 max = GET2(ecode, 3);
1843 if (max == 0) max = INT_MAX;
1844 ecode += 5;
1845 break;
1846
1847 default: /* No repeat follows */
1848 min = max = 1;
1849 break;
1850 }
1851
1852 /* First, ensure the minimum number of matches are present. */
1853
1854 for (i = 1; i <= min; i++)
1855 {
1856 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1857 GETCHARINC(c, eptr);
1858 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1859 }
1860
1861 /* If max == min we can continue with the main loop without the
1862 need to recurse. */
1863
1864 if (min == max) continue;
1865
1866 /* If minimizing, keep testing the rest of the expression and advancing
1867 the pointer while it matches the class. */
1868
1869 if (minimize)
1870 {
1871 for (fi = min;; fi++)
1872 {
1873 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1874 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1875 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1876 GETCHARINC(c, eptr);
1877 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1878 }
1879 /* Control never gets here */
1880 }
1881
1882 /* If maximizing, find the longest possible run, then work backwards. */
1883
1884 else
1885 {
1886 pp = eptr;
1887 for (i = min; i < max; i++)
1888 {
1889 int len = 1;
1890 if (eptr >= md->end_subject) break;
1891 GETCHARLEN(c, eptr, len);
1892 if (!_pcre_xclass(c, data)) break;
1893 eptr += len;
1894 }
1895 for(;;)
1896 {
1897 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1898 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1899 if (eptr-- == pp) break; /* Stop if tried at original pos */
1900 BACKCHAR(eptr)
1901 }
1902 RRETURN(MATCH_NOMATCH);
1903 }
1904
1905 /* Control never gets here */
1906 }
1907 #endif /* End of XCLASS */
1908
1909 /* Match a single character, casefully */
1910
1911 case OP_CHAR:
1912 #ifdef SUPPORT_UTF8
1913 if (utf8)
1914 {
1915 length = 1;
1916 ecode++;
1917 GETCHARLEN(fc, ecode, length);
1918 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1919 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
1920 }
1921 else
1922 #endif
1923
1924 /* Non-UTF-8 mode */
1925 {
1926 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1927 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
1928 ecode += 2;
1929 }
1930 break;
1931
1932 /* Match a single character, caselessly */
1933
1934 case OP_CHARNC:
1935 #ifdef SUPPORT_UTF8
1936 if (utf8)
1937 {
1938 length = 1;
1939 ecode++;
1940 GETCHARLEN(fc, ecode, length);
1941
1942 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1943
1944 /* If the pattern character's value is < 128, we have only one byte, and
1945 can use the fast lookup table. */
1946
1947 if (fc < 128)
1948 {
1949 if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1950 }
1951
1952 /* Otherwise we must pick up the subject character */
1953
1954 else
1955 {
1956 unsigned int dc;
1957 GETCHARINC(dc, eptr);
1958 ecode += length;
1959
1960 /* If we have Unicode property support, we can use it to test the other
1961 case of the character, if there is one. */
1962
1963 if (fc != dc)
1964 {
1965 #ifdef SUPPORT_UCP
1966 if (dc != _pcre_ucp_othercase(fc))
1967 #endif
1968 RRETURN(MATCH_NOMATCH);
1969 }
1970 }
1971 }
1972 else
1973 #endif /* SUPPORT_UTF8 */
1974
1975 /* Non-UTF-8 mode */
1976 {
1977 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1978 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1979 ecode += 2;
1980 }
1981 break;
1982
1983 /* Match a single character repeatedly. */
1984
1985 case OP_EXACT:
1986 min = max = GET2(ecode, 1);
1987 ecode += 3;
1988 goto REPEATCHAR;
1989
1990 case OP_POSUPTO:
1991 possessive = TRUE;
1992 /* Fall through */
1993
1994 case OP_UPTO:
1995 case OP_MINUPTO:
1996 min = 0;
1997 max = GET2(ecode, 1);
1998 minimize = *ecode == OP_MINUPTO;
1999 ecode += 3;
2000 goto REPEATCHAR;
2001
2002 case OP_POSSTAR:
2003 possessive = TRUE;
2004 min = 0;
2005 max = INT_MAX;
2006 ecode++;
2007 goto REPEATCHAR;
2008
2009 case OP_POSPLUS:
2010 possessive = TRUE;
2011 min = 1;
2012 max = INT_MAX;
2013 ecode++;
2014 goto REPEATCHAR;
2015
2016 case OP_POSQUERY:
2017 possessive = TRUE;
2018 min = 0;
2019 max = 1;
2020 ecode++;
2021 goto REPEATCHAR;
2022
2023 case OP_STAR:
2024 case OP_MINSTAR:
2025 case OP_PLUS:
2026 case OP_MINPLUS:
2027 case OP_QUERY:
2028 case OP_MINQUERY:
2029 c = *ecode++ - OP_STAR;
2030 minimize = (c & 1) != 0;
2031 min = rep_min[c]; /* Pick up values from tables; */
2032 max = rep_max[c]; /* zero for max => infinity */
2033 if (max == 0) max = INT_MAX;
2034
2035 /* Common code for all repeated single-character matches. We can give
2036 up quickly if there are fewer than the minimum number of characters left in
2037 the subject. */
2038
2039 REPEATCHAR:
2040 #ifdef SUPPORT_UTF8
2041 if (utf8)
2042 {
2043 length = 1;
2044 charptr = ecode;
2045 GETCHARLEN(fc, ecode, length);
2046 if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2047 ecode += length;
2048
2049 /* Handle multibyte character matching specially here. There is
2050 support for caseless matching if UCP support is present. */
2051
2052 if (length > 1)
2053 {
2054 #ifdef SUPPORT_UCP
2055 unsigned int othercase;
2056 if ((ims & PCRE_CASELESS) != 0 &&
2057 (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2058 oclength = _pcre_ord2utf8(othercase, occhars);
2059 else oclength = 0;
2060 #endif /* SUPPORT_UCP */
2061
2062 for (i = 1; i <= min; i++)
2063 {
2064 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2065 #ifdef SUPPORT_UCP
2066 /* Need braces because of following else */
2067 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2068 else
2069 {
2070 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2071 eptr += oclength;
2072 }
2073 #else /* without SUPPORT_UCP */
2074 else { RRETURN(MATCH_NOMATCH); }
2075 #endif /* SUPPORT_UCP */
2076 }
2077
2078 if (min == max) continue;
2079
2080 if (minimize)
2081 {
2082 for (fi = min;; fi++)
2083 {
2084 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2085 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2086 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2087 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2088 #ifdef SUPPORT_UCP
2089 /* Need braces because of following else */
2090 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2091 else
2092 {
2093 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2094 eptr += oclength;
2095 }
2096 #else /* without SUPPORT_UCP */
2097 else { RRETURN (MATCH_NOMATCH); }
2098 #endif /* SUPPORT_UCP */
2099 }
2100 /* Control never gets here */
2101 }
2102
2103 else /* Maximize */
2104 {
2105 pp = eptr;
2106 for (i = min; i < max; i++)
2107 {
2108 if (eptr > md->end_subject - length) break;
2109 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2110 #ifdef SUPPORT_UCP
2111 else if (oclength == 0) break;
2112 else
2113 {
2114 if (memcmp(eptr, occhars, oclength) != 0) break;
2115 eptr += oclength;
2116 }
2117 #else /* without SUPPORT_UCP */
2118 else break;
2119 #endif /* SUPPORT_UCP */
2120 }
2121
2122 if (possessive) continue;
2123 for(;;)
2124 {
2125 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2126 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2127 if (eptr == pp) RRETURN(MATCH_NOMATCH);
2128 #ifdef SUPPORT_UCP
2129 eptr--;
2130 BACKCHAR(eptr);
2131 #else /* without SUPPORT_UCP */
2132 eptr -= length;
2133 #endif /* SUPPORT_UCP */
2134 }
2135 }
2136 /* Control never gets here */
2137 }
2138
2139 /* If the length of a UTF-8 character is 1, we fall through here, and
2140 obey the code as for non-UTF-8 characters below, though in this case the
2141 value of fc will always be < 128. */
2142 }
2143 else
2144 #endif /* SUPPORT_UTF8 */
2145
2146 /* When not in UTF-8 mode, load a single-byte character. */
2147 {
2148 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2149 fc = *ecode++;
2150 }
2151
2152 /* The value of fc at this point is always less than 256, though we may or
2153 may not be in UTF-8 mode. The code is duplicated for the caseless and
2154 caseful cases, for speed, since matching characters is likely to be quite
2155 common. First, ensure the minimum number of matches are present. If min =
2156 max, continue at the same level without recursing. Otherwise, if
2157 minimizing, keep trying the rest of the expression and advancing one
2158 matching character if failing, up to the maximum. Alternatively, if
2159 maximizing, find the maximum number of characters and work backwards. */
2160
2161 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2162 max, eptr));
2163
2164 if ((ims & PCRE_CASELESS) != 0)
2165 {
2166 fc = md->lcc[fc];
2167 for (i = 1; i <= min; i++)
2168 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2169 if (min == max) continue;
2170 if (minimize)
2171 {
2172 for (fi = min;; fi++)
2173 {
2174 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2175 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2176 if (fi >= max || eptr >= md->end_subject ||
2177 fc != md->lcc[*eptr++])
2178 RRETURN(MATCH_NOMATCH);
2179 }
2180 /* Control never gets here */
2181 }
2182 else /* Maximize */
2183 {
2184 pp = eptr;
2185 for (i = min; i < max; i++)
2186 {
2187 if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2188 eptr++;
2189 }
2190 if (possessive) continue;
2191 while (eptr >= pp)
2192 {
2193 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2194 eptr--;
2195 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2196 }
2197 RRETURN(MATCH_NOMATCH);
2198 }
2199 /* Control never gets here */
2200 }
2201
2202 /* Caseful comparisons (includes all multi-byte characters) */
2203
2204 else
2205 {
2206 for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2207 if (min == max) continue;
2208 if (minimize)
2209 {
2210 for (fi = min;; fi++)
2211 {
2212 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2213 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2214 if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2215 RRETURN(MATCH_NOMATCH);
2216 }
2217 /* Control never gets here */
2218 }
2219 else /* Maximize */
2220 {
2221 pp = eptr;
2222 for (i = min; i < max; i++)
2223 {
2224 if (eptr >= md->end_subject || fc != *eptr) break;
2225 eptr++;
2226 }
2227 if (possessive) continue;
2228 while (eptr >= pp)
2229 {
2230 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2231 eptr--;
2232 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2233 }
2234 RRETURN(MATCH_NOMATCH);
2235 }
2236 }
2237 /* Control never gets here */
2238
2239 /* Match a negated single one-byte character. The character we are
2240 checking can be multibyte. */
2241
2242 case OP_NOT:
2243 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2244 ecode++;
2245 GETCHARINCTEST(c, eptr);
2246 if ((ims & PCRE_CASELESS) != 0)
2247 {
2248 #ifdef SUPPORT_UTF8
2249 if (c < 256)
2250 #endif
2251 c = md->lcc[c];
2252 if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2253 }
2254 else
2255 {
2256 if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2257 }
2258 break;
2259
2260 /* Match a negated single one-byte character repeatedly. This is almost a
2261 repeat of the code for a repeated single character, but I haven't found a
2262 nice way of commoning these up that doesn't require a test of the
2263 positive/negative option for each character match. Maybe that wouldn't add
2264 very much to the time taken, but character matching *is* what this is all
2265 about... */
2266
2267 case OP_NOTEXACT:
2268 min = max = GET2(ecode, 1);
2269 ecode += 3;
2270 goto REPEATNOTCHAR;
2271
2272 case OP_NOTUPTO:
2273 case OP_NOTMINUPTO:
2274 min = 0;
2275 max = GET2(ecode, 1);
2276 minimize = *ecode == OP_NOTMINUPTO;
2277 ecode += 3;
2278 goto REPEATNOTCHAR;
2279
2280 case OP_NOTPOSSTAR:
2281 possessive = TRUE;
2282 min = 0;
2283 max = INT_MAX;
2284 ecode++;
2285 goto REPEATNOTCHAR;
2286
2287 case OP_NOTPOSPLUS:
2288 possessive = TRUE;
2289 min = 1;
2290 max = INT_MAX;
2291 ecode++;
2292 goto REPEATNOTCHAR;
2293
2294 case OP_NOTPOSQUERY:
2295 possessive = TRUE;
2296 min = 0;
2297 max = 1;
2298 ecode++;
2299 goto REPEATNOTCHAR;
2300
2301 case OP_NOTPOSUPTO:
2302 possessive = TRUE;
2303 min = 0;
2304 max = GET2(ecode, 1);
2305 ecode += 3;
2306 goto REPEATNOTCHAR;
2307
2308 case OP_NOTSTAR:
2309 case OP_NOTMINSTAR:
2310 case OP_NOTPLUS:
2311 case OP_NOTMINPLUS:
2312 case OP_NOTQUERY:
2313 case OP_NOTMINQUERY:
2314 c = *ecode++ - OP_NOTSTAR;
2315 minimize = (c & 1) != 0;
2316 min = rep_min[c]; /* Pick up values from tables; */
2317 max = rep_max[c]; /* zero for max => infinity */
2318 if (max == 0) max = INT_MAX;
2319
2320 /* Common code for all repeated single-byte matches. We can give up quickly
2321 if there are fewer than the minimum number of bytes left in the
2322 subject. */
2323
2324 REPEATNOTCHAR:
2325 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2326 fc = *ecode++;
2327
2328 /* The code is duplicated for the caseless and caseful cases, for speed,
2329 since matching characters is likely to be quite common. First, ensure the
2330 minimum number of matches are present. If min = max, continue at the same
2331 level without recursing. Otherwise, if minimizing, keep trying the rest of
2332 the expression and advancing one matching character if failing, up to the
2333 maximum. Alternatively, if maximizing, find the maximum number of
2334 characters and work backwards. */
2335
2336 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2337 max, eptr));
2338
2339 if ((ims & PCRE_CASELESS) != 0)
2340 {
2341 fc = md->lcc[fc];
2342
2343 #ifdef SUPPORT_UTF8
2344 /* UTF-8 mode */
2345 if (utf8)
2346 {
2347 register unsigned int d;
2348 for (i = 1; i <= min; i++)
2349 {
2350 GETCHARINC(d, eptr);
2351 if (d < 256) d = md->lcc[d];
2352 if (fc == d) RRETURN(MATCH_NOMATCH);
2353 }
2354 }
2355 else
2356 #endif
2357
2358 /* Not UTF-8 mode */
2359 {
2360 for (i = 1; i <= min; i++)
2361 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2362 }
2363
2364 if (min == max) continue;
2365
2366 if (minimize)
2367 {
2368 #ifdef SUPPORT_UTF8
2369 /* UTF-8 mode */
2370 if (utf8)
2371 {
2372 register unsigned int d;
2373 for (fi = min;; fi++) /* Minimizing: try the rest of the pattern first */
2374 {
2375 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2376 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2377 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); /* Test limits BEFORE reading */
2378 GETCHARINC(d, eptr); /* Safe: at least one byte remains */
2379 if (d < 256) d = md->lcc[d]; /* Case-fold only characters that are in the table */
2380 if (fc == d) RRETURN(MATCH_NOMATCH); /* Hit the negated character */
2381 }
2382 }
2383 else
2384 #endif
2385 /* Not UTF-8 mode */
2386 {
2387 for (fi = min;; fi++)
2388 {
2389 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2390 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2391 if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2392 RRETURN(MATCH_NOMATCH);
2393 }
2394 }
2395 /* Control never gets here */
2396 }
2397
2398 /* Maximize case */
2399
2400 else
2401 {
2402 pp = eptr;
2403
2404 #ifdef SUPPORT_UTF8
2405 /* UTF-8 mode */
2406 if (utf8)
2407 {
2408 register unsigned int d;
2409 for (i = min; i < max; i++)
2410 {
2411 int len = 1;
2412 if (eptr >= md->end_subject) break;
2413 GETCHARLEN(d, eptr, len);
2414 if (d < 256) d = md->lcc[d];
2415 if (fc == d) break;
2416 eptr += len;
2417 }
2418 if (possessive) continue;
2419 for(;;)
2420 {
2421 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2422 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2423 if (eptr-- == pp) break; /* Stop if tried at original pos */
2424 BACKCHAR(eptr);
2425 }
2426 }
2427 else
2428 #endif
2429 /* Not UTF-8 mode */
2430 {
2431 for (i = min; i < max; i++)
2432 {
2433 if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2434 eptr++;
2435 }
2436 if (possessive) continue;
2437 while (eptr >= pp)
2438 {
2439 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2440 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2441 eptr--;
2442 }
2443 }
2444
2445 RRETURN(MATCH_NOMATCH);
2446 }
2447 /* Control never gets here */
2448 }
2449
2450 /* Caseful comparisons */
2451
2452 else
2453 {
2454 #ifdef SUPPORT_UTF8
2455 /* UTF-8 mode */
2456 if (utf8)
2457 {
2458 register unsigned int d;
2459 for (i = 1; i <= min; i++)
2460 {
2461 GETCHARINC(d, eptr);
2462 if (fc == d) RRETURN(MATCH_NOMATCH);
2463 }
2464 }
2465 else
2466 #endif
2467 /* Not UTF-8 mode */
2468 {
2469 for (i = 1; i <= min; i++)
2470 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2471 }
2472
2473 if (min == max) continue;
2474
2475 if (minimize)
2476 {
2477 #ifdef SUPPORT_UTF8
2478 /* UTF-8 mode */
2479 if (utf8)
2480 {
2481 register unsigned int d;
2482 for (fi = min;; fi++) /* Minimizing: try the rest of the pattern first */
2483 {
2484 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2485 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2486 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); /* Test limits BEFORE reading */
2487 GETCHARINC(d, eptr); /* Safe: at least one byte remains */
2488 if (fc == d) RRETURN(MATCH_NOMATCH); /* Hit the negated character */
2489 }
2490 }
2491 else
2492 #endif
2493 /* Not UTF-8 mode */
2494 {
2495 for (fi = min;; fi++)
2496 {
2497 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2498 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2499 if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2500 RRETURN(MATCH_NOMATCH);
2501 }
2502 }
2503 /* Control never gets here */
2504 }
2505
2506 /* Maximize case */
2507
2508 else
2509 {
2510 pp = eptr;
2511
2512 #ifdef SUPPORT_UTF8
2513 /* UTF-8 mode */
2514 if (utf8)
2515 {
2516 register unsigned int d;
2517 for (i = min; i < max; i++)
2518 {
2519 int len = 1;
2520 if (eptr >= md->end_subject) break;
2521 GETCHARLEN(d, eptr, len);
2522 if (fc == d) break;
2523 eptr += len;
2524 }
2525 if (possessive) continue;
2526 for(;;)
2527 {
2528 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2529 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2530 if (eptr-- == pp) break; /* Stop if tried at original pos */
2531 BACKCHAR(eptr);
2532 }
2533 }
2534 else
2535 #endif
2536 /* Not UTF-8 mode */
2537 {
2538 for (i = min; i < max; i++)
2539 {
2540 if (eptr >= md->end_subject || fc == *eptr) break;
2541 eptr++;
2542 }
2543 if (possessive) continue;
2544 while (eptr >= pp)
2545 {
2546 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2547 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2548 eptr--;
2549 }
2550 }
2551
2552 RRETURN(MATCH_NOMATCH);
2553 }
2554 }
2555 /* Control never gets here */
2556
2557 /* Match a single character type repeatedly; several different opcodes
2558 share code. This is very similar to the code for single characters, but we
2559 repeat it in the interests of efficiency. */
2560
2561 case OP_TYPEEXACT:
2562 min = max = GET2(ecode, 1);
2563 minimize = TRUE;
2564 ecode += 3;
2565 goto REPEATTYPE;
2566
2567 case OP_TYPEUPTO:
2568 case OP_TYPEMINUPTO:
2569 min = 0;
2570 max = GET2(ecode, 1);
2571 minimize = *ecode == OP_TYPEMINUPTO;
2572 ecode += 3;
2573 goto REPEATTYPE;
2574
2575 case OP_TYPEPOSSTAR:
2576 possessive = TRUE;
2577 min = 0;
2578 max = INT_MAX;
2579 ecode++;
2580 goto REPEATTYPE;
2581
2582 case OP_TYPEPOSPLUS:
2583 possessive = TRUE;
2584 min = 1;
2585 max = INT_MAX;
2586 ecode++;
2587 goto REPEATTYPE;
2588
2589 case OP_TYPEPOSQUERY:
2590 possessive = TRUE;
2591 min = 0;
2592 max = 1;
2593 ecode++;
2594 goto REPEATTYPE;
2595
2596 case OP_TYPEPOSUPTO:
2597 possessive = TRUE;
2598 min = 0;
2599 max = GET2(ecode, 1);
2600 ecode += 3;
2601 goto REPEATTYPE;
2602
2603 case OP_TYPESTAR:
2604 case OP_TYPEMINSTAR:
2605 case OP_TYPEPLUS:
2606 case OP_TYPEMINPLUS:
2607 case OP_TYPEQUERY:
2608 case OP_TYPEMINQUERY:
2609 c = *ecode++ - OP_TYPESTAR;
2610 minimize = (c & 1) != 0;
2611 min = rep_min[c]; /* Pick up values from tables; */
2612 max = rep_max[c]; /* zero for max => infinity */
2613 if (max == 0) max = INT_MAX;
2614
2615 /* Common code for all repeated single character type matches. Note that
2616 in UTF-8 mode, '.' matches a character of any length, but for the other
2617 character types, the valid characters are all one-byte long. */
2618
2619 REPEATTYPE:
2620 ctype = *ecode++; /* Code for the character type */
2621
2622 #ifdef SUPPORT_UCP
2623 if (ctype == OP_PROP || ctype == OP_NOTPROP)
2624 {
2625 prop_fail_result = ctype == OP_NOTPROP;
2626 prop_type = *ecode++;
2627 prop_value = *ecode++;
2628 }
2629 else prop_type = -1;
2630 #endif
2631
2632 /* First, ensure the minimum number of matches are present. Use inline
2633 code for maximizing the speed, and do the type test once at the start
2634 (i.e. keep it out of the loop). Also we can test that there are at least
2635 the minimum number of bytes before we start. This isn't as effective in
2636 UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2637 is tidier. Also separate the UCP code, which can be the same for both UTF-8
2638 and single-bytes. */
2639
2640 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2641 if (min > 0)
2642 {
2643 #ifdef SUPPORT_UCP
2644 if (prop_type >= 0)
2645 {
2646 switch(prop_type)
2647 {
2648 case PT_ANY:
2649 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2650 for (i = 1; i <= min; i++)
2651 {
2652 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2653 GETCHARINC(c, eptr);
2654 }
2655 break;
2656
2657 case PT_LAMP:
2658 for (i = 1; i <= min; i++)
2659 {
2660 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2661 GETCHARINC(c, eptr);
2662 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2663 if ((prop_chartype == ucp_Lu ||
2664 prop_chartype == ucp_Ll ||
2665 prop_chartype == ucp_Lt) == prop_fail_result)
2666 RRETURN(MATCH_NOMATCH);
2667 }
2668 break;
2669
2670 case PT_GC:
2671 for (i = 1; i <= min; i++)
2672 {
2673 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2674 GETCHARINC(c, eptr);
2675 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2676 if ((prop_category == prop_value) == prop_fail_result)
2677 RRETURN(MATCH_NOMATCH);
2678 }
2679 break;
2680
2681 case PT_PC:
2682 for (i = 1; i <= min; i++)
2683 {
2684 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2685 GETCHARINC(c, eptr);
2686 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2687 if ((prop_chartype == prop_value) == prop_fail_result)
2688 RRETURN(MATCH_NOMATCH);
2689 }
2690 break;
2691
2692 case PT_SC:
2693 for (i = 1; i <= min; i++)
2694 {
2695 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2696 GETCHARINC(c, eptr);
2697 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2698 if ((prop_script == prop_value) == prop_fail_result)
2699 RRETURN(MATCH_NOMATCH);
2700 }
2701 break;
2702
2703 default:
2704 RRETURN(PCRE_ERROR_INTERNAL);
2705 }
2706 }
2707
2708 /* Match extended Unicode sequences. We will get here only if the
2709 support is in the binary; otherwise a compile-time error occurs. */
2710
2711 else if (ctype == OP_EXTUNI)
2712 {
2713 for (i = 1; i <= min; i++)
2714 {
2715 GETCHARINCTEST(c, eptr);
2716 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2717 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2718 while (eptr < md->end_subject)
2719 {
2720 int len = 1;
2721 if (!utf8) c = *eptr; else
2722 {
2723 GETCHARLEN(c, eptr, len);
2724 }
2725 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2726 if (prop_category != ucp_M) break;
2727 eptr += len;
2728 }
2729 }
2730 }
2731
2732 else
2733 #endif /* SUPPORT_UCP */
2734
2735 /* Handle all other cases when the coding is UTF-8 */
2736
2737 #ifdef SUPPORT_UTF8
2738 if (utf8) switch(ctype)
2739 {
2740 case OP_ANY:
2741 for (i = 1; i <= min; i++)
2742 {
2743 if (eptr >= md->end_subject ||
2744 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2745 RRETURN(MATCH_NOMATCH);
2746 eptr++;
2747 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2748 }
2749 break;
2750
2751 case OP_ANYBYTE:
2752 eptr += min;
2753 break;
2754
2755 case OP_ANYNL:
2756 for (i = 1; i <= min; i++)
2757 {
2758 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2759 GETCHARINC(c, eptr);
2760 switch(c)
2761 {
2762 default: RRETURN(MATCH_NOMATCH);
2763 case 0x000d:
2764 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2765 break;
2766 case 0x000a:
2767 case 0x000b:
2768 case 0x000c:
2769 case 0x0085:
2770 case 0x2028:
2771 case 0x2029:
2772 break;
2773 }
2774 }
2775 break;
2776
2777 case OP_NOT_DIGIT:
2778 for (i = 1; i <= min; i++)
2779 {
2780 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2781 GETCHARINC(c, eptr);
2782 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
2783 RRETURN(MATCH_NOMATCH);
2784 }
2785 break;
2786
2787 case OP_DIGIT:
2788 for (i = 1; i <= min; i++)
2789 {
2790 if (eptr >= md->end_subject ||
2791 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
2792 RRETURN(MATCH_NOMATCH);
2793 /* No need to skip more bytes - we know it's a 1-byte character */
2794 }
2795 break;
2796
2797 case OP_NOT_WHITESPACE: /* \S: any character that is not ASCII white space */
2798 for (i = 1; i <= min; i++)
2799 {
2800 if (eptr >= md->end_subject ||
2801 (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)) /* test only; do not advance here */
2802 RRETURN(MATCH_NOMATCH);
2803 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); /* step over lead byte and any continuation bytes */
2804 }
2805 break;
2806
2807 case OP_WHITESPACE:
2808 for (i = 1; i <= min; i++)
2809 {
2810 if (eptr >= md->end_subject ||
2811 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
2812 RRETURN(MATCH_NOMATCH);
2813 /* No need to skip more bytes - we know it's a 1-byte character */
2814 }
2815 break;
2816
2817 case OP_NOT_WORDCHAR: /* \W: any character that is not an ASCII word character */
2818 for (i = 1; i <= min; i++)
2819 {
2820 if (eptr >= md->end_subject ||
2821 (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)) /* test only; do not advance here */
2822 RRETURN(MATCH_NOMATCH);
2823 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); /* step over lead byte and any continuation bytes */
2824 }
2825 break;
2826
2827 case OP_WORDCHAR:
2828 for (i = 1; i <= min; i++)
2829 {
2830 if (eptr >= md->end_subject ||
2831 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
2832 RRETURN(MATCH_NOMATCH);
2833 /* No need to skip more bytes - we know it's a 1-byte character */
2834 }
2835 break;
2836
2837 default:
2838 RRETURN(PCRE_ERROR_INTERNAL);
2839 } /* End switch(ctype) */
2840
2841 else
2842 #endif /* SUPPORT_UTF8 */
2843
2844 /* Code for the non-UTF-8 case for minimum matching of operators other
2845 than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2846 number of bytes present, as this was tested above. */
2847
2848 switch(ctype)
2849 {
2850 case OP_ANY:
2851 if ((ims & PCRE_DOTALL) == 0)
2852 {
2853 for (i = 1; i <= min; i++)
2854 {
2855 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2856 eptr++;
2857 }
2858 }
2859 else eptr += min;
2860 break;
2861
2862 case OP_ANYBYTE:
2863 eptr += min;
2864 break;
2865
2866 /* Because of the CRLF case, we can't assume the minimum number of
2867 bytes are present in this case. */
2868
2869 case OP_ANYNL:
2870 for (i = 1; i <= min; i++)
2871 {
2872 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2873 switch(*eptr++)
2874 {
2875 default: RRETURN(MATCH_NOMATCH);
2876 case 0x000d:
2877 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2878 break;
2879 case 0x000a:
2880 case 0x000b:
2881 case 0x000c:
2882 case 0x0085:
2883 break;
2884 }
2885 }
2886 break;
2887
2888 case OP_NOT_DIGIT:
2889 for (i = 1; i <= min; i++)
2890 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
2891 break;
2892
2893 case OP_DIGIT:
2894 for (i = 1; i <= min; i++)
2895 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
2896 break;
2897
2898 case OP_NOT_WHITESPACE:
2899 for (i = 1; i <= min; i++)
2900 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
2901 break;
2902
2903 case OP_WHITESPACE:
2904 for (i = 1; i <= min; i++)
2905 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
2906 break;
2907
2908 case OP_NOT_WORDCHAR:
2909 for (i = 1; i <= min; i++)
2910 if ((md->ctypes[*eptr++] & ctype_word) != 0)
2911 RRETURN(MATCH_NOMATCH);
2912 break;
2913
2914 case OP_WORDCHAR:
2915 for (i = 1; i <= min; i++)
2916 if ((md->ctypes[*eptr++] & ctype_word) == 0)
2917 RRETURN(MATCH_NOMATCH);
2918 break;
2919
2920 default:
2921 RRETURN(PCRE_ERROR_INTERNAL);
2922 }
2923 }
2924
2925 /* If min = max, continue at the same level without recursing */
2926
2927 if (min == max) continue;
2928
2929 /* If minimizing, we have to test the rest of the pattern before each
2930 subsequent match. Again, separate the UTF-8 case for speed, and also
2931 separate the UCP cases. */
2932
2933 if (minimize)
2934 {
2935 #ifdef SUPPORT_UCP
2936 if (prop_type >= 0)
2937 {
2938 switch(prop_type)
2939 {
2940 case PT_ANY:
2941 for (fi = min;; fi++)
2942 {
2943 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2944 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2945 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2946 GETCHARINC(c, eptr);
2947 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2948 }
2949 /* Control never gets here */
2950
2951 case PT_LAMP:
2952 for (fi = min;; fi++)
2953 {
2954 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2955 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2956 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2957 GETCHARINC(c, eptr);
2958 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2959 if ((prop_chartype == ucp_Lu ||
2960 prop_chartype == ucp_Ll ||
2961 prop_chartype == ucp_Lt) == prop_fail_result)
2962 RRETURN(MATCH_NOMATCH);
2963 }
2964 /* Control never gets here */
2965
2966 case PT_GC:
2967 for (fi = min;; fi++)
2968 {
2969 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2970 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2971 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2972 GETCHARINC(c, eptr);
2973 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2974 if ((prop_category == prop_value) == prop_fail_result)
2975 RRETURN(MATCH_NOMATCH);
2976 }
2977 /* Control never gets here */
2978
2979 case PT_PC:
2980 for (fi = min;; fi++)
2981 {
2982 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2983 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2984 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2985 GETCHARINC(c, eptr);
2986 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2987 if ((prop_chartype == prop_value) == prop_fail_result)
2988 RRETURN(MATCH_NOMATCH);
2989 }
2990 /* Control never gets here */
2991
2992 case PT_SC:
2993 for (fi = min;; fi++)
2994 {
2995 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2996 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2997 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2998 GETCHARINC(c, eptr);
2999 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3000 if ((prop_script == prop_value) == prop_fail_result)
3001 RRETURN(MATCH_NOMATCH);
3002 }
3003 /* Control never gets here */
3004
3005 default:
3006 RRETURN(PCRE_ERROR_INTERNAL);
3007 }
3008 }
3009
3010 /* Match extended Unicode sequences. We will get here only if the
3011 support is in the binary; otherwise a compile-time error occurs. */
3012
3013 else if (ctype == OP_EXTUNI)
3014 {
3015 for (fi = min;; fi++)
3016 {
3017 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3018 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3019 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3020 GETCHARINCTEST(c, eptr);
3021 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3022 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3023 while (eptr < md->end_subject)
3024 {
3025 int len = 1;
3026 if (!utf8) c = *eptr; else
3027 {
3028 GETCHARLEN(c, eptr, len);
3029 }
3030 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3031 if (prop_category != ucp_M) break;
3032 eptr += len;
3033 }
3034 }
3035 }
3036
3037 else
3038 #endif /* SUPPORT_UCP */
3039
3040 #ifdef SUPPORT_UTF8
3041 /* UTF-8 mode */
3042 if (utf8)
3043 {
3044 for (fi = min;; fi++)
3045 {
3046 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3047 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3048 if (fi >= max || eptr >= md->end_subject ||
3049 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3050 IS_NEWLINE(eptr)))
3051 RRETURN(MATCH_NOMATCH);
3052
3053 GETCHARINC(c, eptr);
3054 switch(ctype)
3055 {
3056 case OP_ANY: /* This is the DOTALL case */
3057 break;
3058
3059 case OP_ANYBYTE:
3060 break;
3061
3062 case OP_ANYNL:
3063 switch(c)
3064 {
3065 default: RRETURN(MATCH_NOMATCH);
3066 case 0x000d:
3067 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3068 break;
3069 case 0x000a:
3070 case 0x000b:
3071 case 0x000c:
3072 case 0x0085:
3073 case 0x2028:
3074 case 0x2029:
3075 break;
3076 }
3077 break;
3078
3079 case OP_NOT_DIGIT:
3080 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3081 RRETURN(MATCH_NOMATCH);
3082 break;
3083
3084 case OP_DIGIT:
3085 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3086 RRETURN(MATCH_NOMATCH);
3087 break;
3088
3089 case OP_NOT_WHITESPACE:
3090 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3091 RRETURN(MATCH_NOMATCH);
3092 break;
3093
3094 case OP_WHITESPACE:
3095 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3096 RRETURN(MATCH_NOMATCH);
3097 break;
3098
3099 case OP_NOT_WORDCHAR:
3100 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3101 RRETURN(MATCH_NOMATCH);
3102 break;
3103
3104 case OP_WORDCHAR:
3105 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3106 RRETURN(MATCH_NOMATCH);
3107 break;
3108
3109 default:
3110 RRETURN(PCRE_ERROR_INTERNAL);
3111 }
3112 }
3113 }
3114 else
3115 #endif
3116 /* Not UTF-8 mode */
3117 {
3118 for (fi = min;; fi++)
3119 {
3120 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); /* Minimizing: try the rest of the pattern first */
3121 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3122 if (fi >= max || eptr >= md->end_subject || /* A newline stops only '.' without DOTALL, */
3123 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr))) /* never the other types such as \s or \R */
3124 RRETURN(MATCH_NOMATCH);
3125
3126 c = *eptr++;
3127 switch(ctype)
3128 {
3129 case OP_ANY: /* This is the DOTALL case */
3130 break;
3131
3132 case OP_ANYBYTE:
3133 break;
3134
3135 case OP_ANYNL:
3136 switch(c)
3137 {
3138 default: RRETURN(MATCH_NOMATCH);
3139 case 0x000d:
3140 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3141 break;
3142 case 0x000a:
3143 case 0x000b:
3144 case 0x000c:
3145 case 0x0085:
3146 break;
3147 }
3148 break;
3149
3150 case OP_NOT_DIGIT:
3151 if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3152 break;
3153
3154 case OP_DIGIT:
3155 if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3156 break;
3157
3158 case OP_NOT_WHITESPACE:
3159 if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3160 break;
3161
3162 case OP_WHITESPACE:
3163 if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3164 break;
3165
3166 case OP_NOT_WORDCHAR:
3167 if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3168 break;
3169
3170 case OP_WORDCHAR:
3171 if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3172 break;
3173
3174 default:
3175 RRETURN(PCRE_ERROR_INTERNAL);
3176 }
3177 }
3178 }
3179 /* Control never gets here */
3180 }
3181
3182 /* If maximizing, it is worth using inline code for speed, doing the type
3183 test once at the start (i.e. keep it out of the loop). Again, keep the
3184 UTF-8 and UCP stuff separate. */
3185
3186 else
3187 {
3188 pp = eptr; /* Remember where we started */
3189
3190 #ifdef SUPPORT_UCP
3191 if (prop_type >= 0)
3192 {
3193 switch(prop_type)
3194 {
3195 case PT_ANY:
3196 for (i = min; i < max; i++)
3197 {
3198 int len = 1;
3199 if (eptr >= md->end_subject) break;
3200 GETCHARLEN(c, eptr, len);
3201 if (prop_fail_result) break;
3202 eptr+= len;
3203 }
3204 break;
3205
3206 case PT_LAMP:
3207 for (i = min; i < max; i++)
3208 {
3209 int len = 1;
3210 if (eptr >= md->end_subject) break;
3211 GETCHARLEN(c, eptr, len);
3212 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3213 if ((prop_chartype == ucp_Lu ||
3214 prop_chartype == ucp_Ll ||
3215 prop_chartype == ucp_Lt) == prop_fail_result)
3216 break;
3217 eptr+= len;
3218 }
3219 break;
3220
3221 case PT_GC:
3222 for (i = min; i < max; i++)
3223 {
3224 int len = 1;
3225 if (eptr >= md->end_subject) break;
3226 GETCHARLEN(c, eptr, len);
3227 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3228 if ((prop_category == prop_value) == prop_fail_result)
3229 break;
3230 eptr+= len;
3231 }
3232 break;
3233
3234 case PT_PC:
3235 for (i = min; i < max; i++)
3236 {
3237 int len = 1;
3238 if (eptr >= md->end_subject) break;
3239 GETCHARLEN(c, eptr, len);
3240 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3241 if ((prop_chartype == prop_value) == prop_fail_result)
3242 break;
3243 eptr+= len;
3244 }
3245 break;
3246
3247 case PT_SC:
3248 for (i = min; i < max; i++)
3249 {
3250 int len = 1;
3251 if (eptr >= md->end_subject) break;
3252 GETCHARLEN(c, eptr, len);
3253 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3254 if ((prop_script == prop_value) == prop_fail_result)
3255 break;
3256 eptr+= len;
3257 }
3258 break;
3259 }
3260
3261 /* eptr is now past the end of the maximum run */
3262
3263 if (possessive) continue;
3264 for(;;)
3265 {
3266 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3267 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3268 if (eptr-- == pp) break; /* Stop if tried at original pos */
3269 BACKCHAR(eptr);
3270 }
3271 }
3272
3273 /* Match extended Unicode sequences. We will get here only if the
3274 support is in the binary; otherwise a compile-time error occurs. */
3275
3276 else if (ctype == OP_EXTUNI)
3277 {
3278 for (i = min; i < max; i++)
3279 {
3280 if (eptr >= md->end_subject) break;
3281 GETCHARINCTEST(c, eptr);
3282 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3283 if (prop_category == ucp_M) break;
3284 while (eptr < md->end_subject)
3285 {
3286 int len = 1;
3287 if (!utf8) c = *eptr; else
3288 {
3289 GETCHARLEN(c, eptr, len);
3290 }
3291 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3292 if (prop_category != ucp_M) break;
3293 eptr += len;
3294 }
3295 }
3296
3297 /* eptr is now past the end of the maximum run */
3298
3299 if (possessive) continue;
3300 for(;;)
3301 {
3302 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3303 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3304 if (eptr-- == pp) break; /* Stop if tried at original pos */
3305 for (;;) /* Move back over one extended */
3306 {
3307 int len = 1;
3308 BACKCHAR(eptr);
3309 if (!utf8) c = *eptr; else
3310 {
3311 GETCHARLEN(c, eptr, len);
3312 }
3313 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3314 if (prop_category != ucp_M) break;
3315 eptr--;
3316 }
3317 }
3318 }
3319
3320 else
3321 #endif /* SUPPORT_UCP */
3322
3323 #ifdef SUPPORT_UTF8
3324 /* UTF-8 mode */
3325
3326 if (utf8)
3327 {
3328 switch(ctype)
3329 {
3330 case OP_ANY:
3331
3332 /* Special code is required for UTF8, but when the maximum is
3333 unlimited we don't need it, so we repeat the non-UTF8 code. This is
3334 probably worth it, because .* is quite a common idiom. */
3335
3336 if (max < INT_MAX)
3337 {
3338 if ((ims & PCRE_DOTALL) == 0)
3339 {
3340 for (i = min; i < max; i++)
3341 {
3342 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3343 eptr++;
3344 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3345 }
3346 }
3347 else
3348 {
3349 for (i = min; i < max; i++)
3350 {
3351 if (eptr >= md->end_subject) break;
3352 eptr++;
3353 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3354 }
3355 }
3356 }
3357
3358 /* Handle unlimited UTF-8 repeat */
3359
3360 else
3361 {
3362 if ((ims & PCRE_DOTALL) == 0)
3363 {
3364 for (i = min; i < max; i++)
3365 {
3366 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3367 eptr++;
3368 }
3369 break;
3370 }
3371 else
3372 {
3373 c = max - min;
3374 if (c > (unsigned int)(md->end_subject - eptr))
3375 c = md->end_subject - eptr;
3376 eptr += c;
3377 }
3378 }
3379 break;
3380
3381 /* The byte case is the same as non-UTF8 */
3382
3383 case OP_ANYBYTE:
3384 c = max - min;
3385 if (c > (unsigned int)(md->end_subject - eptr))
3386 c = md->end_subject - eptr;
3387 eptr += c;
3388 break;
3389
3390 case OP_ANYNL:
3391 for (i = min; i < max; i++)
3392 {
3393 int len = 1;
3394 if (eptr >= md->end_subject) break;
3395 GETCHARLEN(c, eptr, len);
3396 if (c == 0x000d)
3397 {
3398 if (++eptr >= md->end_subject) break;
3399 if (*eptr == 0x000a) eptr++;
3400 }
3401 else
3402 {
3403 if (c != 0x000a && c != 0x000b && c != 0x000c &&
3404 c != 0x0085 && c != 0x2028 && c != 0x2029)
3405 break;
3406 eptr += len;
3407 }
3408 }
3409 break;
3410
3411 case OP_NOT_DIGIT:
3412 for (i = min; i < max; i++)
3413 {
3414 int len = 1;
3415 if (eptr >= md->end_subject) break;
3416 GETCHARLEN(c, eptr, len);
3417 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3418 eptr+= len;
3419 }
3420 break;
3421
3422 case OP_DIGIT:
3423 for (i = min; i < max; i++)
3424 {
3425 int len = 1;
3426 if (eptr >= md->end_subject) break;
3427 GETCHARLEN(c, eptr, len);
3428 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3429 eptr+= len;
3430 }
3431 break;
3432
3433 case OP_NOT_WHITESPACE:
3434 for (i = min; i < max; i++)
3435 {
3436 int len = 1;
3437 if (eptr >= md->end_subject) break;
3438 GETCHARLEN(c, eptr, len);
3439 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3440 eptr+= len;
3441 }
3442 break;
3443
3444 case OP_WHITESPACE:
3445 for (i = min; i < max; i++)
3446 {
3447 int len = 1;
3448 if (eptr >= md->end_subject) break;
3449 GETCHARLEN(c, eptr, len);
3450 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3451 eptr+= len;
3452 }
3453 break;
3454
3455 case OP_NOT_WORDCHAR:
3456 for (i = min; i < max; i++)
3457 {
3458 int len = 1;
3459 if (eptr >= md->end_subject) break;
3460 GETCHARLEN(c, eptr, len);
3461 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3462 eptr+= len;
3463 }
3464 break;
3465
3466 case OP_WORDCHAR:
3467 for (i = min; i < max; i++)
3468 {
3469 int len = 1;
3470 if (eptr >= md->end_subject) break;
3471 GETCHARLEN(c, eptr, len);
3472 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3473 eptr+= len;
3474 }
3475 break;
3476
3477 default:
3478 RRETURN(PCRE_ERROR_INTERNAL);
3479 }
3480
3481 /* eptr is now past the end of the maximum run */
3482
3483 if (possessive) continue;
3484 for(;;)
3485 {
3486 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3487 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3488 if (eptr-- == pp) break; /* Stop if tried at original pos */
3489 BACKCHAR(eptr);
3490 }
3491 }
3492 else
3493 #endif
3494
3495 /* Not UTF-8 mode */
3496 {
3497 switch(ctype)
3498 {
3499 case OP_ANY:
3500 if ((ims & PCRE_DOTALL) == 0)
3501 {
3502 for (i = min; i < max; i++)
3503 {
3504 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3505 eptr++;
3506 }
3507 break;
3508 }
3509 /* For DOTALL case, fall through and treat as \C */
3510
3511 case OP_ANYBYTE:
3512 c = max - min;
3513 if (c > (unsigned int)(md->end_subject - eptr))
3514 c = md->end_subject - eptr;
3515 eptr += c;
3516 break;
3517
3518 case OP_ANYNL:
3519 for (i = min; i < max; i++)
3520 {
3521 if (eptr >= md->end_subject) break;
3522 c = *eptr;
3523 if (c == 0x000d)
3524 {
3525 if (++eptr >= md->end_subject) break;
3526 if (*eptr == 0x000a) eptr++;
3527 }
3528 else
3529 {
3530 if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3531 break;
3532 eptr++;
3533 }
3534 }
3535 break;
3536
3537 case OP_NOT_DIGIT:
3538 for (i = min; i < max; i++)
3539 {
3540 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
3541 break;
3542 eptr++;
3543 }
3544 break;
3545
3546 case OP_DIGIT:
3547 for (i = min; i < max; i++)
3548 {
3549 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
3550 break;
3551 eptr++;
3552 }
3553 break;
3554
3555 case OP_NOT_WHITESPACE:
3556 for (i = min; i < max; i++)
3557 {
3558 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
3559 break;
3560 eptr++;
3561 }
3562 break;
3563
3564 case OP_WHITESPACE:
3565 for (i = min; i < max; i++)
3566 {
3567 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
3568 break;
3569 eptr++;
3570 }
3571 break;
3572
3573 case OP_NOT_WORDCHAR:
3574 for (i = min; i < max; i++)
3575 {
3576 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
3577 break;
3578 eptr++;
3579 }
3580 break;
3581
3582 case OP_WORDCHAR:
3583 for (i = min; i < max; i++)
3584 {
3585 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
3586 break;
3587 eptr++;
3588 }
3589 break;
3590
3591 default:
3592 RRETURN(PCRE_ERROR_INTERNAL);
3593 }
3594
3595 /* eptr is now past the end of the maximum run */
3596
3597 if (possessive) continue;
3598 while (eptr >= pp)
3599 {
3600 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3601 eptr--;
3602 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3603 }
3604 }
3605
3606 /* Get here if we can't make it match with any permitted repetitions */
3607
3608 RRETURN(MATCH_NOMATCH);
3609 }
3610 /* Control never gets here */
3611
3612 /* There's been some horrible disaster. Arrival here can only mean there is
3613 something seriously wrong in the code above or the OP_xxx definitions. */
3614
3615 default:
3616 DPRINTF(("Unknown opcode %d\n", *ecode));
3617 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3618 }
3619
3620 /* Do not stick any code in here without much thought; it is assumed
3621 that "continue" in the code above comes out to here to repeat the main
3622 loop. */
3623
3624 } /* End of main loop */
3625 /* Control never reaches here */
3626 }
3627
3628
3629 /***************************************************************************
3630 ****************************************************************************
3631 RECURSION IN THE match() FUNCTION
3632
3633 Undefine all the macros that were defined above to handle this. */
3634
/* When match() is built without real recursion (NO_RECURSE), its "local
variables" are macros. Remove every one of them here so that the names are
ordinary identifiers again in the code that follows. The directives are
independent of one another, so they are simply listed alphabetically. */

#ifdef NO_RECURSE
#undef callpat
#undef charptr
#undef condition
#undef ctype
#undef cur_is_word
#undef data
#undef ecode
#undef eptr
#undef eptrb
#undef flags
#undef ims
#undef length
#undef max
#undef min
#undef new_recursive
#undef newptrb
#undef next
#undef number
#undef offset
#undef offset_top
#undef op
#undef original_ims
#undef pp
#undef prev
#undef prev_is_word
#undef save_capture_last
#undef save_offset1
#undef save_offset2
#undef save_offset3
#undef saved_eptr
#undef stacksave
#endif  /* NO_RECURSE */

/* fc and fi are macros in both the recursive and the non-recursive build, so
they are removed unconditionally. */

#undef fi
#undef fc
3680
3681 /***************************************************************************
3682 ***************************************************************************/
3683
3684
3685
3686 /*************************************************
3687 * Execute a Regular Expression *
3688 *************************************************/
3689
3690 /* This function applies a compiled re to a subject string and picks out
3691 portions of the string if it matches. Two elements in the vector are set for
3692 each substring: the offsets to the start and end of the substring.
3693
3694 Arguments:
3695 argument_re points to the compiled expression
3696 extra_data points to extra data or is NULL
3697 subject points to the subject string
3698 length length of subject string (may contain binary zeros)
3699 start_offset where to start in the subject string
3700 options option bits
3701 offsets points to a vector of ints to be filled in with offsets
3702 offsetcount the number of elements in the vector
3703
3704 Returns: > 0 => success; value is the number of elements filled in
3705 = 0 => success, but offsets is not big enough
3706 -1 => failed to match
3707 < -1 => some kind of unexpected problem
3708 */
3709
3710 PCRE_DATA_SCOPE int
3711 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3712 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3713 int offsetcount)
3714 {
3715 int rc, resetcount, ocount;
3716 int first_byte = -1;
3717 int req_byte = -1;
3718 int req_byte2 = -1;
3719 int newline;
3720 unsigned long int ims;
3721 BOOL using_temporary_offsets = FALSE;
3722 BOOL anchored;
3723 BOOL startline;
3724 BOOL firstline;
3725 BOOL first_byte_caseless = FALSE;
3726 BOOL req_byte_caseless = FALSE;
3727 BOOL utf8;
3728 match_data match_block;
3729 match_data *md = &match_block;
3730 const uschar *tables;
3731 const uschar *start_bits = NULL;
3732 USPTR start_match = (USPTR)subject + start_offset;
3733 USPTR end_subject;
3734 USPTR req_byte_ptr = start_match - 1;
3735 eptrblock eptrchain[EPTR_WORK_SIZE];
3736
3737 pcre_study_data internal_study;
3738 const pcre_study_data *study;
3739
3740 real_pcre internal_re;
3741 const real_pcre *external_re = (const real_pcre *)argument_re;
3742 const real_pcre *re = external_re;
3743
3744 /* Plausibility checks */
3745
3746 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3747 if (re == NULL || subject == NULL ||
3748 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3749 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3750
3751 /* Fish out the optional data from the extra_data structure, first setting
3752 the default values. */
3753
3754 study = NULL;
3755 md->match_limit = MATCH_LIMIT;
3756 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3757 md->callout_data = NULL;
3758
3759 /* The table pointer is always in native byte order. */
3760
3761 tables = external_re->tables;
3762
3763 if (extra_data != NULL)
3764 {
3765 register unsigned int flags = extra_data->flags;
3766 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3767 study = (const pcre_study_data *)extra_data->study_data;
3768 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3769 md->match_limit = extra_data->match_limit;
3770 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3771 md->match_limit_recursion = extra_data->match_limit_recursion;
3772 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3773 md->callout_data = extra_data->callout_data;
3774 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3775 }
3776
3777 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
3778 is a feature that makes it possible to save compiled regex and re-use them
3779 in other programs later. */
3780
3781 if (tables == NULL) tables = _pcre_default_tables;
3782
3783 /* Check that the first field in the block is the magic number. If it is not,
3784 test for a regex that was compiled on a host of opposite endianness. If this is
3785 the case, flipped values are put in internal_re and internal_study if there was
3786 study data too. */
3787
3788 if (re->magic_number != MAGIC_NUMBER)
3789 {
3790 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
3791 if (re == NULL) return PCRE_ERROR_BADMAGIC;
3792 if (study != NULL) study = &internal_study;
3793 }
3794
3795 /* Set up other data */
3796
3797 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3798 startline = (re->options & PCRE_STARTLINE) != 0;
3799 firstline = (re->options & PCRE_FIRSTLINE) != 0;
3800
3801 /* The code starts after the real_pcre block and the capture name table. */
3802
3803 md->start_code = (const uschar *)external_re + re->name_table_offset +
3804 re->name_count * re->name_entry_size;
3805
3806 md->start_subject = (USPTR)subject;
3807 md->start_offset = start_offset;
3808 md->end_subject = md->start_subject + length;
3809 end_subject = md->end_subject;
3810
3811 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3812 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3813
3814 md->notbol = (options & PCRE_NOTBOL) != 0;
3815 md->noteol = (options & PCRE_NOTEOL) != 0;
3816 md->notempty = (options & PCRE_NOTEMPTY) != 0;
3817 md->partial = (options & PCRE_PARTIAL) != 0;
3818 md->hitend = FALSE;
3819
3820 md->recursive = NULL; /* No recursion at top level */
3821 md->eptrchain = eptrchain; /* Make workspace generally available */
3822
3823 md->lcc = tables + lcc_offset;
3824 md->ctypes = tables + ctypes_offset;
3825
3826 /* Handle different types of newline. The three bits give eight cases. If
3827 nothing is set at run time, whatever was used at compile time applies. */
3828
3829 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
3830 PCRE_NEWLINE_BITS)
3831 {
3832 case 0: newline = NEWLINE; break; /* Compile-time default */
3833 case PCRE_NEWLINE_CR: newline = '\r'; break;
3834 case PCRE_NEWLINE_LF: newline = '\n'; break;
3835 case PCRE_NEWLINE_CR+
3836 PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3837 case PCRE_NEWLINE_ANY: newline = -1; break;
3838 default: return PCRE_ERROR_BADNEWLINE;
3839 }
3840
3841 if (newline < 0)
3842 {
3843 md->nltype = NLTYPE_ANY;
3844 }
3845 else
3846 {
3847 md->nltype = NLTYPE_FIXED;
3848 if (newline > 255)
3849 {
3850 md->nllen = 2;
3851 md->nl[0] = (newline >> 8) & 255;
3852 md->nl[1] = newline & 255;
3853 }
3854 else
3855 {
3856 md->nllen = 1;
3857 md->nl[0] = newline;
3858 }
3859 }
3860
3861 /* Partial matching is supported only for a restricted set of regexes at the
3862 moment. */
3863
3864 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3865 return PCRE_ERROR_BADPARTIAL;
3866
3867 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3868 back the character offset. */
3869
3870 #ifdef SUPPORT_UTF8
3871 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3872 {
3873 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3874 return PCRE_ERROR_BADUTF8;
3875 if (start_offset > 0 && start_offset < length)
3876 {
3877 int tb = ((uschar *)subject)[start_offset];
3878 if (tb > 127)
3879 {
3880 tb &= 0xc0;
3881 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
3882 }
3883 }
3884 }
3885 #endif
3886
3887 /* The ims options can vary during the matching as a result of the presence
3888 of (?ims) items in the pattern. They are kept in a local variable so that
3889 restoring at the exit of a group is easy. */
3890
3891 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
3892
3893 /* If the expression has got more back references than the offsets supplied can
3894 hold, we get a temporary chunk of working store to use during the matching.
3895 Otherwise, we can use the vector supplied, rounding down its size to a multiple
3896 of 3. */
3897
3898 ocount = offsetcount - (offsetcount % 3);
3899
3900 if (re->top_backref > 0 && re->top_backref >= ocount/3)
3901 {
3902 ocount = re->top_backref * 3 + 3;
3903 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3904 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3905 using_temporary_offsets = TRUE;
3906 DPRINTF(("Got memory to hold back references\n"));
3907 }
3908 else md->offset_vector = offsets;
3909
3910 md->offset_end = ocount;
3911 md->offset_max = (2*ocount)/3;
3912 md->offset_overflow = FALSE;
3913 md->capture_last = -1;
3914
3915 /* Compute the minimum number of offsets that we need to reset each time. Doing
3916 this makes a huge difference to execution time when there aren't many brackets
3917 in the pattern. */
3918
3919 resetcount = 2 + re->top_bracket * 2;
3920 if (resetcount > offsetcount) resetcount = ocount;
3921
3922 /* Reset the working variable associated with each extraction. These should
3923 never be used unless previously set, but they get saved and restored, and so we
3924 initialize them to avoid reading uninitialized locations. */
3925
3926 if (md->offset_vector != NULL)
3927 {
3928 register int *iptr = md->offset_vector + ocount;
3929 register int *iend = iptr - resetcount/2 + 1;
3930 while (--iptr >= iend) *iptr = -1;
3931 }
3932
3933 /* Set up the first character to match, if available. The first_byte value is
3934 never set for an anchored regular expression, but the anchoring may be forced
3935 at run time, so we have to test for anchoring. The first char may be unset for
3936 an unanchored pattern, of course. If there's no first char and the pattern was
3937 studied, there may be a bitmap of possible first characters. */
3938
3939 if (!anchored)
3940 {
3941 if ((re->options & PCRE_FIRSTSET) != 0)
3942 {
3943 first_byte = re->first_byte & 255;
3944 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3945 first_byte = md->lcc[first_byte];
3946 }
3947 else
3948 if (!startline && study != NULL &&
3949 (study->options & PCRE_STUDY_MAPPED) != 0)
3950 start_bits = study->start_bits;
3951 }
3952
3953 /* For anchored or unanchored matches, there may be a "last known required
3954 character" set. */
3955
3956 if ((re->options & PCRE_REQCHSET) != 0)
3957 {
3958 req_byte = re->req_byte & 255;
3959 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
3960 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
3961 }
3962
3963
3964 /* ==========================================================================*/
3965
3966 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
3967 the loop runs just once. */
3968
3969 for(;;)
3970 {
3971 USPTR save_end_subject = end_subject;
3972
3973 /* Reset the maximum number of extractions we might see. */
3974
3975 if (md->offset_vector != NULL)
3976 {
3977 register int *iptr = md->offset_vector;
3978 register int *iend = iptr + resetcount;
3979 while (iptr < iend) *iptr++ = -1;
3980 }
3981
3982 /* Advance to a unique first char if possible. If firstline is TRUE, the
3983 start of the match is constrained to the first line of a multiline string.
3984 That is, the match must be before or at the first newline. Implement this by
3985 temporarily adjusting end_subject so that we stop scanning at a newline. If
3986 the match fails at the newline, later code breaks this loop. */
3987
3988 if (firstline)
3989 {
3990 USPTR t = start_match;
3991 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3992 end_subject = t;
3993 }
3994
3995 /* Now test for a unique first byte */
3996
3997 if (first_byte >= 0)
3998 {
3999 if (first_byte_caseless)
4000 while (start_match < end_subject &&
4001 md->lcc[*start_match] != first_byte)
4002 start_match++;
4003 else
4004 while (start_match < end_subject && *start_match != first_byte)
4005 start_match++;
4006 }
4007
4008 /* Or to just after a linebreak for a multiline match if possible */
4009
4010 else if (startline)
4011 {
4012 if (start_match > md->start_subject + start_offset)
4013 {
4014 while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4015 start_match++;
4016 }
4017 }
4018
4019 /* Or to a non-unique first char after study */
4020
4021 else if (start_bits != NULL)
4022 {
4023 while (start_match < end_subject)
4024 {
4025 register unsigned int c = *start_match;
4026 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4027 }
4028 }
4029
4030 /* Restore fudged end_subject */
4031
4032 end_subject = save_end_subject;
4033
4034 #ifdef DEBUG /* Sigh. Some compilers never learn. */
4035 printf(">>>> Match against: ");
4036 pchars(start_match, end_subject - start_match, TRUE, md);
4037 printf("\n");
4038 #endif
4039
4040 /* If req_byte is set, we know that that character must appear in the subject
4041 for the match to succeed. If the first character is set, req_byte must be
4042 later in the subject; otherwise the test starts at the match point. This
4043 optimization can save a huge amount of backtracking in patterns with nested
4044 unlimited repeats that aren't going to match. Writing separate code for
4045 cased/caseless versions makes it go faster, as does using an autoincrement
4046 and backing off on a match.
4047
4048 HOWEVER: when the subject string is very, very long, searching to its end can
4049 take a long time, and give bad performance on quite ordinary patterns. This
4050 showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4051 string... so we don't do this when the string is sufficiently long.
4052
4053 ALSO: this processing is disabled when partial matching is requested.
4054 */
4055
4056 if (req_byte >= 0 &&
4057 end_subject - start_match < REQ_BYTE_MAX &&
4058 !md->partial)
4059 {
4060 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4061
4062 /* We don't need to repeat the search if we haven't yet reached the
4063 place we found it at last time. */
4064
4065 if (p > req_byte_ptr)
4066 {
4067 if (req_byte_caseless)
4068 {
4069 while (p < end_subject)
4070 {
4071 register int pp = *p++;
4072 if (pp == req_byte || pp == req_byte2) { p--; break; }
4073 }
4074 }
4075 else
4076 {
4077 while (p < end_subject)
4078 {
4079 if (*p++ == req_byte) { p--; break; }
4080 }
4081 }
4082
4083 /* If we can't find the required character, break the matching loop,
4084 forcing a match failure. */
4085
4086 if (p >= end_subject)
4087 {
4088 rc = MATCH_NOMATCH;
4089 break;
4090 }
4091
4092 /* If we have found the required character, save the point where we
4093 found it, so that we don't search again next time round the loop if
4094 the start hasn't passed this character yet. */
4095
4096 req_byte_ptr = p;
4097 }
4098 }
4099
4100 /* OK, we can now run the match. */
4101
4102 md->start_match = start_match;
4103 md->match_call_count = 0;
4104 md->eptrn = 0; /* Next free eptrchain slot */
4105 rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4106
4107 /* Any return other than MATCH_NOMATCH breaks the loop. */
4108
4109 if (rc != MATCH_NOMATCH) break;
4110
4111 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4112 newline in the subject (though it may continue over the newline). Therefore,
4113 if we have just failed to match, starting at a newline, do not continue. */
4114
4115 if (firstline && IS_NEWLINE(start_match)) break;
4116
4117 /* Advance the match position by one character. */
4118
4119 start_match++;
4120 #ifdef SUPPORT_UTF8
4121 if (utf8)
4122 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4123 start_match++;
4124 #endif
4125
4126 /* Break the loop if the pattern is anchored or if we have passed the end of
4127 the subject. */
4128
4129 if (anchored || start_match > end_subject) break;
4130
4131 /* If we have just passed a CR and the newline option is CRLF or ANY, and we
4132 are now at a LF, advance the match position by one more character. */
4133
4134 if (start_match[-1] == '\r' &&
4135 (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
4136 start_match < end_subject &&
4137 *start_match == '\n')
4138 start_match++;
4139
4140 } /* End of for(;;) "bumpalong" loop */
4141
4142 /* ==========================================================================*/
4143
4144 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4145 conditions is true:
4146
4147 (1) The pattern is anchored;
4148
4149 (2) We are past the end of the subject;
4150
4151 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4152 this option requests that a match occur at or before the first newline in
4153 the subject.
4154
4155 When we have a match and the offset vector is big enough to deal with any
4156 backreferences, captured substring offsets will already be set up. In the case
4157 where we had to get some local store to hold offsets for backreference
4158 processing, copy those that we can. In this case there need not be overflow if
4159 certain parts of the pattern were not used, even though there are more
4160 capturing parentheses than vector slots. */
4161
4162 if (rc == MATCH_MATCH)
4163 {
4164 if (using_temporary_offsets)
4165 {
4166 if (offsetcount >= 4)
4167 {
4168 memcpy(offsets + 2, md->offset_vector + 2,
4169 (offsetcount - 2) * sizeof(int));
4170 DPRINTF(("Copied offsets from temporary memory\n"));
4171 }
4172 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4173 DPRINTF(("Freeing temporary memory\n"));
4174 (pcre_free)(md->offset_vector);
4175 }
4176
4177 /* Set the return code to the number of captured strings, or 0 if there are
4178 too many to fit into the vector. */
4179
4180 rc = md->offset_overflow? 0 : md->end_offset_top/2;
4181
4182 /* If there is space, set up the whole thing as substring 0. */
4183
4184 if (offsetcount < 2) rc = 0; else
4185 {
4186 offsets[0] = start_match - md->start_subject;
4187 offsets[1] = md->end_match_ptr - md->start_subject;
4188 }
4189
4190 DPRINTF((">>>> returning %d\n", rc));
4191 return rc;
4192 }
4193
4194 /* Control gets here if there has been an error, or if the overall match
4195 attempt has failed at all permitted starting positions. */
4196
4197 if (using_temporary_offsets)
4198 {
4199 DPRINTF(("Freeing temporary memory\n"));
4200 (pcre_free)(md->offset_vector);
4201 }
4202
4203 if (rc != MATCH_NOMATCH)
4204 {
4205 DPRINTF((">>>> error: returning %d\n", rc));
4206 return rc;
4207 }
4208 else if (md->partial && md->hitend)
4209 {
4210 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4211 return PCRE_ERROR_PARTIAL;
4212 }
4213 else
4214 {
4215 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4216 return PCRE_ERROR_NOMATCH;
4217 }
4218 }
4219
4220 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12