/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory | Revision Log


Revision 97 - (show annotations) (download)
Mon Mar 5 12:36:47 2007 UTC (7 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 126514 byte(s)
Applied Bob and Daniel's patches to convert the build system to automake. Added 
the maintain directory, containing files that are used for maintenance, but are 
not distributed. This is an intermediate step.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2006 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #define NLBLOCK md /* Block containing newline information */
46 #define PSSTART start_subject /* Field containing processed string start */
47 #define PSEND end_subject /* Field containing processed string end */
48 /* NOTE(review): these three precede the include, presumably for macros in that header - confirm. */
49 #include "pcre_internal.h"
50
51 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
52 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
53 /* It is a count of blocks: match() returns PCRE_ERROR_NULLWSLIMIT when md->eptrn reaches it. */
54 #define EPTR_WORK_SIZE (1000)
55
56 /* Flag bits for the match() function; they are ORed into its "flags" argument */
57
58 #define match_condassert 0x01 /* Called to check a condition assertion */
59 #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
60 #define match_tail_recursed 0x04 /* Tail recursive call */
61
62 /* Non-error returns from the match() function. Error returns are externally
63 defined PCRE_ERROR_xxx codes, which are all negative. */
64
65 #define MATCH_MATCH 1
66 #define MATCH_NOMATCH 0
67
68 /* Maximum number of ints of offset to save on the stack for recursive calls.
69 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
70 because the offset vector is always a multiple of 3 long. */
71
72 #define REC_STACK_SAVE_MAX 30
73
74 /* Min and max values for the common repeats; for the maxima, 0 => infinity.
75 Entry i of rep_min pairs with entry i of rep_max. NOTE(review): the order looks like star, minimizing star, plus, minimizing plus, query, minimizing query - confirm at the uses. */
76 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
77 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
78
79
80
81 #ifdef DEBUG
82 /*************************************************
83 * Debugging function to print chars *
84 *************************************************/
85
86 /* Debug aid: write "length" bytes starting at p to stdout. Bytes for which
87 isprint() is true are written as they are, and all others as \xhh.
88
89 Arguments:
90 p points to characters
91 length number to print
92 is_subject TRUE if p is in the subject: printing then stops at md->end_subject
93 md pointer to matching data block; examined only if is_subject is TRUE
94
95 Returns: nothing
96 */
97
98 static void
99 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
100 {
101 unsigned int ch;
102 if (is_subject && md->end_subject - p < length) length = md->end_subject - p;
103 for (; length > 0; length--, p++)
104 { ch = *p; printf(isprint(ch)? "%c" : "\\x%02x", ch); }
105 }
106 #endif
107
108
109
110 /*************************************************
111 * Match a back-reference *
112 *************************************************/
113
114 /* Compare "length" bytes at eptr with the bytes of an earlier capture. If a
115 back reference hasn't been set, the length that is passed is greater than the
116 number of characters left in the string, so the match fails.
117
118 Arguments:
119 offset index into the offset vector of the capture's start offset
120 eptr points into the subject
121 length length to be matched
122 md points to match data block (subject bounds, offset vector, lcc table)
123 ims the ims flags; only PCRE_CASELESS is examined here
124
125 Returns: TRUE if matched; FALSE on a mismatch or if too few bytes remain */
126
127 static BOOL
128 match_ref(int offset, register USPTR eptr, int length, match_data *md,
129 unsigned long int ims)
130 {
131 USPTR p = md->start_subject + md->offset_vector[offset];
132 /* Debug trace: the captured text is shown only if "length" fits; for an unset reference it cannot. */
133 #ifdef DEBUG
134 if (eptr >= md->end_subject)
135 printf("matching subject <null>");
136 else
137 {
138 printf("matching subject ");
139 pchars(eptr, length, TRUE, md);
140 }
141 printf(" against backref ");
142 if (length <= md->end_subject - eptr) pchars(p, length, FALSE, md);
143 printf("\n");
144 #endif
145
146 /* Always fail if not enough characters left */
147
148 if (length > md->end_subject - eptr) return FALSE;
149
150 /* Separate the caseless case for speed */
151
152 if ((ims & PCRE_CASELESS) != 0)
153 {
154 while (length-- > 0)
155 if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
156 }
157 else
158 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
159
160 return TRUE;
161 }
162
163
164
165 /***************************************************************************
166 ****************************************************************************
167 RECURSION IN THE match() FUNCTION
168
169 The match() function is highly recursive, though not every recursive call
170 increases the recursive depth. Nevertheless, some regular expressions can cause
171 it to recurse to a great depth. I was writing for Unix, so I just let it call
172 itself recursively. This uses the stack for saving everything that has to be
173 saved for a recursive call. On Unix, the stack can be large, and this works
174 fine.
175
176 It turns out that on some non-Unix-like systems there are problems with
177 programs that use a lot of stack. (This despite the fact that every last chip
178 has oodles of memory these days, and techniques for extending the stack have
179 been known for decades.) So....
180
181 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
182 calls by keeping local variables that need to be preserved in blocks of memory
183 obtained from malloc() instead of on the stack. Macros are used to
184 achieve this so that the actual code doesn't look very different to what it
185 always used to.
186 ****************************************************************************
187 ***************************************************************************/
188
189
190 /* These versions of the macros use the stack, as normal. There are debugging
191 versions and production versions. */
192 /* RMATCH stores in rx the result of match(ra,...,rg, rdepth+1); RRETURN returns ra from match(). */
193 #ifndef NO_RECURSE
194 #define REGISTER register
195 #ifdef DEBUG
196 #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
197 { /* A bare block: a use directly followed by "else" needs its own braces */ \
198 printf("match() called in line %d\n", __LINE__); \
199 rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
200 printf("to line %d\n", __LINE__); \
201 }
202 #define RRETURN(ra) \
203 { \
204 printf("match() returned %d from line %d ", ra, __LINE__); /* ra appears twice in this form */ \
205 return ra; \
206 }
207 #else
208 #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
209 rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
210 #define RRETURN(ra) return ra
211 #endif
212
213 #else
214
215
216 /* These versions of the macros manage a private stack on the heap. Note
217 that the rd argument of RMATCH isn't actually used. It's the md argument of
218 match(), which never changes. */
219 /* RMATCH marks the resume point with setjmp(), pushes a new frame, and restarts match() at HEAP_RECURSE. */
220 #define REGISTER
221 /* If no new frame can be obtained, the current "call" fails with PCRE_ERROR_NOMEMORY. */
222 #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\
223 { heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
224 if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
225 if (setjmp(frame->Xwhere) == 0)\
226 {\
227 newframe->Xeptr = ra;\
228 newframe->Xecode = rb;\
229 newframe->Xoffset_top = rc;\
230 newframe->Xims = re;\
231 newframe->Xeptrb = rf;\
232 newframe->Xflags = rg;\
233 newframe->Xrdepth = frame->Xrdepth + 1;\
234 newframe->Xprevframe = frame;\
235 frame = newframe;\
236 DPRINTF(("restarting from line %d\n", __LINE__));\
237 goto HEAP_RECURSE;\
238 }\
239 else\
240 {\
241 DPRINTF(("longjumped back to line %d\n", __LINE__));\
242 frame = md->thisframe; /* Set by RRETURN just before its longjmp() */\
243 rx = frame->Xresult;\
244 }\
245 }
246 /* RRETURN frees the current frame, then resumes the previous frame's RMATCH with longjmp(), or really returns at top level. */
247 #define RRETURN(ra)\
248 {\
249 heapframe *newframe = frame;\
250 frame = newframe->Xprevframe;\
251 (pcre_stack_free)(newframe);\
252 if (frame != NULL)\
253 {\
254 frame->Xresult = ra; /* RMATCH copies this into its rx */\
255 md->thisframe = frame; /* RMATCH reloads "frame" from here after the jump */\
256 longjmp(frame->Xwhere, 1);\
257 }\
258 return ra;\
259 }
260
261
262 /* Structure for remembering the local variables in a private frame */
263 /* Each X member backs the like-named variable of match() via the #defines at its start. */
264 typedef struct heapframe {
265 struct heapframe *Xprevframe; /* Frame to resume on RRETURN; NULL at top level */
266
267 /* Function arguments that may change */
268
269 const uschar *Xeptr;
270 const uschar *Xecode;
271 int Xoffset_top;
272 unsigned long int Xims; /* Unsigned, like match()'s ims argument and Xoriginal_ims */
273 eptrblock *Xeptrb;
274 int Xflags;
275 unsigned int Xrdepth;
276
277 /* Function local variables */
278
279 const uschar *Xcallpat;
280 const uschar *Xcharptr;
281 const uschar *Xdata;
282 const uschar *Xnext;
283 const uschar *Xpp;
284 const uschar *Xprev;
285 const uschar *Xsaved_eptr;
286
287 recursion_info Xnew_recursive;
288
289 BOOL Xcur_is_word;
290 BOOL Xcondition;
291 BOOL Xprev_is_word;
292
293 unsigned long int Xoriginal_ims;
294
295 #ifdef SUPPORT_UCP
296 int Xprop_type;
297 int Xprop_value;
298 int Xprop_fail_result;
299 int Xprop_category;
300 int Xprop_chartype;
301 int Xprop_script;
302 #endif
303
304 int Xctype;
305 unsigned int Xfc;
306 int Xfi;
307 int Xlength;
308 int Xmax;
309 int Xmin;
310 int Xnumber;
311 int Xoffset;
312 int Xop;
313 int Xsave_capture_last;
314 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
315 int Xstacksave[REC_STACK_SAVE_MAX];
316
317 eptrblock Xnewptrb;
318
319 /* Place to pass back result, and where to jump back to */
320
321 int Xresult; /* Written by RRETURN, read by the resumed RMATCH */
322 jmp_buf Xwhere; /* Filled in by setjmp() in RMATCH */
323
324 } heapframe;
325
326 #endif
327
328
329 /***************************************************************************
330 ***************************************************************************/
331
332
333
334 /*************************************************
335 * Match from current position *
336 *************************************************/
337
338 /* This function is called recursively in many circumstances. Whenever it
339 returns a negative (error) response, the outer incarnation must also return the
340 same response.
341
342 Performance note: It might be tempting to extract commonly used fields from the
343 md structure (e.g. utf8, end_subject) into individual variables to improve
344 performance. Tests using gcc on a SPARC disproved this; in the first case, it
345 made performance worse.
346
347 Arguments:
348 eptr pointer to current character in subject
349 ecode pointer to current position in compiled code
350 offset_top current top pointer
351 md pointer to "static" info for the match
352 ims current /i, /m, and /s options
353 eptrb pointer to chain of blocks containing eptr at start of
354 brackets - for testing for empty matches
355 flags can contain
356 match_condassert - this is an assertion condition
357 match_cbegroup - this is the start of an unlimited repeat
358 group that can match an empty string
359 match_tail_recursed - this is a tail_recursed group
360 rdepth the recursion depth
361
362 Returns: MATCH_MATCH if matched ) these values are >= 0
363 MATCH_NOMATCH if failed to match )
364 a negative PCRE_ERROR_xxx value if aborted by an error condition
365 (e.g. stopped by repeated call or recursion limit)
366 */
367
368 static int
369 match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
370 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
371 int flags, unsigned int rdepth)
372 {
373 /* These variables do not need to be preserved over recursion in this function,
374 so they can be ordinary variables in all cases. Mark some of them with
375 "register" because they are used a lot in loops. */
376
377 register int rrc; /* Returns from recursive calls */
378 register int i; /* Used for loops not involving calls to RMATCH() */
379 register unsigned int c; /* Character values not kept over RMATCH() calls */
380 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
381
382 BOOL minimize, possessive; /* Quantifier options */
383
384 /* When recursion is not being used, all "local" variables that have to be
385 preserved over calls to RMATCH() are part of a "frame" which is obtained from
386 heap storage. Set up the top-level frame here; others are obtained from the
387 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
388
389 #ifdef NO_RECURSE
390 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
391 frame->Xprevframe = NULL; /* Marks the top level */
392
393 /* Copy in the original argument variables */
394
395 frame->Xeptr = eptr;
396 frame->Xecode = ecode;
397 frame->Xoffset_top = offset_top;
398 frame->Xims = ims;
399 frame->Xeptrb = eptrb;
400 frame->Xflags = flags;
401 frame->Xrdepth = rdepth;
402
403 /* This is where control jumps back to to effect "recursion" */
404
405 HEAP_RECURSE:
406
407 /* Macros make the argument variables come from the current frame */
408
409 #define eptr frame->Xeptr
410 #define ecode frame->Xecode
411 #define offset_top frame->Xoffset_top
412 #define ims frame->Xims
413 #define eptrb frame->Xeptrb
414 #define flags frame->Xflags
415 #define rdepth frame->Xrdepth
416
417 /* Ditto for the local variables */
418
419 #ifdef SUPPORT_UTF8
420 #define charptr frame->Xcharptr
421 #endif
422 #define callpat frame->Xcallpat
423 #define data frame->Xdata
424 #define next frame->Xnext
425 #define pp frame->Xpp
426 #define prev frame->Xprev
427 #define saved_eptr frame->Xsaved_eptr
428
429 #define new_recursive frame->Xnew_recursive
430
431 #define cur_is_word frame->Xcur_is_word
432 #define condition frame->Xcondition
433 #define prev_is_word frame->Xprev_is_word
434
435 #define original_ims frame->Xoriginal_ims
436
437 #ifdef SUPPORT_UCP
438 #define prop_type frame->Xprop_type
439 #define prop_value frame->Xprop_value
440 #define prop_fail_result frame->Xprop_fail_result
441 #define prop_category frame->Xprop_category
442 #define prop_chartype frame->Xprop_chartype
443 #define prop_script frame->Xprop_script
444 #endif
445
446 #define ctype frame->Xctype
447 #define fc frame->Xfc
448 #define fi frame->Xfi
449 #define length frame->Xlength
450 #define max frame->Xmax
451 #define min frame->Xmin
452 #define number frame->Xnumber
453 #define offset frame->Xoffset
454 #define op frame->Xop
455 #define save_capture_last frame->Xsave_capture_last
456 #define save_offset1 frame->Xsave_offset1
457 #define save_offset2 frame->Xsave_offset2
458 #define save_offset3 frame->Xsave_offset3
459 #define stacksave frame->Xstacksave
460
461 #define newptrb frame->Xnewptrb
462
463 /* When recursion is being used, local variables are allocated on the stack and
464 get preserved during recursion in the normal way. In this environment, fi and
465 i, and fc and c, can be the same variables. */
466
467 #else /* NO_RECURSE not defined */
468 #define fi i
469 #define fc c
470
471
472 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
473 const uschar *charptr; /* in small blocks of the code. My normal */
474 #endif /* style of coding would have declared */
475 const uschar *callpat; /* them within each of those blocks. */
476 const uschar *data; /* However, in order to accommodate the */
477 const uschar *next; /* version of this code that uses an */
478 USPTR pp; /* external "stack" implemented on the */
479 const uschar *prev; /* heap, it is easier to declare them all */
480 USPTR saved_eptr; /* here, so the declarations can be cut */
481 /* out in a block. The only declarations */
482 recursion_info new_recursive; /* within blocks below are for variables */
483 /* that do not have to be preserved over */
484 BOOL cur_is_word; /* a recursive call to RMATCH(). */
485 BOOL condition;
486 BOOL prev_is_word;
487
488 unsigned long int original_ims;
489
490 #ifdef SUPPORT_UCP
491 int prop_type;
492 int prop_value;
493 int prop_fail_result;
494 int prop_category;
495 int prop_chartype;
496 int prop_script;
497 #endif
498
499 int ctype;
500 int length;
501 int max;
502 int min;
503 int number;
504 int offset;
505 int op;
506 int save_capture_last;
507 int save_offset1, save_offset2, save_offset3;
508 int stacksave[REC_STACK_SAVE_MAX];
509
510 eptrblock newptrb;
511 #endif /* NO_RECURSE */
512
513 /* These statements are here to stop the compiler complaining about uninitialized
514 variables. */
515
516 #ifdef SUPPORT_UCP
517 prop_value = 0;
518 prop_fail_result = 0;
519 #endif
520
521
522 /* This label is used for tail recursion, which is used in a few cases even
523 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
524 used. Thanks to Ian Taylor for noticing this possibility and sending the
525 original patch. */
526
527 TAIL_RECURSE:
528
529 /* OK, now we can get on with the real code of the function. Recursive calls
530 are specified by the macro RMATCH and RRETURN is used to return. When
531 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
532 and a "return", respectively (possibly with some debugging if DEBUG is
533 defined). However, RMATCH isn't like a function call because it's quite a
534 complicated macro. It has to be used in one particular way. This shouldn't,
535 however, impact performance when true recursion is being used. */
536
537 /* First check that we haven't called match() too many times, or that we
538 haven't exceeded the recursive call limit. */
539
540 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
541 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
542
543 original_ims = ims; /* Save for resetting on ')' */
544
545 #ifdef SUPPORT_UTF8
546 utf8 = md->utf8; /* Local copy of the flag */
547 #else
548 utf8 = FALSE;
549 #endif
550
551 /* At the start of a group with an unlimited repeat that may match an empty
552 string, the match_cbegroup flag is set. When this is the case, add the current
553 subject pointer to the chain of such remembered pointers, to be checked when we
554 hit the closing ket, in order to break infinite loops that match no characters.
555 When match() is called in other circumstances, don't add to the chain. If this
556 is a tail recursion, use a block from the workspace, as the one on the stack is
557 already used. */
558
559 if ((flags & match_cbegroup) != 0)
560 {
561 eptrblock *p;
562 if ((flags & match_tail_recursed) != 0)
563 {
564 if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
565 p = md->eptrchain + md->eptrn++;
566 }
567 else p = &newptrb;
568 p->epb_saved_eptr = eptr;
569 p->epb_prev = eptrb;
570 eptrb = p;
571 }
572
573 /* Now start processing the opcodes. */
574
575 for (;;)
576 {
577 minimize = possessive = FALSE;
578 op = *ecode;
579
580 /* For partial matching, remember if we ever hit the end of the subject after
581 matching at least one subject character. */
582
583 if (md->partial &&
584 eptr >= md->end_subject &&
585 eptr > md->start_match)
586 md->hitend = TRUE;
587
588 switch(op)
589 {
590 /* Handle a capturing bracket. If there is space in the offset vector, save
591 the current subject position in the working slot at the top of the vector.
592 We mustn't change the current values of the data slot, because they may be
593 set from a previous iteration of this group, and be referred to by a
594 reference inside the group.
595
596 If the bracket fails to match, we need to restore this value and also the
597 values of the final offsets, in case they were set by a previous iteration
598 of the same bracket.
599
600 If there isn't enough space in the offset vector, treat this as if it were
601 a non-capturing bracket. Don't worry about setting the flag for the error
602 case here; that is handled in the code for KET. */
603
604 case OP_CBRA:
605 case OP_SCBRA:
606 number = GET2(ecode, 1+LINK_SIZE);
607 offset = number << 1;
608
609 #ifdef DEBUG
610 printf("start bracket %d\n", number);
611 printf("subject=");
612 pchars(eptr, 16, TRUE, md);
613 printf("\n");
614 #endif
615
616 if (offset < md->offset_max)
617 {
618 save_offset1 = md->offset_vector[offset];
619 save_offset2 = md->offset_vector[offset+1];
620 save_offset3 = md->offset_vector[md->offset_end - number];
621 save_capture_last = md->capture_last;
622
623 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
624 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
625
626 flags = (op == OP_SCBRA)? match_cbegroup : 0;
627 do
628 {
629 RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
630 ims, eptrb, flags);
631 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
632 md->capture_last = save_capture_last;
633 ecode += GET(ecode, 1);
634 }
635 while (*ecode == OP_ALT);
636
637 DPRINTF(("bracket %d failed\n", number));
638
639 md->offset_vector[offset] = save_offset1;
640 md->offset_vector[offset+1] = save_offset2;
641 md->offset_vector[md->offset_end - number] = save_offset3;
642
643 RRETURN(MATCH_NOMATCH);
644 }
645
646 /* Insufficient room for saving captured contents. Treat as a non-capturing
647 bracket. */
648
649 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
650
651 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
652 final alternative within the brackets, we would return the result of a
653 recursive call to match() whatever happened. We can reduce stack usage by
654 turning this into a tail recursion. */
655
656 case OP_BRA:
657 case OP_SBRA:
658 DPRINTF(("start non-capturing bracket\n"));
659 flags = (op >= OP_SBRA)? match_cbegroup : 0;
660 for (;;)
661 {
662 if (ecode[GET(ecode, 1)] != OP_ALT)
663 {
664 ecode += _pcre_OP_lengths[*ecode];
665 flags |= match_tail_recursed;
666 DPRINTF(("bracket 0 tail recursion\n"));
667 goto TAIL_RECURSE;
668 }
669
670 /* For non-final alternatives, continue the loop for a NOMATCH result;
671 otherwise return. */
672
673 RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
674 eptrb, flags);
675 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
676 ecode += GET(ecode, 1);
677 }
678 /* Control never reaches here. */
679
680 /* Conditional group: compilation checked that there are no more than
681 two branches. If the condition is false, skipping the first branch takes us
682 past the end if there is only one branch, but that's OK because that is
683 exactly what going to the ket would do. As there is only one branch to be
684 obeyed, we can use tail recursion to avoid using another stack frame. */
685
686 case OP_COND:
687 case OP_SCOND:
688 if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
689 {
690 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
691 condition = md->recursive != NULL &&
692 (offset == RREF_ANY || offset == md->recursive->group_num);
693 ecode += condition? 3 : GET(ecode, 1);
694 }
695
696 else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */
697 {
698 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
699 condition = offset < offset_top && md->offset_vector[offset] >= 0;
700 ecode += condition? 3 : GET(ecode, 1);
701 }
702
703 else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */
704 {
705 condition = FALSE;
706 ecode += GET(ecode, 1);
707 }
708
709 /* The condition is an assertion. Call match() to evaluate it - setting
710 the final argument match_condassert causes it to stop at the end of an
711 assertion. */
712
713 else
714 {
715 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
716 match_condassert);
717 if (rrc == MATCH_MATCH)
718 {
719 condition = TRUE;
720 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
721 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
722 }
723 else if (rrc != MATCH_NOMATCH)
724 {
725 RRETURN(rrc); /* Need braces because of following else */
726 }
727 else
728 {
729 condition = FALSE;
730 ecode += GET(ecode, 1);
731 }
732 }
733
734 /* We are now at the branch that is to be obeyed. As there is only one,
735 we can use tail recursion to avoid using another stack frame. If the second
736 alternative doesn't exist, we can just plough on. */
737
738 if (condition || *ecode == OP_ALT)
739 {
740 ecode += 1 + LINK_SIZE;
741 flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
742 goto TAIL_RECURSE;
743 }
744 else
745 {
746 ecode += 1 + LINK_SIZE;
747 }
748 break;
749
750
751 /* End of the pattern. If we are in a top-level recursion, we should
752 restore the offsets appropriately and continue from after the call. */
753
754 case OP_END:
755 if (md->recursive != NULL && md->recursive->group_num == 0)
756 {
757 recursion_info *rec = md->recursive;
758 DPRINTF(("End of pattern in a (?0) recursion\n"));
759 md->recursive = rec->prevrec;
760 memmove(md->offset_vector, rec->offset_save,
761 rec->saved_max * sizeof(int));
762 md->start_match = rec->save_start;
763 ims = original_ims;
764 ecode = rec->after_call;
765 break;
766 }
767
768 /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
769 string - backtracking will then try other alternatives, if any. */
770
771 if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);
772 md->end_match_ptr = eptr; /* Record where we ended */
773 md->end_offset_top = offset_top; /* and how many extracts were taken */
774 RRETURN(MATCH_MATCH);
775
776 /* Change option settings */
777
778 case OP_OPT:
779 ims = ecode[1];
780 ecode += 2;
781 DPRINTF(("ims set to %02lx\n", ims));
782 break;
783
784 /* Assertion brackets. Check the alternative branches in turn - the
785 matching won't pass the KET for an assertion. If any one branch matches,
786 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
787 start of each branch to move the current point backwards, so the code at
788 this level is identical to the lookahead case. */
789
790 case OP_ASSERT:
791 case OP_ASSERTBACK:
792 do
793 {
794 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
795 if (rrc == MATCH_MATCH) break;
796 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
797 ecode += GET(ecode, 1);
798 }
799 while (*ecode == OP_ALT);
800 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
801
802 /* If checking an assertion for a condition, return MATCH_MATCH. */
803
804 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
805
806 /* Continue from after the assertion, updating the offsets high water
807 mark, since extracts may have been taken during the assertion. */
808
809 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
810 ecode += 1 + LINK_SIZE;
811 offset_top = md->end_offset_top;
812 continue;
813
814 /* Negative assertion: all branches must fail to match */
815
816 case OP_ASSERT_NOT:
817 case OP_ASSERTBACK_NOT:
818 do
819 {
820 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
821 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
822 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
823 ecode += GET(ecode,1);
824 }
825 while (*ecode == OP_ALT);
826
827 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
828
829 ecode += 1 + LINK_SIZE;
830 continue;
831
832 /* Move the subject pointer back. This occurs only at the start of
833 each branch of a lookbehind assertion. If we are too close to the start to
834 move back, this match function fails. When working with UTF-8 we move
835 back a number of characters, not bytes. */
836
837 case OP_REVERSE:
838 #ifdef SUPPORT_UTF8
839 if (utf8)
840 {
841 i = GET(ecode, 1);
842 while (i-- > 0)
843 {
844 eptr--;
845 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
846 BACKCHAR(eptr)
847 }
848 }
849 else
850 #endif
851
852 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
853
854 {
855 eptr -= GET(ecode, 1);
856 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
857 }
858
859 /* Skip to next op code */
860
861 ecode += 1 + LINK_SIZE;
862 break;
863
864 /* The callout item calls an external function, if one is provided, passing
865 details of the match so far. This is mainly for debugging, though the
866 function is able to force a failure. */
867
868 case OP_CALLOUT:
869 if (pcre_callout != NULL)
870 {
871 pcre_callout_block cb;
872 cb.version = 1; /* Version 1 of the callout block */
873 cb.callout_number = ecode[1];
874 cb.offset_vector = md->offset_vector;
875 cb.subject = (PCRE_SPTR)md->start_subject;
876 cb.subject_length = md->end_subject - md->start_subject;
877 cb.start_match = md->start_match - md->start_subject;
878 cb.current_position = eptr - md->start_subject;
879 cb.pattern_position = GET(ecode, 2);
880 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
881 cb.capture_top = offset_top/2;
882 cb.capture_last = md->capture_last;
883 cb.callout_data = md->callout_data;
884 if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
885 if (rrc < 0) RRETURN(rrc);
886 }
887 ecode += 2 + 2*LINK_SIZE;
888 break;
889
890 /* Recursion either matches the current regex, or some subexpression. The
891 offset data is the offset to the starting bracket from the start of the
892 whole pattern. (This is so that it works from duplicated subpatterns.)
893
894 If there are any capturing brackets started but not finished, we have to
895 save their starting points and reinstate them after the recursion. However,
896 we don't know how many such there are (offset_top records the completed
897 total) so we just have to save all the potential data. There may be up to
898 65535 such values, which is too large to put on the stack, but using malloc
899 for small numbers seems expensive. As a compromise, the stack is used when
900 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
901 is used. If the malloc fails, the match is abandoned by returning
902 PCRE_ERROR_NOMEMORY, which the outer calls of match() pass back up like any
903 other negative error code.
904
905 There are also other values that have to be saved. We use a chained
906 sequence of blocks that actually live on the stack. Thanks to Robin Houston
907 for the original version of this logic. */
908
909 case OP_RECURSE:
910 {
911 callpat = md->start_code + GET(ecode, 1);
912 new_recursive.group_num = (callpat == md->start_code)? 0 :
913 GET2(callpat, 1 + LINK_SIZE);
914
915 /* Add to "recursing stack" */
916
917 new_recursive.prevrec = md->recursive;
918 md->recursive = &new_recursive;
919
920 /* Find where to continue from afterwards */
921
922 ecode += 1 + LINK_SIZE;
923 new_recursive.after_call = ecode;
924
925 /* Now save the offset data. */
926
927 new_recursive.saved_max = md->offset_end;
928 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
929 new_recursive.offset_save = stacksave;
930 else
931 {
932 new_recursive.offset_save =
933 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
934 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
935 }
936
937 memcpy(new_recursive.offset_save, md->offset_vector,
938 new_recursive.saved_max * sizeof(int));
939 new_recursive.save_start = md->start_match;
940 md->start_match = eptr;
941
942 /* OK, now we can do the recursion. For each top-level alternative we
943 restore the offset and recursion data. */
944
945 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
946 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
947 do
948 {
949 RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
950 md, ims, eptrb, flags);
951 if (rrc == MATCH_MATCH)
952 {
953 DPRINTF(("Recursion matched\n"));
954 md->recursive = new_recursive.prevrec;
955 if (new_recursive.offset_save != stacksave)
956 (pcre_free)(new_recursive.offset_save);
957 RRETURN(MATCH_MATCH);
958 }
959 else if (rrc != MATCH_NOMATCH)
960 {
961 DPRINTF(("Recursion gave error %d\n", rrc));
962 RRETURN(rrc);
963 }
964
965 md->recursive = &new_recursive;
966 memcpy(md->offset_vector, new_recursive.offset_save,
967 new_recursive.saved_max * sizeof(int));
968 callpat += GET(callpat, 1);
969 }
970 while (*callpat == OP_ALT);
971
972 DPRINTF(("Recursion didn't match\n"));
973 md->recursive = new_recursive.prevrec;
974 if (new_recursive.offset_save != stacksave)
975 (pcre_free)(new_recursive.offset_save);
976 RRETURN(MATCH_NOMATCH);
977 }
978 /* Control never reaches here */
979
980 /* "Once" brackets are like assertion brackets except that after a match,
981 the point in the subject string is not moved back. Thus there can never be
982 a move back into the brackets. Friedl calls these "atomic" subpatterns.
983 Check the alternative branches in turn - the matching won't pass the KET
984 for this kind of subpattern. If any one branch matches, we carry on as at
985 the end of a normal bracket, leaving the subject pointer. */
986
987 case OP_ONCE:
988 prev = ecode;
989 saved_eptr = eptr;
990
991 do
992 {
993 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
994 eptrb, 0);
995 if (rrc == MATCH_MATCH) break;
996 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
997 ecode += GET(ecode,1);
998 }
999 while (*ecode == OP_ALT);
1000
1001 /* If we hit the end of the group (which could be repeated), fail */
1002
1003 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1004
1005 /* Continue from after the group, in the same way as after an assertion,
1006 updating the offsets high water mark, since extracts may have been taken. */
1007
1008 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1009
1010 offset_top = md->end_offset_top;
1011 eptr = md->end_match_ptr;
1012
1013 /* For a non-repeating ket, just continue at this level. This also
1014 happens for a repeating ket if no characters were matched in the group.
1015 This is the forcible breaking of infinite loops as implemented in Perl
1016 5.005. If there is an options reset, it will get obeyed in the normal
1017 course of events. */
1018
1019 if (*ecode == OP_KET || eptr == saved_eptr)
1020 {
1021 ecode += 1+LINK_SIZE;
1022 break;
1023 }
1024
1025 /* The repeating kets try the rest of the pattern or restart from the
1026 preceding bracket, in the appropriate order. The second "call" of match()
1027 uses tail recursion, to avoid using another stack frame. We need to reset
1028 any options that changed within the bracket before re-running it, so
1029 check the next opcode. */
1030
1031 if (ecode[1+LINK_SIZE] == OP_OPT)
1032 {
1033 ims = (ims & ~PCRE_IMS) | ecode[4];
1034 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1035 }
1036
1037 if (*ecode == OP_KETRMIN)
1038 {
1039 RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1040 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1041 ecode = prev;
1042 flags = match_tail_recursed;
1043 goto TAIL_RECURSE;
1044 }
1045 else /* OP_KETRMAX */
1046 {
1047 RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
1048 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1049 ecode += 1 + LINK_SIZE;
1050 flags = match_tail_recursed;
1051 goto TAIL_RECURSE;
1052 }
1053 /* Control never gets here */
1054
1055 /* An alternation is the end of a branch; scan along to find the end of the
1056 bracketed group and go to there. */
1057
1058 case OP_ALT:
1059 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1060 break;
1061
1062 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1063 that it may occur zero times. It may repeat infinitely, or not at all -
1064 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1065 repeat limits are compiled as a number of copies, with the optional ones
1066 preceded by BRAZERO or BRAMINZERO. */
1067
1068 case OP_BRAZERO:
1069 {
1070 next = ecode+1;
1071 RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
1072 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1073 do next += GET(next,1); while (*next == OP_ALT);
1074 ecode = next + 1 + LINK_SIZE;
1075 }
1076 break;
1077
1078 case OP_BRAMINZERO:
1079 {
1080 next = ecode+1;
1081 do next += GET(next, 1); while (*next == OP_ALT);
1082 RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1083 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1084 ecode++;
1085 }
1086 break;
1087
1088 /* End of a group, repeated or non-repeating. */
1089
1090 case OP_KET:
1091 case OP_KETRMIN:
1092 case OP_KETRMAX:
1093 prev = ecode - GET(ecode, 1);
1094
1095 /* If this was a group that remembered the subject start, in order to break
1096 infinite repeats of empty string matches, retrieve the subject start from
1097 the chain. Otherwise, set it NULL. */
1098
1099 if (*prev >= OP_SBRA)
1100 {
1101 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1102 eptrb = eptrb->epb_prev; /* Backup to previous group */
1103 }
1104 else saved_eptr = NULL;
1105
1106 /* If we are at the end of an assertion group, stop matching and return
1107 MATCH_MATCH, but record the current high water mark for use by positive
1108 assertions. Do this also for the "once" (atomic) groups. */
1109
1110 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1111 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1112 *prev == OP_ONCE)
1113 {
1114 md->end_match_ptr = eptr; /* For ONCE */
1115 md->end_offset_top = offset_top;
1116 RRETURN(MATCH_MATCH);
1117 }
1118
1119 /* For capturing groups we have to check the group number back at the start
1120 and if necessary complete handling an extraction by setting the offsets and
1121 bumping the high water mark. Note that whole-pattern recursion is coded as
1122 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1123 when the OP_END is reached. Other recursion is handled here. */
1124
1125 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1126 {
1127 number = GET2(prev, 1+LINK_SIZE);
1128 offset = number << 1;
1129
1130 #ifdef DEBUG
1131 printf("end bracket %d", number);
1132 printf("\n");
1133 #endif
1134
1135 md->capture_last = number;
1136 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1137 {
1138 md->offset_vector[offset] =
1139 md->offset_vector[md->offset_end - number];
1140 md->offset_vector[offset+1] = eptr - md->start_subject;
1141 if (offset_top <= offset) offset_top = offset + 2;
1142 }
1143
1144 /* Handle a recursively called group. Restore the offsets
1145 appropriately and continue from after the call. */
1146
1147 if (md->recursive != NULL && md->recursive->group_num == number)
1148 {
1149 recursion_info *rec = md->recursive;
1150 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1151 md->recursive = rec->prevrec;
1152 md->start_match = rec->save_start;
1153 memcpy(md->offset_vector, rec->offset_save,
1154 rec->saved_max * sizeof(int));
1155 ecode = rec->after_call;
1156 ims = original_ims;
1157 break;
1158 }
1159 }
1160
1161 /* For both capturing and non-capturing groups, reset the value of the ims
1162 flags, in case they got changed during the group. */
1163
1164 ims = original_ims;
1165 DPRINTF(("ims reset to %02lx\n", ims));
1166
1167 /* For a non-repeating ket, just continue at this level. This also
1168 happens for a repeating ket if no characters were matched in the group.
1169 This is the forcible breaking of infinite loops as implemented in Perl
1170 5.005. If there is an options reset, it will get obeyed in the normal
1171 course of events. */
1172
1173 if (*ecode == OP_KET || eptr == saved_eptr)
1174 {
1175 ecode += 1 + LINK_SIZE;
1176 break;
1177 }
1178
1179 /* The repeating kets try the rest of the pattern or restart from the
1180 preceding bracket, in the appropriate order. In the second case, we can use
1181 tail recursion to avoid using another stack frame. */
1182
1183 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1184
1185 if (*ecode == OP_KETRMIN)
1186 {
1187 RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1188 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1189 ecode = prev;
1190 flags |= match_tail_recursed;
1191 goto TAIL_RECURSE;
1192 }
1193 else /* OP_KETRMAX */
1194 {
1195 RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
1196 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1197 ecode += 1 + LINK_SIZE;
1198 flags = match_tail_recursed;
1199 goto TAIL_RECURSE;
1200 }
1201 /* Control never gets here */
1202
1203 /* Start of subject unless notbol, or after internal newline if multiline */
1204
1205 case OP_CIRC:
1206 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1207 if ((ims & PCRE_MULTILINE) != 0)
1208 {
1209 if (eptr != md->start_subject &&
1210 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1211 RRETURN(MATCH_NOMATCH);
1212 ecode++;
1213 break;
1214 }
1215 /* ... else fall through */
1216
1217 /* Start of subject assertion */
1218
1219 case OP_SOD:
1220 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1221 ecode++;
1222 break;
1223
1224 /* Start of match assertion */
1225
1226 case OP_SOM:
1227 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1228 ecode++;
1229 break;
1230
1231 /* Assert before internal newline if multiline, or before a terminating
1232 newline unless endonly is set, else end of subject unless noteol is set. */
1233
1234 case OP_DOLL:
1235 if ((ims & PCRE_MULTILINE) != 0)
1236 {
1237 if (eptr < md->end_subject)
1238 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1239 else
1240 { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1241 ecode++;
1242 break;
1243 }
1244 else
1245 {
1246 if (md->noteol) RRETURN(MATCH_NOMATCH);
1247 if (!md->endonly)
1248 {
1249 if (eptr != md->end_subject &&
1250 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1251 RRETURN(MATCH_NOMATCH);
1252 ecode++;
1253 break;
1254 }
1255 }
1256 /* ... else fall through for endonly */
1257
1258 /* End of subject assertion (\z) */
1259
1260 case OP_EOD:
1261 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1262 ecode++;
1263 break;
1264
1265 /* End of subject or ending \n assertion (\Z) */
1266
1267 case OP_EODN:
1268 if (eptr != md->end_subject &&
1269 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1270 RRETURN(MATCH_NOMATCH);
1271 ecode++;
1272 break;
1273
1274 /* Word boundary assertions */
1275
1276 case OP_NOT_WORD_BOUNDARY:
1277 case OP_WORD_BOUNDARY:
1278 {
1279
1280 /* Find out if the previous and current characters are "word" characters.
1281 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1282 be "non-word" characters. */
1283
1284 #ifdef SUPPORT_UTF8
1285 if (utf8)
1286 {
1287 if (eptr == md->start_subject) prev_is_word = FALSE; else
1288 {
1289 const uschar *lastptr = eptr - 1;
1290 while((*lastptr & 0xc0) == 0x80) lastptr--;
1291 GETCHAR(c, lastptr);
1292 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1293 }
1294 if (eptr >= md->end_subject) cur_is_word = FALSE; else
1295 {
1296 GETCHAR(c, eptr);
1297 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1298 }
1299 }
1300 else
1301 #endif
1302
1303 /* More streamlined when not in UTF-8 mode */
1304
1305 {
1306 prev_is_word = (eptr != md->start_subject) &&
1307 ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1308 cur_is_word = (eptr < md->end_subject) &&
1309 ((md->ctypes[*eptr] & ctype_word) != 0);
1310 }
1311
1312 /* Now see if the situation is what we want */
1313
1314 if ((*ecode++ == OP_WORD_BOUNDARY)?
1315 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1316 RRETURN(MATCH_NOMATCH);
1317 }
1318 break;
1319
1320 /* Match a single character type; inline for speed */
1321
1322 case OP_ANY:
1323 if ((ims & PCRE_DOTALL) == 0)
1324 {
1325 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1326 }
1327 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1328 if (utf8)
1329 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1330 ecode++;
1331 break;
1332
1333 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1334 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1335
1336 case OP_ANYBYTE:
1337 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1338 ecode++;
1339 break;
1340
1341 case OP_NOT_DIGIT:
1342 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1343 GETCHARINCTEST(c, eptr);
1344 if (
1345 #ifdef SUPPORT_UTF8
1346 c < 256 &&
1347 #endif
1348 (md->ctypes[c] & ctype_digit) != 0
1349 )
1350 RRETURN(MATCH_NOMATCH);
1351 ecode++;
1352 break;
1353
1354 case OP_DIGIT:
1355 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1356 GETCHARINCTEST(c, eptr);
1357 if (
1358 #ifdef SUPPORT_UTF8
1359 c >= 256 ||
1360 #endif
1361 (md->ctypes[c] & ctype_digit) == 0
1362 )
1363 RRETURN(MATCH_NOMATCH);
1364 ecode++;
1365 break;
1366
1367 case OP_NOT_WHITESPACE:
1368 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1369 GETCHARINCTEST(c, eptr);
1370 if (
1371 #ifdef SUPPORT_UTF8
1372 c < 256 &&
1373 #endif
1374 (md->ctypes[c] & ctype_space) != 0
1375 )
1376 RRETURN(MATCH_NOMATCH);
1377 ecode++;
1378 break;
1379
1380 case OP_WHITESPACE:
1381 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1382 GETCHARINCTEST(c, eptr);
1383 if (
1384 #ifdef SUPPORT_UTF8
1385 c >= 256 ||
1386 #endif
1387 (md->ctypes[c] & ctype_space) == 0
1388 )
1389 RRETURN(MATCH_NOMATCH);
1390 ecode++;
1391 break;
1392
1393 case OP_NOT_WORDCHAR:
1394 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1395 GETCHARINCTEST(c, eptr);
1396 if (
1397 #ifdef SUPPORT_UTF8
1398 c < 256 &&
1399 #endif
1400 (md->ctypes[c] & ctype_word) != 0
1401 )
1402 RRETURN(MATCH_NOMATCH);
1403 ecode++;
1404 break;
1405
1406 case OP_WORDCHAR:
1407 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1408 GETCHARINCTEST(c, eptr);
1409 if (
1410 #ifdef SUPPORT_UTF8
1411 c >= 256 ||
1412 #endif
1413 (md->ctypes[c] & ctype_word) == 0
1414 )
1415 RRETURN(MATCH_NOMATCH);
1416 ecode++;
1417 break;
1418
1419 case OP_ANYNL:
1420 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1421 GETCHARINCTEST(c, eptr);
1422 switch(c)
1423 {
1424 default: RRETURN(MATCH_NOMATCH);
1425 case 0x000d:
1426 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1427 break;
1428 case 0x000a:
1429 case 0x000b:
1430 case 0x000c:
1431 case 0x0085:
1432 case 0x2028:
1433 case 0x2029:
1434 break;
1435 }
1436 ecode++;
1437 break;
1438
1439 #ifdef SUPPORT_UCP
1440 /* Check the next character by Unicode property. We will get here only
1441 if the support is in the binary; otherwise a compile-time error occurs. */
1442
1443 case OP_PROP:
1444 case OP_NOTPROP:
1445 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1446 GETCHARINCTEST(c, eptr);
1447 {
1448 int chartype, script;
1449 int category = _pcre_ucp_findprop(c, &chartype, &script);
1450
1451 switch(ecode[1])
1452 {
1453 case PT_ANY:
1454 if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1455 break;
1456
1457 case PT_LAMP:
1458 if ((chartype == ucp_Lu ||
1459 chartype == ucp_Ll ||
1460 chartype == ucp_Lt) == (op == OP_NOTPROP))
1461 RRETURN(MATCH_NOMATCH);
1462 break;
1463
1464 case PT_GC:
1465 if ((ecode[2] != category) == (op == OP_PROP))
1466 RRETURN(MATCH_NOMATCH);
1467 break;
1468
1469 case PT_PC:
1470 if ((ecode[2] != chartype) == (op == OP_PROP))
1471 RRETURN(MATCH_NOMATCH);
1472 break;
1473
1474 case PT_SC:
1475 if ((ecode[2] != script) == (op == OP_PROP))
1476 RRETURN(MATCH_NOMATCH);
1477 break;
1478
1479 default:
1480 RRETURN(PCRE_ERROR_INTERNAL);
1481 }
1482
1483 ecode += 3;
1484 }
1485 break;
1486
1487 /* Match an extended Unicode sequence. We will get here only if the support
1488 is in the binary; otherwise a compile-time error occurs. */
1489
1490 case OP_EXTUNI:
1491 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1492 GETCHARINCTEST(c, eptr);
1493 {
1494 int chartype, script;
1495 int category = _pcre_ucp_findprop(c, &chartype, &script);
1496 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1497 while (eptr < md->end_subject)
1498 {
1499 int len = 1;
1500 if (!utf8) c = *eptr; else
1501 {
1502 GETCHARLEN(c, eptr, len);
1503 }
1504 category = _pcre_ucp_findprop(c, &chartype, &script);
1505 if (category != ucp_M) break;
1506 eptr += len;
1507 }
1508 }
1509 ecode++;
1510 break;
1511 #endif
1512
1513
1514 /* Match a back reference, possibly repeatedly. Look past the end of the
1515 item to see if there is repeat information following. The code is similar
1516 to that for character classes, but repeated for efficiency. Then obey
1517 similar code to character type repeats - written out again for speed.
1518 However, if the referenced string is the empty string, always treat
1519 it as matched, any number of times (otherwise there could be infinite
1520 loops). */
1521
1522 case OP_REF:
1523 {
1524 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1525 ecode += 3; /* Advance past item */
1526
1527 /* If the reference is unset, set the length to be longer than the amount
1528 of subject left; this ensures that every attempt at a match fails. We
1529 can't just fail here, because of the possibility of quantifiers with zero
1530 minima. */
1531
1532 length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1533 md->end_subject - eptr + 1 :
1534 md->offset_vector[offset+1] - md->offset_vector[offset];
1535
1536 /* Set up for repetition, or handle the non-repeated case */
1537
1538 switch (*ecode)
1539 {
1540 case OP_CRSTAR:
1541 case OP_CRMINSTAR:
1542 case OP_CRPLUS:
1543 case OP_CRMINPLUS:
1544 case OP_CRQUERY:
1545 case OP_CRMINQUERY:
1546 c = *ecode++ - OP_CRSTAR;
1547 minimize = (c & 1) != 0;
1548 min = rep_min[c]; /* Pick up values from tables; */
1549 max = rep_max[c]; /* zero for max => infinity */
1550 if (max == 0) max = INT_MAX;
1551 break;
1552
1553 case OP_CRRANGE:
1554 case OP_CRMINRANGE:
1555 minimize = (*ecode == OP_CRMINRANGE);
1556 min = GET2(ecode, 1);
1557 max = GET2(ecode, 3);
1558 if (max == 0) max = INT_MAX;
1559 ecode += 5;
1560 break;
1561
1562 default: /* No repeat follows */
1563 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1564 eptr += length;
1565 continue; /* With the main loop */
1566 }
1567
1568 /* If the length of the reference is zero, just continue with the
1569 main loop. */
1570
1571 if (length == 0) continue;
1572
1573 /* First, ensure the minimum number of matches are present. We get back
1574 the length of the reference string explicitly rather than passing the
1575 address of eptr, so that eptr can be a register variable. */
1576
1577 for (i = 1; i <= min; i++)
1578 {
1579 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1580 eptr += length;
1581 }
1582
1583 /* If min = max, continue at the same level without recursion.
1584 They are not both allowed to be zero. */
1585
1586 if (min == max) continue;
1587
1588 /* If minimizing, keep trying and advancing the pointer */
1589
1590 if (minimize)
1591 {
1592 for (fi = min;; fi++)
1593 {
1594 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1595 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1596 if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1597 RRETURN(MATCH_NOMATCH);
1598 eptr += length;
1599 }
1600 /* Control never gets here */
1601 }
1602
1603 /* If maximizing, find the longest string and work backwards */
1604
1605 else
1606 {
1607 pp = eptr;
1608 for (i = min; i < max; i++)
1609 {
1610 if (!match_ref(offset, eptr, length, md, ims)) break;
1611 eptr += length;
1612 }
1613 while (eptr >= pp)
1614 {
1615 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1616 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1617 eptr -= length;
1618 }
1619 RRETURN(MATCH_NOMATCH);
1620 }
1621 }
1622 /* Control never gets here */
1623
1624
1625
1626 /* Match a bit-mapped character class, possibly repeatedly. This op code is
1627 used when all the characters in the class have values in the range 0-255,
1628 and either the matching is caseful, or the characters are in the range
1629 0-127 when UTF-8 processing is enabled. The only difference between
1630 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1631 encountered.
1632
1633 First, look past the end of the item to see if there is repeat information
1634 following. Then obey similar code to character type repeats - written out
1635 again for speed. */
1636
1637 case OP_NCLASS:
1638 case OP_CLASS:
1639 {
1640 data = ecode + 1; /* Save for matching */
1641 ecode += 33; /* Advance past the item */
1642
1643 switch (*ecode)
1644 {
1645 case OP_CRSTAR:
1646 case OP_CRMINSTAR:
1647 case OP_CRPLUS:
1648 case OP_CRMINPLUS:
1649 case OP_CRQUERY:
1650 case OP_CRMINQUERY:
1651 c = *ecode++ - OP_CRSTAR;
1652 minimize = (c & 1) != 0;
1653 min = rep_min[c]; /* Pick up values from tables; */
1654 max = rep_max[c]; /* zero for max => infinity */
1655 if (max == 0) max = INT_MAX;
1656 break;
1657
1658 case OP_CRRANGE:
1659 case OP_CRMINRANGE:
1660 minimize = (*ecode == OP_CRMINRANGE);
1661 min = GET2(ecode, 1);
1662 max = GET2(ecode, 3);
1663 if (max == 0) max = INT_MAX;
1664 ecode += 5;
1665 break;
1666
1667 default: /* No repeat follows */
1668 min = max = 1;
1669 break;
1670 }
1671
1672 /* First, ensure the minimum number of matches are present. */
1673
1674 #ifdef SUPPORT_UTF8
1675 /* UTF-8 mode */
1676 if (utf8)
1677 {
1678 for (i = 1; i <= min; i++)
1679 {
1680 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1681 GETCHARINC(c, eptr);
1682 if (c > 255)
1683 {
1684 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1685 }
1686 else
1687 {
1688 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1689 }
1690 }
1691 }
1692 else
1693 #endif
1694 /* Not UTF-8 mode */
1695 {
1696 for (i = 1; i <= min; i++)
1697 {
1698 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1699 c = *eptr++;
1700 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1701 }
1702 }
1703
1704 /* If max == min we can continue with the main loop without the
1705 need to recurse. */
1706
1707 if (min == max) continue;
1708
1709 /* If minimizing, keep testing the rest of the expression and advancing
1710 the pointer while it matches the class. */
1711
1712 if (minimize)
1713 {
1714 #ifdef SUPPORT_UTF8
1715 /* UTF-8 mode */
1716 if (utf8)
1717 {
1718 for (fi = min;; fi++)
1719 {
1720 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1721 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1722 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1723 GETCHARINC(c, eptr);
1724 if (c > 255)
1725 {
1726 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1727 }
1728 else
1729 {
1730 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1731 }
1732 }
1733 }
1734 else
1735 #endif
1736 /* Not UTF-8 mode */
1737 {
1738 for (fi = min;; fi++)
1739 {
1740 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1741 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1742 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1743 c = *eptr++;
1744 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1745 }
1746 }
1747 /* Control never gets here */
1748 }
1749
1750 /* If maximizing, find the longest possible run, then work backwards. */
1751
1752 else
1753 {
1754 pp = eptr;
1755
1756 #ifdef SUPPORT_UTF8
1757 /* UTF-8 mode */
1758 if (utf8)
1759 {
1760 for (i = min; i < max; i++)
1761 {
1762 int len = 1;
1763 if (eptr >= md->end_subject) break;
1764 GETCHARLEN(c, eptr, len);
1765 if (c > 255)
1766 {
1767 if (op == OP_CLASS) break;
1768 }
1769 else
1770 {
1771 if ((data[c/8] & (1 << (c&7))) == 0) break;
1772 }
1773 eptr += len;
1774 }
1775 for (;;)
1776 {
1777 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1778 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1779 if (eptr-- == pp) break; /* Stop if tried at original pos */
1780 BACKCHAR(eptr);
1781 }
1782 }
1783 else
1784 #endif
1785 /* Not UTF-8 mode */
1786 {
1787 for (i = min; i < max; i++)
1788 {
1789 if (eptr >= md->end_subject) break;
1790 c = *eptr;
1791 if ((data[c/8] & (1 << (c&7))) == 0) break;
1792 eptr++;
1793 }
1794 while (eptr >= pp)
1795 {
1796 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1797 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1798 eptr--;
1799 }
1800 }
1801
1802 RRETURN(MATCH_NOMATCH);
1803 }
1804 }
1805 /* Control never gets here */
1806
1807
1808 /* Match an extended character class. This opcode is encountered only
1809 in UTF-8 mode, because that's the only time it is compiled. */
1810
1811 #ifdef SUPPORT_UTF8
1812 case OP_XCLASS:
1813 {
1814 data = ecode + 1 + LINK_SIZE; /* Save for matching */
1815 ecode += GET(ecode, 1); /* Advance past the item */
1816
1817 switch (*ecode)
1818 {
1819 case OP_CRSTAR:
1820 case OP_CRMINSTAR:
1821 case OP_CRPLUS:
1822 case OP_CRMINPLUS:
1823 case OP_CRQUERY:
1824 case OP_CRMINQUERY:
1825 c = *ecode++ - OP_CRSTAR;
1826 minimize = (c & 1) != 0;
1827 min = rep_min[c]; /* Pick up values from tables; */
1828 max = rep_max[c]; /* zero for max => infinity */
1829 if (max == 0) max = INT_MAX;
1830 break;
1831
1832 case OP_CRRANGE:
1833 case OP_CRMINRANGE:
1834 minimize = (*ecode == OP_CRMINRANGE);
1835 min = GET2(ecode, 1);
1836 max = GET2(ecode, 3);
1837 if (max == 0) max = INT_MAX;
1838 ecode += 5;
1839 break;
1840
1841 default: /* No repeat follows */
1842 min = max = 1;
1843 break;
1844 }
1845
1846 /* First, ensure the minimum number of matches are present. */
1847
1848 for (i = 1; i <= min; i++)
1849 {
1850 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1851 GETCHARINC(c, eptr);
1852 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1853 }
1854
1855 /* If max == min we can continue with the main loop without the
1856 need to recurse. */
1857
1858 if (min == max) continue;
1859
1860 /* If minimizing, keep testing the rest of the expression and advancing
1861 the pointer while it matches the class. */
1862
1863 if (minimize)
1864 {
1865 for (fi = min;; fi++)
1866 {
1867 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1868 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1869 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1870 GETCHARINC(c, eptr);
1871 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1872 }
1873 /* Control never gets here */
1874 }
1875
1876 /* If maximizing, find the longest possible run, then work backwards. */
1877
1878 else
1879 {
1880 pp = eptr;
1881 for (i = min; i < max; i++)
1882 {
1883 int len = 1;
1884 if (eptr >= md->end_subject) break;
1885 GETCHARLEN(c, eptr, len);
1886 if (!_pcre_xclass(c, data)) break;
1887 eptr += len;
1888 }
1889 for(;;)
1890 {
1891 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
1892 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1893 if (eptr-- == pp) break; /* Stop if tried at original pos */
1894 BACKCHAR(eptr)
1895 }
1896 RRETURN(MATCH_NOMATCH);
1897 }
1898
1899 /* Control never gets here */
1900 }
1901 #endif /* End of XCLASS */
1902
1903 /* Match a single character, casefully */
1904
1905 case OP_CHAR:
1906 #ifdef SUPPORT_UTF8
1907 if (utf8)
1908 {
1909 length = 1;
1910 ecode++;
1911 GETCHARLEN(fc, ecode, length);
1912 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1913 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
1914 }
1915 else
1916 #endif
1917
1918 /* Non-UTF-8 mode */
1919 {
1920 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1921 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
1922 ecode += 2;
1923 }
1924 break;
1925
1926 /* Match a single character, caselessly */
1927
1928 case OP_CHARNC:
1929 #ifdef SUPPORT_UTF8
1930 if (utf8)
1931 {
1932 length = 1;
1933 ecode++;
1934 GETCHARLEN(fc, ecode, length);
1935
1936 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
1937
1938 /* If the pattern character's value is < 128, we have only one byte, and
1939 can use the fast lookup table. */
1940
1941 if (fc < 128)
1942 {
1943 if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1944 }
1945
1946 /* Otherwise we must pick up the subject character */
1947
1948 else
1949 {
1950 unsigned int dc;
1951 GETCHARINC(dc, eptr);
1952 ecode += length;
1953
1954 /* If we have Unicode property support, we can use it to test the other
1955 case of the character, if there is one. */
1956
1957 if (fc != dc)
1958 {
1959 #ifdef SUPPORT_UCP
1960 if (dc != _pcre_ucp_othercase(fc))
1961 #endif
1962 RRETURN(MATCH_NOMATCH);
1963 }
1964 }
1965 }
1966 else
1967 #endif /* SUPPORT_UTF8 */
1968
1969 /* Non-UTF-8 mode */
1970 {
1971 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
1972 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
1973 ecode += 2;
1974 }
1975 break;
1976
1977 /* Match a single character repeatedly. */
1978
1979 case OP_EXACT:
1980 min = max = GET2(ecode, 1);
1981 ecode += 3;
1982 goto REPEATCHAR;
1983
1984 case OP_POSUPTO:
1985 possessive = TRUE;
1986 /* Fall through */
1987
1988 case OP_UPTO:
1989 case OP_MINUPTO:
1990 min = 0;
1991 max = GET2(ecode, 1);
1992 minimize = *ecode == OP_MINUPTO;
1993 ecode += 3;
1994 goto REPEATCHAR;
1995
1996 case OP_POSSTAR:
1997 possessive = TRUE;
1998 min = 0;
1999 max = INT_MAX;
2000 ecode++;
2001 goto REPEATCHAR;
2002
2003 case OP_POSPLUS:
2004 possessive = TRUE;
2005 min = 1;
2006 max = INT_MAX;
2007 ecode++;
2008 goto REPEATCHAR;
2009
2010 case OP_POSQUERY:
2011 possessive = TRUE;
2012 min = 0;
2013 max = 1;
2014 ecode++;
2015 goto REPEATCHAR;
2016
2017 case OP_STAR:
2018 case OP_MINSTAR:
2019 case OP_PLUS:
2020 case OP_MINPLUS:
2021 case OP_QUERY:
2022 case OP_MINQUERY:
2023 c = *ecode++ - OP_STAR;
2024 minimize = (c & 1) != 0;
2025 min = rep_min[c]; /* Pick up values from tables; */
2026 max = rep_max[c]; /* zero for max => infinity */
2027 if (max == 0) max = INT_MAX;
2028
2029 /* Common code for all repeated single-character matches. We can give
2030 up quickly if there are fewer than the minimum number of characters left in
2031 the subject. */
2032
2033 REPEATCHAR:
2034 #ifdef SUPPORT_UTF8
2035 if (utf8)
2036 {
2037 length = 1;
2038 charptr = ecode;
2039 GETCHARLEN(fc, ecode, length);
2040 if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2041 ecode += length;
2042
2043 /* Handle multibyte character matching specially here. There is
2044 support for caseless matching if UCP support is present. */
2045
2046 if (length > 1)
2047 {
2048 int oclength = 0;
2049 uschar occhars[8];
2050
2051 #ifdef SUPPORT_UCP
2052 unsigned int othercase;
2053 if ((ims & PCRE_CASELESS) != 0 &&
2054 (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2055 oclength = _pcre_ord2utf8(othercase, occhars);
2056 #endif /* SUPPORT_UCP */
2057
2058 for (i = 1; i <= min; i++)
2059 {
2060 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2061 /* Need braces because of following else */
2062 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2063 else
2064 {
2065 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2066 eptr += oclength;
2067 }
2068 }
2069
2070 if (min == max) continue;
2071
2072 if (minimize)
2073 {
2074 for (fi = min;; fi++)
2075 {
2076 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2077 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2078 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2079 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2080 /* Need braces because of following else */
2081 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2082 else
2083 {
2084 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2085 eptr += oclength;
2086 }
2087 }
2088 /* Control never gets here */
2089 }
2090
2091 else /* Maximize */
2092 {
2093 pp = eptr;
2094 for (i = min; i < max; i++)
2095 {
2096 if (eptr > md->end_subject - length) break;
2097 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2098 else if (oclength == 0) break;
2099 else
2100 {
2101 if (memcmp(eptr, occhars, oclength) != 0) break;
2102 eptr += oclength;
2103 }
2104 }
2105
2106 if (possessive) continue;
2107 while (eptr >= pp)
2108 {
2109 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2110 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2111 eptr -= length;
2112 }
2113 RRETURN(MATCH_NOMATCH);
2114 }
2115 /* Control never gets here */
2116 }
2117
2118 /* If the length of a UTF-8 character is 1, we fall through here, and
2119 obey the code as for non-UTF-8 characters below, though in this case the
2120 value of fc will always be < 128. */
2121 }
2122 else
2123 #endif /* SUPPORT_UTF8 */
2124
2125 /* When not in UTF-8 mode, load a single-byte character. */
2126 {
2127 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2128 fc = *ecode++;
2129 }
2130
2131 /* The value of fc at this point is always less than 256, though we may or
2132 may not be in UTF-8 mode. The code is duplicated for the caseless and
2133 caseful cases, for speed, since matching characters is likely to be quite
2134 common. First, ensure the minimum number of matches are present. If min =
2135 max, continue at the same level without recursing. Otherwise, if
2136 minimizing, keep trying the rest of the expression and advancing one
2137 matching character if failing, up to the maximum. Alternatively, if
2138 maximizing, find the maximum number of characters and work backwards. */
2139
2140 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2141 max, eptr));
2142
2143 if ((ims & PCRE_CASELESS) != 0)
2144 {
2145 fc = md->lcc[fc];
2146 for (i = 1; i <= min; i++)
2147 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2148 if (min == max) continue;
2149 if (minimize)
2150 {
2151 for (fi = min;; fi++)
2152 {
2153 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2154 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2155 if (fi >= max || eptr >= md->end_subject ||
2156 fc != md->lcc[*eptr++])
2157 RRETURN(MATCH_NOMATCH);
2158 }
2159 /* Control never gets here */
2160 }
2161 else /* Maximize */
2162 {
2163 pp = eptr;
2164 for (i = min; i < max; i++)
2165 {
2166 if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2167 eptr++;
2168 }
2169 if (possessive) continue;
2170 while (eptr >= pp)
2171 {
2172 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2173 eptr--;
2174 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2175 }
2176 RRETURN(MATCH_NOMATCH);
2177 }
2178 /* Control never gets here */
2179 }
2180
2181 /* Caseful comparisons (fc < 256 here; multi-byte UTF-8 characters were handled above) */
2182
2183 else
2184 {
2185 for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2186 if (min == max) continue;
2187 if (minimize)
2188 {
2189 for (fi = min;; fi++)
2190 {
2191 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2192 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2193 if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2194 RRETURN(MATCH_NOMATCH);
2195 }
2196 /* Control never gets here */
2197 }
2198 else /* Maximize */
2199 {
2200 pp = eptr;
2201 for (i = min; i < max; i++)
2202 {
2203 if (eptr >= md->end_subject || fc != *eptr) break;
2204 eptr++;
2205 }
2206 if (possessive) continue;
2207 while (eptr >= pp)
2208 {
2209 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2210 eptr--;
2211 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2212 }
2213 RRETURN(MATCH_NOMATCH);
2214 }
2215 }
2216 /* Control never gets here */
2217
2218 /* Match a negated single one-byte character. The pattern character is one
2219 byte, but the subject character we are checking against it can be multibyte. */
2220
2221 case OP_NOT:
2222 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2223 ecode++;
2224 GETCHARINCTEST(c, eptr);
2225 if ((ims & PCRE_CASELESS) != 0)
2226 {
2227 #ifdef SUPPORT_UTF8
2228 if (c < 256)
2229 #endif
2230 c = md->lcc[c];
2231 if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2232 }
2233 else
2234 {
2235 if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2236 }
2237 break;
2238
2239 /* Match a negated single one-byte character repeatedly. This is almost a
2240 repeat of the code for a repeated single character, but I haven't found a
2241 nice way of commoning these up that doesn't require a test of the
2242 positive/negative option for each character match. Maybe that wouldn't add
2243 very much to the time taken, but character matching *is* what this is all
2244 about... */
2245
2246 case OP_NOTEXACT:
2247 min = max = GET2(ecode, 1);
2248 ecode += 3;
2249 goto REPEATNOTCHAR;
2250
2251 case OP_NOTUPTO:
2252 case OP_NOTMINUPTO:
2253 min = 0;
2254 max = GET2(ecode, 1);
2255 minimize = *ecode == OP_NOTMINUPTO;
2256 ecode += 3;
2257 goto REPEATNOTCHAR;
2258
2259 case OP_NOTPOSSTAR:
2260 possessive = TRUE;
2261 min = 0;
2262 max = INT_MAX;
2263 ecode++;
2264 goto REPEATNOTCHAR;
2265
2266 case OP_NOTPOSPLUS:
2267 possessive = TRUE;
2268 min = 1;
2269 max = INT_MAX;
2270 ecode++;
2271 goto REPEATNOTCHAR;
2272
2273 case OP_NOTPOSQUERY:
2274 possessive = TRUE;
2275 min = 0;
2276 max = 1;
2277 ecode++;
2278 goto REPEATNOTCHAR;
2279
2280 case OP_NOTPOSUPTO:
2281 possessive = TRUE;
2282 min = 0;
2283 max = GET2(ecode, 1);
2284 ecode += 3;
2285 goto REPEATNOTCHAR;
2286
2287 case OP_NOTSTAR:
2288 case OP_NOTMINSTAR:
2289 case OP_NOTPLUS:
2290 case OP_NOTMINPLUS:
2291 case OP_NOTQUERY:
2292 case OP_NOTMINQUERY:
2293 c = *ecode++ - OP_NOTSTAR;
2294 minimize = (c & 1) != 0;
2295 min = rep_min[c]; /* Pick up values from tables; */
2296 max = rep_max[c]; /* zero for max => infinity */
2297 if (max == 0) max = INT_MAX;
2298
2299 /* Common code for all repeated single-byte matches. We can give up quickly
2300 if there are fewer than the minimum number of bytes left in the
2301 subject. */
2302
2303 REPEATNOTCHAR:
2304 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2305 fc = *ecode++;
2306
2307 /* The code is duplicated for the caseless and caseful cases, for speed,
2308 since matching characters is likely to be quite common. First, ensure the
2309 minimum number of matches are present. If min = max, continue at the same
2310 level without recursing. Otherwise, if minimizing, keep trying the rest of
2311 the expression and advancing one matching character if failing, up to the
2312 maximum. Alternatively, if maximizing, find the maximum number of
2313 characters and work backwards. */
2314
2315 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2316 max, eptr));
2317
2318 if ((ims & PCRE_CASELESS) != 0)
2319 {
2320 fc = md->lcc[fc];
2321
2322 #ifdef SUPPORT_UTF8
2323 /* UTF-8 mode */
2324 if (utf8)
2325 {
2326 register unsigned int d;
2327 for (i = 1; i <= min; i++)
2328 {
2329 GETCHARINC(d, eptr);
2330 if (d < 256) d = md->lcc[d];
2331 if (fc == d) RRETURN(MATCH_NOMATCH);
2332 }
2333 }
2334 else
2335 #endif
2336
2337 /* Not UTF-8 mode */
2338 {
2339 for (i = 1; i <= min; i++)
2340 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2341 }
2342
2343 if (min == max) continue;
2344
2345 if (minimize)
2346 {
2347 #ifdef SUPPORT_UTF8
2348 /* UTF-8 mode */
2349 if (utf8)
2350 {
2351 register unsigned int d;
2352 for (fi = min;; fi++)
2353 {
2354 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2355 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2356 GETCHARINC(d, eptr);
2357 if (d < 256) d = md->lcc[d];
2358 if (fi >= max || eptr >= md->end_subject || fc == d)
2359 RRETURN(MATCH_NOMATCH);
2360 }
2361 }
2362 else
2363 #endif
2364 /* Not UTF-8 mode */
2365 {
2366 for (fi = min;; fi++)
2367 {
2368 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2369 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2370 if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2371 RRETURN(MATCH_NOMATCH);
2372 }
2373 }
2374 /* Control never gets here */
2375 }
2376
2377 /* Maximize case */
2378
2379 else
2380 {
2381 pp = eptr;
2382
2383 #ifdef SUPPORT_UTF8
2384 /* UTF-8 mode */
2385 if (utf8)
2386 {
2387 register unsigned int d;
2388 for (i = min; i < max; i++)
2389 {
2390 int len = 1;
2391 if (eptr >= md->end_subject) break;
2392 GETCHARLEN(d, eptr, len);
2393 if (d < 256) d = md->lcc[d];
2394 if (fc == d) break;
2395 eptr += len;
2396 }
2397 if (possessive) continue;
2398 for(;;)
2399 {
2400 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2401 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2402 if (eptr-- == pp) break; /* Stop if tried at original pos */
2403 BACKCHAR(eptr);
2404 }
2405 }
2406 else
2407 #endif
2408 /* Not UTF-8 mode */
2409 {
2410 for (i = min; i < max; i++)
2411 {
2412 if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2413 eptr++;
2414 }
2415 if (possessive) continue;
2416 while (eptr >= pp)
2417 {
2418 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2419 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2420 eptr--;
2421 }
2422 }
2423
2424 RRETURN(MATCH_NOMATCH);
2425 }
2426 /* Control never gets here */
2427 }
2428
2429 /* Caseful comparisons */
2430
2431 else
2432 {
2433 #ifdef SUPPORT_UTF8
2434 /* UTF-8 mode */
2435 if (utf8)
2436 {
2437 register unsigned int d;
2438 for (i = 1; i <= min; i++)
2439 {
2440 GETCHARINC(d, eptr);
2441 if (fc == d) RRETURN(MATCH_NOMATCH);
2442 }
2443 }
2444 else
2445 #endif
2446 /* Not UTF-8 mode */
2447 {
2448 for (i = 1; i <= min; i++)
2449 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2450 }
2451
2452 if (min == max) continue;
2453
2454 if (minimize)
2455 {
2456 #ifdef SUPPORT_UTF8
2457 /* UTF-8 mode */
2458 if (utf8)
2459 {
2460 register unsigned int d;
2461 for (fi = min;; fi++)
2462 {
2463 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2464 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2465 GETCHARINC(d, eptr);
2466 if (fi >= max || eptr >= md->end_subject || fc == d)
2467 RRETURN(MATCH_NOMATCH);
2468 }
2469 }
2470 else
2471 #endif
2472 /* Not UTF-8 mode */
2473 {
2474 for (fi = min;; fi++)
2475 {
2476 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2477 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2478 if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2479 RRETURN(MATCH_NOMATCH);
2480 }
2481 }
2482 /* Control never gets here */
2483 }
2484
2485 /* Maximize case */
2486
2487 else
2488 {
2489 pp = eptr;
2490
2491 #ifdef SUPPORT_UTF8
2492 /* UTF-8 mode */
2493 if (utf8)
2494 {
2495 register unsigned int d;
2496 for (i = min; i < max; i++)
2497 {
2498 int len = 1;
2499 if (eptr >= md->end_subject) break;
2500 GETCHARLEN(d, eptr, len);
2501 if (fc == d) break;
2502 eptr += len;
2503 }
2504 if (possessive) continue;
2505 for(;;)
2506 {
2507 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2508 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2509 if (eptr-- == pp) break; /* Stop if tried at original pos */
2510 BACKCHAR(eptr);
2511 }
2512 }
2513 else
2514 #endif
2515 /* Not UTF-8 mode */
2516 {
2517 for (i = min; i < max; i++)
2518 {
2519 if (eptr >= md->end_subject || fc == *eptr) break;
2520 eptr++;
2521 }
2522 if (possessive) continue;
2523 while (eptr >= pp)
2524 {
2525 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2526 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527 eptr--;
2528 }
2529 }
2530
2531 RRETURN(MATCH_NOMATCH);
2532 }
2533 }
2534 /* Control never gets here */
2535
2536 /* Match a single character type repeatedly; several different opcodes
2537 share code. This is very similar to the code for single characters, but we
2538 repeat it in the interests of efficiency. */
2539
2540 case OP_TYPEEXACT:
2541 min = max = GET2(ecode, 1);
2542 minimize = TRUE;
2543 ecode += 3;
2544 goto REPEATTYPE;
2545
2546 case OP_TYPEUPTO:
2547 case OP_TYPEMINUPTO:
2548 min = 0;
2549 max = GET2(ecode, 1);
2550 minimize = *ecode == OP_TYPEMINUPTO;
2551 ecode += 3;
2552 goto REPEATTYPE;
2553
2554 case OP_TYPEPOSSTAR:
2555 possessive = TRUE;
2556 min = 0;
2557 max = INT_MAX;
2558 ecode++;
2559 goto REPEATTYPE;
2560
2561 case OP_TYPEPOSPLUS:
2562 possessive = TRUE;
2563 min = 1;
2564 max = INT_MAX;
2565 ecode++;
2566 goto REPEATTYPE;
2567
2568 case OP_TYPEPOSQUERY:
2569 possessive = TRUE;
2570 min = 0;
2571 max = 1;
2572 ecode++;
2573 goto REPEATTYPE;
2574
2575 case OP_TYPEPOSUPTO:
2576 possessive = TRUE;
2577 min = 0;
2578 max = GET2(ecode, 1);
2579 ecode += 3;
2580 goto REPEATTYPE;
2581
2582 case OP_TYPESTAR:
2583 case OP_TYPEMINSTAR:
2584 case OP_TYPEPLUS:
2585 case OP_TYPEMINPLUS:
2586 case OP_TYPEQUERY:
2587 case OP_TYPEMINQUERY:
2588 c = *ecode++ - OP_TYPESTAR;
2589 minimize = (c & 1) != 0;
2590 min = rep_min[c]; /* Pick up values from tables; */
2591 max = rep_max[c]; /* zero for max => infinity */
2592 if (max == 0) max = INT_MAX;
2593
2594 /* Common code for all repeated single character type matches. Note that
2595 in UTF-8 mode, '.' matches a character of any length, but for the other
2596 character types, the valid characters are all one-byte long. */
2597
2598 REPEATTYPE:
2599 ctype = *ecode++; /* Code for the character type */
2600
2601 #ifdef SUPPORT_UCP
2602 if (ctype == OP_PROP || ctype == OP_NOTPROP)
2603 {
2604 prop_fail_result = ctype == OP_NOTPROP;
2605 prop_type = *ecode++;
2606 prop_value = *ecode++;
2607 }
2608 else prop_type = -1;
2609 #endif
2610
2611 /* First, ensure the minimum number of matches are present. Use inline
2612 code for maximizing the speed, and do the type test once at the start
2613 (i.e. keep it out of the loop). Also we can test that there are at least
2614 the minimum number of bytes before we start. This isn't as effective in
2615 UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2616 is tidier. Also separate the UCP code, which can be the same for both UTF-8
2617 and single-bytes. */
2618
2619 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2620 if (min > 0)
2621 {
2622 #ifdef SUPPORT_UCP
2623 if (prop_type >= 0)
2624 {
2625 switch(prop_type)
2626 {
2627 case PT_ANY:
2628 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2629 for (i = 1; i <= min; i++)
2630 {
2631 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2632 GETCHARINC(c, eptr);
2633 }
2634 break;
2635
2636 case PT_LAMP:
2637 for (i = 1; i <= min; i++)
2638 {
2639 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2640 GETCHARINC(c, eptr);
2641 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2642 if ((prop_chartype == ucp_Lu ||
2643 prop_chartype == ucp_Ll ||
2644 prop_chartype == ucp_Lt) == prop_fail_result)
2645 RRETURN(MATCH_NOMATCH);
2646 }
2647 break;
2648
2649 case PT_GC:
2650 for (i = 1; i <= min; i++)
2651 {
2652 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2653 GETCHARINC(c, eptr);
2654 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2655 if ((prop_category == prop_value) == prop_fail_result)
2656 RRETURN(MATCH_NOMATCH);
2657 }
2658 break;
2659
2660 case PT_PC:
2661 for (i = 1; i <= min; i++)
2662 {
2663 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2664 GETCHARINC(c, eptr);
2665 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2666 if ((prop_chartype == prop_value) == prop_fail_result)
2667 RRETURN(MATCH_NOMATCH);
2668 }
2669 break;
2670
2671 case PT_SC:
2672 for (i = 1; i <= min; i++)
2673 {
2674 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2675 GETCHARINC(c, eptr);
2676 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2677 if ((prop_script == prop_value) == prop_fail_result)
2678 RRETURN(MATCH_NOMATCH);
2679 }
2680 break;
2681
2682 default:
2683 RRETURN(PCRE_ERROR_INTERNAL);
2684 }
2685 }
2686
2687 /* Match extended Unicode sequences. We will get here only if the
2688 support is in the binary; otherwise a compile-time error occurs. */
2689
2690 else if (ctype == OP_EXTUNI)
2691 {
2692 for (i = 1; i <= min; i++)
2693 {
2694 GETCHARINCTEST(c, eptr);
2695 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2696 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2697 while (eptr < md->end_subject)
2698 {
2699 int len = 1;
2700 if (!utf8) c = *eptr; else
2701 {
2702 GETCHARLEN(c, eptr, len);
2703 }
2704 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2705 if (prop_category != ucp_M) break;
2706 eptr += len;
2707 }
2708 }
2709 }
2710
2711 else
2712 #endif /* SUPPORT_UCP */
2713
2714 /* Handle all other cases when the coding is UTF-8 */
2715
2716 #ifdef SUPPORT_UTF8
2717 if (utf8) switch(ctype)
2718 {
2719 case OP_ANY:
2720 for (i = 1; i <= min; i++)
2721 {
2722 if (eptr >= md->end_subject ||
2723 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2724 RRETURN(MATCH_NOMATCH);
2725 eptr++;
2726 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2727 }
2728 break;
2729
2730 case OP_ANYBYTE:
2731 eptr += min;
2732 break;
2733
2734 case OP_ANYNL:
2735 for (i = 1; i <= min; i++)
2736 {
2737 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2738 GETCHARINC(c, eptr);
2739 switch(c)
2740 {
2741 default: RRETURN(MATCH_NOMATCH);
2742 case 0x000d:
2743 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2744 break;
2745 case 0x000a:
2746 case 0x000b:
2747 case 0x000c:
2748 case 0x0085:
2749 case 0x2028:
2750 case 0x2029:
2751 break;
2752 }
2753 }
2754 break;
2755
2756 case OP_NOT_DIGIT:
2757 for (i = 1; i <= min; i++)
2758 {
2759 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2760 GETCHARINC(c, eptr);
2761 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
2762 RRETURN(MATCH_NOMATCH);
2763 }
2764 break;
2765
2766 case OP_DIGIT:
2767 for (i = 1; i <= min; i++)
2768 {
2769 if (eptr >= md->end_subject ||
2770 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
2771 RRETURN(MATCH_NOMATCH);
2772 /* No need to skip more bytes - we know it's a 1-byte character */
2773 }
2774 break;
2775
2776 case OP_NOT_WHITESPACE:
2777 for (i = 1; i <= min; i++)
2778 {
2779 if (eptr >= md->end_subject ||
2780 (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
2781 RRETURN(MATCH_NOMATCH);
2782 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2783 }
2784 break;
2785
2786 case OP_WHITESPACE:
2787 for (i = 1; i <= min; i++)
2788 {
2789 if (eptr >= md->end_subject ||
2790 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
2791 RRETURN(MATCH_NOMATCH);
2792 /* No need to skip more bytes - we know it's a 1-byte character */
2793 }
2794 break;
2795
2796 case OP_NOT_WORDCHAR:
2797 for (i = 1; i <= min; i++)
2798 {
2799 if (eptr >= md->end_subject ||
2800 (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
2801 RRETURN(MATCH_NOMATCH);
2802 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2803 }
2804 break;
2805
2806 case OP_WORDCHAR:
2807 for (i = 1; i <= min; i++)
2808 {
2809 if (eptr >= md->end_subject ||
2810 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
2811 RRETURN(MATCH_NOMATCH);
2812 /* No need to skip more bytes - we know it's a 1-byte character */
2813 }
2814 break;
2815
2816 default:
2817 RRETURN(PCRE_ERROR_INTERNAL);
2818 } /* End switch(ctype) */
2819
2820 else
2821 #endif /* SUPPORT_UTF8 */
2822
2823 /* Code for the non-UTF-8 case for minimum matching of operators other
2824 than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2825 number of bytes present, as this was tested above. */
2826
2827 switch(ctype)
2828 {
2829 case OP_ANY:
2830 if ((ims & PCRE_DOTALL) == 0)
2831 {
2832 for (i = 1; i <= min; i++)
2833 {
2834 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2835 eptr++;
2836 }
2837 }
2838 else eptr += min;
2839 break;
2840
2841 case OP_ANYBYTE:
2842 eptr += min;
2843 break;
2844
2845 /* Because of the CRLF case, we can't assume the minimum number of
2846 bytes are present in this case. */
2847
2848 case OP_ANYNL:
2849 for (i = 1; i <= min; i++)
2850 {
2851 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2852 switch(*eptr++)
2853 {
2854 default: RRETURN(MATCH_NOMATCH);
2855 case 0x000d:
2856 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2857 break;
2858 case 0x000a:
2859 case 0x000b:
2860 case 0x000c:
2861 case 0x0085:
2862 break;
2863 }
2864 }
2865 break;
2866
2867 case OP_NOT_DIGIT:
2868 for (i = 1; i <= min; i++)
2869 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
2870 break;
2871
2872 case OP_DIGIT:
2873 for (i = 1; i <= min; i++)
2874 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
2875 break;
2876
2877 case OP_NOT_WHITESPACE:
2878 for (i = 1; i <= min; i++)
2879 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
2880 break;
2881
2882 case OP_WHITESPACE:
2883 for (i = 1; i <= min; i++)
2884 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
2885 break;
2886
2887 case OP_NOT_WORDCHAR:
2888 for (i = 1; i <= min; i++)
2889 if ((md->ctypes[*eptr++] & ctype_word) != 0)
2890 RRETURN(MATCH_NOMATCH);
2891 break;
2892
2893 case OP_WORDCHAR:
2894 for (i = 1; i <= min; i++)
2895 if ((md->ctypes[*eptr++] & ctype_word) == 0)
2896 RRETURN(MATCH_NOMATCH);
2897 break;
2898
2899 default:
2900 RRETURN(PCRE_ERROR_INTERNAL);
2901 }
2902 }
2903
2904 /* If min = max, continue at the same level without recursing */
2905
2906 if (min == max) continue;
2907
2908 /* If minimizing, we have to test the rest of the pattern before each
2909 subsequent match. Again, separate the UTF-8 case for speed, and also
2910 separate the UCP cases. */
2911
2912 if (minimize)
2913 {
2914 #ifdef SUPPORT_UCP
2915 if (prop_type >= 0)
2916 {
2917 switch(prop_type)
2918 {
2919 case PT_ANY:
2920 for (fi = min;; fi++)
2921 {
2922 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2923 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2924 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2925 GETCHARINC(c, eptr);
2926 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2927 }
2928 /* Control never gets here */
2929
2930 case PT_LAMP:
2931 for (fi = min;; fi++)
2932 {
2933 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2934 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2935 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2936 GETCHARINC(c, eptr);
2937 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2938 if ((prop_chartype == ucp_Lu ||
2939 prop_chartype == ucp_Ll ||
2940 prop_chartype == ucp_Lt) == prop_fail_result)
2941 RRETURN(MATCH_NOMATCH);
2942 }
2943 /* Control never gets here */
2944
2945 case PT_GC:
2946 for (fi = min;; fi++)
2947 {
2948 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2949 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2950 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2951 GETCHARINC(c, eptr);
2952 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2953 if ((prop_category == prop_value) == prop_fail_result)
2954 RRETURN(MATCH_NOMATCH);
2955 }
2956 /* Control never gets here */
2957
2958 case PT_PC:
2959 for (fi = min;; fi++)
2960 {
2961 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2962 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2963 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2964 GETCHARINC(c, eptr);
2965 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2966 if ((prop_chartype == prop_value) == prop_fail_result)
2967 RRETURN(MATCH_NOMATCH);
2968 }
2969 /* Control never gets here */
2970
2971 case PT_SC:
2972 for (fi = min;; fi++)
2973 {
2974 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2975 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2976 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2977 GETCHARINC(c, eptr);
2978 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2979 if ((prop_script == prop_value) == prop_fail_result)
2980 RRETURN(MATCH_NOMATCH);
2981 }
2982 /* Control never gets here */
2983
2984 default:
2985 RRETURN(PCRE_ERROR_INTERNAL);
2986 }
2987 }
2988
2989 /* Match extended Unicode sequences. We will get here only if the
2990 support is in the binary; otherwise a compile-time error occurs. */
2991
2992 else if (ctype == OP_EXTUNI)
2993 {
2994 for (fi = min;; fi++)
2995 {
2996 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2997 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2998 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2999 GETCHARINCTEST(c, eptr);
3000 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3001 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3002 while (eptr < md->end_subject)
3003 {
3004 int len = 1;
3005 if (!utf8) c = *eptr; else
3006 {
3007 GETCHARLEN(c, eptr, len);
3008 }
3009 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3010 if (prop_category != ucp_M) break;
3011 eptr += len;
3012 }
3013 }
3014 }
3015
3016 else
3017 #endif /* SUPPORT_UCP */
3018
3019 #ifdef SUPPORT_UTF8
3020 /* UTF-8 mode */
3021 if (utf8)
3022 {
3023 for (fi = min;; fi++)
3024 {
3025 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3026 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3027 if (fi >= max || eptr >= md->end_subject ||
3028 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3029 IS_NEWLINE(eptr)))
3030 RRETURN(MATCH_NOMATCH);
3031
3032 GETCHARINC(c, eptr);
3033 switch(ctype)
3034 {
3035 case OP_ANY: /* This is the DOTALL case */
3036 break;
3037
3038 case OP_ANYBYTE:
3039 break;
3040
3041 case OP_ANYNL:
3042 switch(c)
3043 {
3044 default: RRETURN(MATCH_NOMATCH);
3045 case 0x000d:
3046 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3047 break;
3048 case 0x000a:
3049 case 0x000b:
3050 case 0x000c:
3051 case 0x0085:
3052 case 0x2028:
3053 case 0x2029:
3054 break;
3055 }
3056 break;
3057
3058 case OP_NOT_DIGIT:
3059 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3060 RRETURN(MATCH_NOMATCH);
3061 break;
3062
3063 case OP_DIGIT:
3064 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3065 RRETURN(MATCH_NOMATCH);
3066 break;
3067
3068 case OP_NOT_WHITESPACE:
3069 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3070 RRETURN(MATCH_NOMATCH);
3071 break;
3072
3073 case OP_WHITESPACE:
3074 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3075 RRETURN(MATCH_NOMATCH);
3076 break;
3077
3078 case OP_NOT_WORDCHAR:
3079 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3080 RRETURN(MATCH_NOMATCH);
3081 break;
3082
3083 case OP_WORDCHAR:
3084 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3085 RRETURN(MATCH_NOMATCH);
3086 break;
3087
3088 default:
3089 RRETURN(PCRE_ERROR_INTERNAL);
3090 }
3091 }
3092 }
3093 else
3094 #endif
3095 /* Not UTF-8 mode */
3096 {
3097 for (fi = min;; fi++)
3098 {
3099 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3100 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3101 if (fi >= max || eptr >= md->end_subject ||
3102 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3103 RRETURN(MATCH_NOMATCH);
3104
3105 c = *eptr++;
3106 switch(ctype)
3107 {
3108 case OP_ANY: /* This is the DOTALL case */
3109 break;
3110
3111 case OP_ANYBYTE:
3112 break;
3113
3114 case OP_ANYNL:
3115 switch(c)
3116 {
3117 default: RRETURN(MATCH_NOMATCH);
3118 case 0x000d:
3119 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3120 break;
3121 case 0x000a:
3122 case 0x000b:
3123 case 0x000c:
3124 case 0x0085:
3125 break;
3126 }
3127 break;
3128
3129 case OP_NOT_DIGIT:
3130 if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3131 break;
3132
3133 case OP_DIGIT:
3134 if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3135 break;
3136
3137 case OP_NOT_WHITESPACE:
3138 if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3139 break;
3140
3141 case OP_WHITESPACE:
3142 if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3143 break;
3144
3145 case OP_NOT_WORDCHAR:
3146 if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3147 break;
3148
3149 case OP_WORDCHAR:
3150 if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3151 break;
3152
3153 default:
3154 RRETURN(PCRE_ERROR_INTERNAL);
3155 }
3156 }
3157 }
3158 /* Control never gets here */
3159 }
3160
3161 /* If maximizing, it is worth using inline code for speed, doing the type
3162 test once at the start (i.e. keep it out of the loop). Again, keep the
3163 UTF-8 and UCP stuff separate. */
3164
3165 else
3166 {
3167 pp = eptr; /* Remember where we started */
3168
3169 #ifdef SUPPORT_UCP
3170 if (prop_type >= 0)
3171 {
3172 switch(prop_type)
3173 {
3174 case PT_ANY:
3175 for (i = min; i < max; i++)
3176 {
3177 int len = 1;
3178 if (eptr >= md->end_subject) break;
3179 GETCHARLEN(c, eptr, len);
3180 if (prop_fail_result) break;
3181 eptr+= len;
3182 }
3183 break;
3184
3185 case PT_LAMP:
3186 for (i = min; i < max; i++)
3187 {
3188 int len = 1;
3189 if (eptr >= md->end_subject) break;
3190 GETCHARLEN(c, eptr, len);
3191 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3192 if ((prop_chartype == ucp_Lu ||
3193 prop_chartype == ucp_Ll ||
3194 prop_chartype == ucp_Lt) == prop_fail_result)
3195 break;
3196 eptr+= len;
3197 }
3198 break;
3199
3200 case PT_GC:
3201 for (i = min; i < max; i++)
3202 {
3203 int len = 1;
3204 if (eptr >= md->end_subject) break;
3205 GETCHARLEN(c, eptr, len);
3206 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3207 if ((prop_category == prop_value) == prop_fail_result)
3208 break;
3209 eptr+= len;
3210 }
3211 break;
3212
3213 case PT_PC:
3214 for (i = min; i < max; i++)
3215 {
3216 int len = 1;
3217 if (eptr >= md->end_subject) break;
3218 GETCHARLEN(c, eptr, len);
3219 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3220 if ((prop_chartype == prop_value) == prop_fail_result)
3221 break;
3222 eptr+= len;
3223 }
3224 break;
3225
3226 case PT_SC:
3227 for (i = min; i < max; i++)
3228 {
3229 int len = 1;
3230 if (eptr >= md->end_subject) break;
3231 GETCHARLEN(c, eptr, len);
3232 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3233 if ((prop_script == prop_value) == prop_fail_result)
3234 break;
3235 eptr+= len;
3236 }
3237 break;
3238 }
3239
3240 /* eptr is now past the end of the maximum run */
3241
3242 if (possessive) continue;
3243 for(;;)
3244 {
3245 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3246 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3247 if (eptr-- == pp) break; /* Stop if tried at original pos */
3248 BACKCHAR(eptr);
3249 }
3250 }
3251
3252 /* Match extended Unicode sequences. We will get here only if the
3253 support is in the binary; otherwise a compile-time error occurs. */
3254
3255 else if (ctype == OP_EXTUNI)
3256 {
3257 for (i = min; i < max; i++)
3258 {
3259 if (eptr >= md->end_subject) break;
3260 GETCHARINCTEST(c, eptr);
3261 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3262 if (prop_category == ucp_M) break;
3263 while (eptr < md->end_subject)
3264 {
3265 int len = 1;
3266 if (!utf8) c = *eptr; else
3267 {
3268 GETCHARLEN(c, eptr, len);
3269 }
3270 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3271 if (prop_category != ucp_M) break;
3272 eptr += len;
3273 }
3274 }
3275
3276 /* eptr is now past the end of the maximum run */
3277
3278 if (possessive) continue;
3279 for(;;)
3280 {
3281 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3282 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3283 if (eptr-- == pp) break; /* Stop if tried at original pos */
3284 for (;;) /* Move back over one extended */
3285 {
3286 int len = 1;
3287 BACKCHAR(eptr);
3288 if (!utf8) c = *eptr; else
3289 {
3290 GETCHARLEN(c, eptr, len);
3291 }
3292 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3293 if (prop_category != ucp_M) break;
3294 eptr--;
3295 }
3296 }
3297 }
3298
3299 else
3300 #endif /* SUPPORT_UCP */
3301
3302 #ifdef SUPPORT_UTF8
3303 /* UTF-8 mode */
3304
3305 if (utf8)
3306 {
3307 switch(ctype)
3308 {
3309 case OP_ANY:
3310
3311 /* Special code is required for UTF8, but when the maximum is
3312 unlimited we don't need it, so we repeat the non-UTF8 code. This is
3313 probably worth it, because .* is quite a common idiom. */
3314
3315 if (max < INT_MAX)
3316 {
3317 if ((ims & PCRE_DOTALL) == 0)
3318 {
3319 for (i = min; i < max; i++)
3320 {
3321 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3322 eptr++;
3323 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3324 }
3325 }
3326 else
3327 {
3328 for (i = min; i < max; i++)
3329 {
3330 if (eptr >= md->end_subject) break;
3331 eptr++;
3332 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3333 }
3334 }
3335 }
3336
3337 /* Handle unlimited UTF-8 repeat */
3338
3339 else
3340 {
3341 if ((ims & PCRE_DOTALL) == 0)
3342 {
3343 for (i = min; i < max; i++)
3344 {
3345 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3346 eptr++;
3347 }
3348 break;
3349 }
3350 else
3351 {
3352 c = max - min;
3353 if (c > (unsigned int)(md->end_subject - eptr))
3354 c = md->end_subject - eptr;
3355 eptr += c;
3356 }
3357 }
3358 break;
3359
3360 /* The byte case is the same as non-UTF8 */
3361
3362 case OP_ANYBYTE:
3363 c = max - min;
3364 if (c > (unsigned int)(md->end_subject - eptr))
3365 c = md->end_subject - eptr;
3366 eptr += c;
3367 break;
3368
3369 case OP_ANYNL:
3370 for (i = min; i < max; i++)
3371 {
3372 int len = 1;
3373 if (eptr >= md->end_subject) break;
3374 GETCHARLEN(c, eptr, len);
3375 if (c == 0x000d)
3376 {
3377 if (++eptr >= md->end_subject) break;
3378 if (*eptr == 0x000a) eptr++;
3379 }
3380 else
3381 {
3382 if (c != 0x000a && c != 0x000b && c != 0x000c &&
3383 c != 0x0085 && c != 0x2028 && c != 0x2029)
3384 break;
3385 eptr += len;
3386 }
3387 }
3388 break;
3389
3390 case OP_NOT_DIGIT:
3391 for (i = min; i < max; i++)
3392 {
3393 int len = 1;
3394 if (eptr >= md->end_subject) break;
3395 GETCHARLEN(c, eptr, len);
3396 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3397 eptr+= len;
3398 }
3399 break;
3400
3401 case OP_DIGIT:
3402 for (i = min; i < max; i++)
3403 {
3404 int len = 1;
3405 if (eptr >= md->end_subject) break;
3406 GETCHARLEN(c, eptr, len);
3407 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3408 eptr+= len;
3409 }
3410 break;
3411
3412 case OP_NOT_WHITESPACE:
3413 for (i = min; i < max; i++)
3414 {
3415 int len = 1;
3416 if (eptr >= md->end_subject) break;
3417 GETCHARLEN(c, eptr, len);
3418 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3419 eptr+= len;
3420 }
3421 break;
3422
3423 case OP_WHITESPACE:
3424 for (i = min; i < max; i++)
3425 {
3426 int len = 1;
3427 if (eptr >= md->end_subject) break;
3428 GETCHARLEN(c, eptr, len);
3429 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3430 eptr+= len;
3431 }
3432 break;
3433
3434 case OP_NOT_WORDCHAR:
3435 for (i = min; i < max; i++)
3436 {
3437 int len = 1;
3438 if (eptr >= md->end_subject) break;
3439 GETCHARLEN(c, eptr, len);
3440 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3441 eptr+= len;
3442 }
3443 break;
3444
3445 case OP_WORDCHAR:
3446 for (i = min; i < max; i++)
3447 {
3448 int len = 1;
3449 if (eptr >= md->end_subject) break;
3450 GETCHARLEN(c, eptr, len);
3451 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3452 eptr+= len;
3453 }
3454 break;
3455
3456 default:
3457 RRETURN(PCRE_ERROR_INTERNAL);
3458 }
3459
3460 /* eptr is now past the end of the maximum run */
3461
3462 if (possessive) continue;
3463 for(;;)
3464 {
3465 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3466 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3467 if (eptr-- == pp) break; /* Stop if tried at original pos */
3468 BACKCHAR(eptr);
3469 }
3470 }
3471 else
3472 #endif
3473
3474 /* Not UTF-8 mode */
3475 {
3476 switch(ctype)
3477 {
3478 case OP_ANY:
3479 if ((ims & PCRE_DOTALL) == 0)
3480 {
3481 for (i = min; i < max; i++)
3482 {
3483 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3484 eptr++;
3485 }
3486 break;
3487 }
3488 /* For DOTALL case, fall through and treat as \C */
3489
3490 case OP_ANYBYTE:
3491 c = max - min;
3492 if (c > (unsigned int)(md->end_subject - eptr))
3493 c = md->end_subject - eptr;
3494 eptr += c;
3495 break;
3496
3497 case OP_ANYNL:
3498 for (i = min; i < max; i++)
3499 {
3500 if (eptr >= md->end_subject) break;
3501 c = *eptr;
3502 if (c == 0x000d)
3503 {
3504 if (++eptr >= md->end_subject) break;
3505 if (*eptr == 0x000a) eptr++;
3506 }
3507 else
3508 {
3509 if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3510 break;
3511 eptr++;
3512 }
3513 }
3514 break;
3515
3516 case OP_NOT_DIGIT:
3517 for (i = min; i < max; i++)
3518 {
3519 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
3520 break;
3521 eptr++;
3522 }
3523 break;
3524
3525 case OP_DIGIT:
3526 for (i = min; i < max; i++)
3527 {
3528 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
3529 break;
3530 eptr++;
3531 }
3532 break;
3533
3534 case OP_NOT_WHITESPACE:
3535 for (i = min; i < max; i++)
3536 {
3537 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
3538 break;
3539 eptr++;
3540 }
3541 break;
3542
3543 case OP_WHITESPACE:
3544 for (i = min; i < max; i++)
3545 {
3546 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
3547 break;
3548 eptr++;
3549 }
3550 break;
3551
3552 case OP_NOT_WORDCHAR:
3553 for (i = min; i < max; i++)
3554 {
3555 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
3556 break;
3557 eptr++;
3558 }
3559 break;
3560
3561 case OP_WORDCHAR:
3562 for (i = min; i < max; i++)
3563 {
3564 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
3565 break;
3566 eptr++;
3567 }
3568 break;
3569
3570 default:
3571 RRETURN(PCRE_ERROR_INTERNAL);
3572 }
3573
3574 /* eptr is now past the end of the maximum run */
3575
3576 if (possessive) continue;
3577 while (eptr >= pp)
3578 {
3579 RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3580 eptr--;
3581 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3582 }
3583 }
3584
3585 /* Get here if we can't make it match with any permitted repetitions */
3586
3587 RRETURN(MATCH_NOMATCH);
3588 }
3589 /* Control never gets here */
3590
3591 /* There's been some horrible disaster. Arrival here can only mean there is
3592 something seriously wrong in the code above or the OP_xxx definitions. */
3593
3594 default:
3595 DPRINTF(("Unknown opcode %d\n", *ecode));
3596 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3597 }
3598
3599 /* Do not stick any code in here without much thought; it is assumed
3600 that "continue" in the code above comes out to here to repeat the main
3601 loop. */
3602
3603 } /* End of main loop */
3604 /* Control never reaches here */
3605 }
3606
3607
/***************************************************************************
****************************************************************************
                   RECURSION IN THE match() FUNCTION

The macros that were defined earlier in this file to handle recursion in
match() are not wanted beyond this point, so every one of them is undefined
again here. */

/* This set of names is undefined only when NO_RECURSE is defined. */

#ifdef NO_RECURSE
#undef eptr
#undef ecode
#undef offset_top
#undef ims
#undef eptrb
#undef flags

#undef callpat
#undef charptr
#undef data
#undef next
#undef pp
#undef prev
#undef saved_eptr

#undef new_recursive

#undef cur_is_word
#undef condition
#undef prev_is_word

#undef original_ims

#undef ctype
#undef length
#undef max
#undef min
#undef number
#undef offset
#undef op
#undef save_capture_last
#undef save_offset1
#undef save_offset2
#undef save_offset3
#undef stacksave

#undef newptrb

#endif  /* NO_RECURSE */

/* fc and fi are macros whether or not NO_RECURSE is defined, so they are
undefined unconditionally. */

#undef fc
#undef fi

/***************************************************************************
***************************************************************************/
3662
3663
3664
3665 /*************************************************
3666 * Execute a Regular Expression *
3667 *************************************************/
3668
3669 /* This function applies a compiled re to a subject string and picks out
3670 portions of the string if it matches. Two elements in the vector are set for
3671 each substring: the offsets to the start and end of the substring.
3672
3673 Arguments:
3674 argument_re points to the compiled expression
3675 extra_data points to extra data or is NULL
3676 subject points to the subject string
3677 length length of subject string (may contain binary zeros)
3678 start_offset where to start in the subject string
3679 options option bits
3680 offsets points to a vector of ints to be filled in with offsets
3681 offsetcount the number of elements in the vector
3682
3683 Returns: > 0 => success; value is the number of elements filled in
3684 = 0 => success, but offsets is not big enough
3685 -1 => failed to match
3686 < -1 => some kind of unexpected problem
3687 */
3688
3689 PCRE_DATA_SCOPE int
3690 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3691 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3692 int offsetcount)
3693 {
3694 int rc, resetcount, ocount;
3695 int first_byte = -1;
3696 int req_byte = -1;
3697 int req_byte2 = -1;
3698 int newline;
3699 unsigned long int ims;
3700 BOOL using_temporary_offsets = FALSE;
3701 BOOL anchored;
3702 BOOL startline;
3703 BOOL firstline;
3704 BOOL first_byte_caseless = FALSE;
3705 BOOL req_byte_caseless = FALSE;
3706 BOOL utf8;
3707 match_data match_block;
3708 match_data *md = &match_block;
3709 const uschar *tables;
3710 const uschar *start_bits = NULL;
3711 USPTR start_match = (USPTR)subject + start_offset;
3712 USPTR end_subject;
3713 USPTR req_byte_ptr = start_match - 1;
3714 eptrblock eptrchain[EPTR_WORK_SIZE];
3715
3716 pcre_study_data internal_study;
3717 const pcre_study_data *study;
3718
3719 real_pcre internal_re;
3720 const real_pcre *external_re = (const real_pcre *)argument_re;
3721 const real_pcre *re = external_re;
3722
3723 /* Plausibility checks */
3724
3725 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3726 if (re == NULL || subject == NULL ||
3727 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3728 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3729
3730 /* Fish out the optional data from the extra_data structure, first setting
3731 the default values. */
3732
3733 study = NULL;
3734 md->match_limit = MATCH_LIMIT;
3735 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3736 md->callout_data = NULL;
3737
3738 /* The table pointer is always in native byte order. */
3739
3740 tables = external_re->tables;
3741
3742 if (extra_data != NULL)
3743 {
3744 register unsigned int flags = extra_data->flags;
3745 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3746 study = (const pcre_study_data *)extra_data->study_data;
3747 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3748 md->match_limit = extra_data->match_limit;
3749 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3750 md->match_limit_recursion = extra_data->match_limit_recursion;
3751 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3752 md->callout_data = extra_data->callout_data;
3753 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3754 }
3755
3756 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
3757 is a feature that makes it possible to save compiled regex and re-use them
3758 in other programs later. */
3759
3760 if (tables == NULL) tables = _pcre_default_tables;
3761
3762 /* Check that the first field in the block is the magic number. If it is not,
3763 test for a regex that was compiled on a host of opposite endianness. If this is
3764 the case, flipped values are put in internal_re and internal_study if there was
3765 study data too. */
3766
3767 if (re->magic_number != MAGIC_NUMBER)
3768 {
3769 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
3770 if (re == NULL) return PCRE_ERROR_BADMAGIC;
3771 if (study != NULL) study = &internal_study;
3772 }
3773
3774 /* Set up other data */
3775
3776 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3777 startline = (re->options & PCRE_STARTLINE) != 0;
3778 firstline = (re->options & PCRE_FIRSTLINE) != 0;
3779
3780 /* The code starts after the real_pcre block and the capture name table. */
3781
3782 md->start_code = (const uschar *)external_re + re->name_table_offset +
3783 re->name_count * re->name_entry_size;
3784
3785 md->start_subject = (USPTR)subject;
3786 md->start_offset = start_offset;
3787 md->end_subject = md->start_subject + length;
3788 end_subject = md->end_subject;
3789
3790 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3791 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3792
3793 md->notbol = (options & PCRE_NOTBOL) != 0;
3794 md->noteol = (options & PCRE_NOTEOL) != 0;
3795 md->notempty = (options & PCRE_NOTEMPTY) != 0;
3796 md->partial = (options & PCRE_PARTIAL) != 0;
3797 md->hitend = FALSE;
3798
3799 md->recursive = NULL; /* No recursion at top level */
3800 md->eptrchain = eptrchain; /* Make workspace generally available */
3801
3802 md->lcc = tables + lcc_offset;
3803 md->ctypes = tables + ctypes_offset;
3804
3805 /* Handle different types of newline. The three bits give eight cases. If
3806 nothing is set at run time, whatever was used at compile time applies. */
3807
3808 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
3809 PCRE_NEWLINE_BITS)
3810 {
3811 case 0: newline = NEWLINE; break; /* Compile-time default */
3812 case PCRE_NEWLINE_CR: newline = '\r'; break;
3813 case PCRE_NEWLINE_LF: newline = '\n'; break;
3814 case PCRE_NEWLINE_CR+
3815 PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3816 case PCRE_NEWLINE_ANY: newline = -1; break;
3817 default: return PCRE_ERROR_BADNEWLINE;
3818 }
3819
3820 if (newline < 0)
3821 {
3822 md->nltype = NLTYPE_ANY;
3823 }
3824 else
3825 {
3826 md->nltype = NLTYPE_FIXED;
3827 if (newline > 255)
3828 {
3829 md->nllen = 2;
3830 md->nl[0] = (newline >> 8) & 255;
3831 md->nl[1] = newline & 255;
3832 }
3833 else
3834 {
3835 md->nllen = 1;
3836 md->nl[0] = newline;
3837 }
3838 }
3839
3840 /* Partial matching is supported only for a restricted set of regexes at the
3841 moment. */
3842
3843 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3844 return PCRE_ERROR_BADPARTIAL;
3845
3846 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3847 back the character offset. */
3848
3849 #ifdef SUPPORT_UTF8
3850 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3851 {
3852 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3853 return PCRE_ERROR_BADUTF8;
3854 if (start_offset > 0 && start_offset < length)
3855 {
3856 int tb = ((uschar *)subject)[start_offset];
3857 if (tb > 127)
3858 {
3859 tb &= 0xc0;
3860 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
3861 }
3862 }
3863 }
3864 #endif
3865
3866 /* The ims options can vary during the matching as a result of the presence
3867 of (?ims) items in the pattern. They are kept in a local variable so that
3868 restoring at the exit of a group is easy. */
3869
3870 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
3871
3872 /* If the expression has got more back references than the offsets supplied can
3873 hold, we get a temporary chunk of working store to use during the matching.
3874 Otherwise, we can use the vector supplied, rounding down its size to a multiple
3875 of 3. */
3876
3877 ocount = offsetcount - (offsetcount % 3);
3878
3879 if (re->top_backref > 0 && re->top_backref >= ocount/3)
3880 {
3881 ocount = re->top_backref * 3 + 3;
3882 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3883 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3884 using_temporary_offsets = TRUE;
3885 DPRINTF(("Got memory to hold back references\n"));
3886 }
3887 else md->offset_vector = offsets;
3888
3889 md->offset_end = ocount;
3890 md->offset_max = (2*ocount)/3;
3891 md->offset_overflow = FALSE;
3892 md->capture_last = -1;
3893
3894 /* Compute the minimum number of offsets that we need to reset each time. Doing
3895 this makes a huge difference to execution time when there aren't many brackets
3896 in the pattern. */
3897
3898 resetcount = 2 + re->top_bracket * 2;
3899 if (resetcount > offsetcount) resetcount = ocount;
3900
3901 /* Reset the working variable associated with each extraction. These should
3902 never be used unless previously set, but they get saved and restored, and so we
3903 initialize them to avoid reading uninitialized locations. */
3904
3905 if (md->offset_vector != NULL)
3906 {
3907 register int *iptr = md->offset_vector + ocount;
3908 register int *iend = iptr - resetcount/2 + 1;
3909 while (--iptr >= iend) *iptr = -1;
3910 }
3911
3912 /* Set up the first character to match, if available. The first_byte value is
3913 never set for an anchored regular expression, but the anchoring may be forced
3914 at run time, so we have to test for anchoring. The first char may be unset for
3915 an unanchored pattern, of course. If there's no first char and the pattern was
3916 studied, there may be a bitmap of possible first characters. */
3917
3918 if (!anchored)
3919 {
3920 if ((re->options & PCRE_FIRSTSET) != 0)
3921 {
3922 first_byte = re->first_byte & 255;
3923 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3924 first_byte = md->lcc[first_byte];
3925 }
3926 else
3927 if (!startline && study != NULL &&
3928 (study->options & PCRE_STUDY_MAPPED) != 0)
3929 start_bits = study->start_bits;
3930 }
3931
3932 /* For anchored or unanchored matches, there may be a "last known required
3933 character" set. */
3934
3935 if ((re->options & PCRE_REQCHSET) != 0)
3936 {
3937 req_byte = re->req_byte & 255;
3938 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
3939 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
3940 }
3941
3942
3943 /* ==========================================================================*/
3944
3945 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
3946 the loop runs just once. */
3947
3948 for(;;)
3949 {
3950 USPTR save_end_subject = end_subject;
3951
3952 /* Reset the maximum number of extractions we might see. */
3953
3954 if (md->offset_vector != NULL)
3955 {
3956 register int *iptr = md->offset_vector;
3957 register int *iend = iptr + resetcount;
3958 while (iptr < iend) *iptr++ = -1;
3959 }
3960
3961 /* Advance to a unique first char if possible. If firstline is TRUE, the
3962 start of the match is constrained to the first line of a multiline string.
3963 That is, the match must be before or at the first newline. Implement this by
3964 temporarily adjusting end_subject so that we stop scanning at a newline. If
3965 the match fails at the newline, later code breaks this loop. */
3966
3967 if (firstline)
3968 {
3969 USPTR t = start_match;
3970 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3971 end_subject = t;
3972 }
3973
3974 /* Now test for a unique first byte */
3975
3976 if (first_byte >= 0)
3977 {
3978 if (first_byte_caseless)
3979 while (start_match < end_subject &&
3980 md->lcc[*start_match] != first_byte)
3981 start_match++;
3982 else
3983 while (start_match < end_subject && *start_match != first_byte)
3984 start_match++;
3985 }
3986
3987 /* Or to just after a linebreak for a multiline match if possible */
3988
3989 else if (startline)
3990 {
3991 if (start_match > md->start_subject + start_offset)
3992 {
3993 while (start_match <= end_subject && !WAS_NEWLINE(start_match))
3994 start_match++;
3995 }
3996 }
3997
3998 /* Or to a non-unique first char after study */
3999
4000 else if (start_bits != NULL)
4001 {
4002 while (start_match < end_subject)
4003 {
4004 register unsigned int c = *start_match;
4005 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4006 }
4007 }
4008
4009 /* Restore fudged end_subject */
4010
4011 end_subject = save_end_subject;
4012
4013 #ifdef DEBUG /* Sigh. Some compilers never learn. */
4014 printf(">>>> Match against: ");
4015 pchars(start_match, end_subject - start_match, TRUE, md);
4016 printf("\n");
4017 #endif
4018
4019 /* If req_byte is set, we know that that character must appear in the subject
4020 for the match to succeed. If the first character is set, req_byte must be
4021 later in the subject; otherwise the test starts at the match point. This
4022 optimization can save a huge amount of backtracking in patterns with nested
4023 unlimited repeats that aren't going to match. Writing separate code for
4024 cased/caseless versions makes it go faster, as does using an autoincrement
4025 and backing off on a match.
4026
4027 HOWEVER: when the subject string is very, very long, searching to its end can
4028 take a long time, and give bad performance on quite ordinary patterns. This
4029 showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4030 string... so we don't do this when the string is sufficiently long.
4031
4032 ALSO: this processing is disabled when partial matching is requested.
4033 */
4034
4035 if (req_byte >= 0 &&
4036 end_subject - start_match < REQ_BYTE_MAX &&
4037 !md->partial)
4038 {
4039 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4040
4041 /* We don't need to repeat the search if we haven't yet reached the
4042 place we found it at last time. */
4043
4044 if (p > req_byte_ptr)
4045 {
4046 if (req_byte_caseless)
4047 {
4048 while (p < end_subject)
4049 {
4050 register int pp = *p++;
4051 if (pp == req_byte || pp == req_byte2) { p--; break; }
4052 }
4053 }
4054 else
4055 {
4056 while (p < end_subject)
4057 {
4058 if (*p++ == req_byte) { p--; break; }
4059 }
4060 }
4061
4062 /* If we can't find the required character, break the matching loop,
4063 forcing a match failure. */
4064
4065 if (p >= end_subject)
4066 {
4067 rc = MATCH_NOMATCH;
4068 break;
4069 }
4070
4071 /* If we have found the required character, save the point where we
4072 found it, so that we don't search again next time round the loop if
4073 the start hasn't passed this character yet. */
4074
4075 req_byte_ptr = p;
4076 }
4077 }
4078
4079 /* OK, we can now run the match. */
4080
4081 md->start_match = start_match;
4082 md->match_call_count = 0;
4083 md->eptrn = 0; /* Next free eptrchain slot */
4084 rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4085
4086 /* Any return other than MATCH_NOMATCH breaks the loop. */
4087
4088 if (rc != MATCH_NOMATCH) break;
4089
4090 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4091 newline in the subject (though it may continue over the newline). Therefore,
4092 if we have just failed to match, starting at a newline, do not continue. */
4093
4094 if (firstline && IS_NEWLINE(start_match)) break;
4095
4096 /* Advance the match position by one character. */
4097
4098 start_match++;
4099 #ifdef SUPPORT_UTF8
4100 if (utf8)
4101 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4102 start_match++;
4103 #endif
4104
4105 /* Break the loop if the pattern is anchored or if we have passed the end of
4106 the subject. */
4107
4108 if (anchored || start_match > end_subject) break;
4109
4110 /* If we have just passed a CR and the newline option is CRLF or ANY, and we
4111 are now at a LF, advance the match position by one more character. */
4112
4113 if (start_match[-1] == '\r' &&
4114 (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
4115 start_match < end_subject &&
4116 *start_match == '\n')
4117 start_match++;
4118
4119 } /* End of for(;;) "bumpalong" loop */
4120
4121 /* ==========================================================================*/
4122
4123 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4124 conditions is true:
4125
4126 (1) The pattern is anchored;
4127
4128 (2) We are past the end of the subject;
4129
4130 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4131 this option requests that a match occur at or before the first newline in
4132 the subject.
4133
4134 When we have a match and the offset vector is big enough to deal with any
4135 backreferences, captured substring offsets will already be set up. In the case
4136 where we had to get some local store to hold offsets for backreference
4137 processing, copy those that we can. In this case there need not be overflow if
4138 certain parts of the pattern were not used, even though there are more
4139 capturing parentheses than vector slots. */
4140
4141 if (rc == MATCH_MATCH)
4142 {
4143 if (using_temporary_offsets)
4144 {
4145 if (offsetcount >= 4)
4146 {
4147 memcpy(offsets + 2, md->offset_vector + 2,
4148 (offsetcount - 2) * sizeof(int));
4149 DPRINTF(("Copied offsets from temporary memory\n"));
4150 }
4151 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4152 DPRINTF(("Freeing temporary memory\n"));
4153 (pcre_free)(md->offset_vector);
4154 }
4155
4156 /* Set the return code to the number of captured strings, or 0 if there are
4157 too many to fit into the vector. */
4158
4159 rc = md->offset_overflow? 0 : md->end_offset_top/2;
4160
4161 /* If there is space, set up the whole thing as substring 0. */
4162
4163 if (offsetcount < 2) rc = 0; else
4164 {
4165 offsets[0] = start_match - md->start_subject;
4166 offsets[1] = md->end_match_ptr - md->start_subject;
4167 }
4168
4169 DPRINTF((">>>> returning %d\n", rc));
4170 return rc;
4171 }
4172
4173 /* Control gets here if there has been an error, or if the overall match
4174 attempt has failed at all permitted starting positions. */
4175
4176 if (using_temporary_offsets)
4177 {
4178 DPRINTF(("Freeing temporary memory\n"));
4179 (pcre_free)(md->offset_vector);
4180 }
4181
4182 if (rc != MATCH_NOMATCH)
4183 {
4184 DPRINTF((">>>> error: returning %d\n", rc));
4185 return rc;
4186 }
4187 else if (md->partial && md->hitend)
4188 {
4189 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4190 return PCRE_ERROR_PARTIAL;
4191 }
4192 else
4193 {
4194 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4195 return PCRE_ERROR_NOMATCH;
4196 }
4197 }
4198
4199 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12