/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 207 - (show annotations) (download)
Mon Aug 6 09:32:14 2007 UTC (7 years ago) by ph10
File MIME type: text/plain
File size: 146799 byte(s)
Fix backtrack past start of subject bugs caused by the use of \X, \p, or \P in 
non-UTF-8 mode and the presence of top-bit-set characters.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #ifdef HAVE_CONFIG_H
46 #include <config.h>
47 #endif
48
49 #define NLBLOCK md /* Block containing newline information */
50 #define PSSTART start_subject /* Field containing processed string start */
51 #define PSEND end_subject /* Field containing processed string end */
52
53 #include "pcre_internal.h"
54
55 /* Undefine some potentially clashing cpp symbols */
56
57 #undef min
58 #undef max
59
60 /* Flag bits for the match() function */
61
62 #define match_condassert 0x01 /* Called to check a condition assertion */
63 #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64
65 /* Non-error returns from the match() function. Error returns are externally
66 defined PCRE_ERROR_xxx codes, which are all negative. */
67
68 #define MATCH_MATCH 1
69 #define MATCH_NOMATCH 0
70
71 /* Maximum number of ints of offset to save on the stack for recursive calls.
72 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
73 because the offset vector is always a multiple of 3 long. */
74
75 #define REC_STACK_SAVE_MAX 30
76
77 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
78
79 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
80 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
81
82
83
#ifdef DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Write a run of characters to stdout in a readable form, for use in debugging
output. Characters for which isprint() is true are written as themselves; every
other value is written as a backslash-x escape with two hexadecimal digits.
When the characters come from the subject string, the count is first cut back
so that printing stops at the end of the subject.

Arguments:
  p           points to characters
  length      number to print
  is_subject  TRUE if printing from within md->start_subject
  md          pointer to matching data block; it is consulted only when
                is_subject is TRUE

Returns:      nothing
*/

static void
pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
{
/* Clamp the count to what remains of the subject, if that was requested. */

if (is_subject)
  {
  if (length > md->end_subject - p) length = md->end_subject - p;
  }

/* A count that is zero or negative prints nothing. */

for (; length > 0; length--)
  {
  unsigned int ch = *p++;
  if (isprint(ch))
    printf("%c", ch);
  else
    printf("\\x%02x", ch);
  }
}
#endif
110
111
112
113 /*************************************************
114 * Match a back-reference *
115 *************************************************/
116
117 /* If a back reference hasn't been set, the length that is passed is greater
118 than the number of characters left in the string, so the match fails.
119
120 Arguments:
121 offset index into the offset vector
122 eptr points into the subject
123 length length to be matched
124 md points to match data block
125 ims the ims flags
126
127 Returns: TRUE if matched
128 */
129
130 static BOOL
131 match_ref(int offset, register USPTR eptr, int length, match_data *md,
132 unsigned long int ims)
133 {
134 USPTR p = md->start_subject + md->offset_vector[offset];
135
136 #ifdef DEBUG
137 if (eptr >= md->end_subject)
138 printf("matching subject <null>");
139 else
140 {
141 printf("matching subject ");
142 pchars(eptr, length, TRUE, md);
143 }
144 printf(" against backref ");
145 pchars(p, length, FALSE, md);
146 printf("\n");
147 #endif
148
149 /* Always fail if not enough characters left */
150
151 if (length > md->end_subject - eptr) return FALSE;
152
153 /* Separate the caselesss case for speed */
154
155 if ((ims & PCRE_CASELESS) != 0)
156 {
157 while (length-- > 0)
158 if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
159 }
160 else
161 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
162
163 return TRUE;
164 }
165
166
167
168 /***************************************************************************
169 ****************************************************************************
170 RECURSION IN THE match() FUNCTION
171
172 The match() function is highly recursive, though not every recursive call
173 increases the recursive depth. Nevertheless, some regular expressions can cause
174 it to recurse to a great depth. I was writing for Unix, so I just let it call
175 itself recursively. This uses the stack for saving everything that has to be
176 saved for a recursive call. On Unix, the stack can be large, and this works
177 fine.
178
179 It turns out that on some non-Unix-like systems there are problems with
180 programs that use a lot of stack. (This despite the fact that every last chip
181 has oodles of memory these days, and techniques for extending the stack have
182 been known for decades.) So....
183
184 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
185 calls by keeping local variables that need to be preserved in blocks of memory
186 obtained from malloc() instead of on the stack. Macros are used to
187 achieve this so that the actual code doesn't look very different to what it
188 always used to.
189
190 The original heap-recursive code used longjmp(). However, it seems that this
191 can be very slow on some operating systems. Following a suggestion from Stan
192 Switzer, the use of longjmp() has been abolished, at the cost of having to
193 provide a unique number for each call to RMATCH. There is no way of generating
194 a sequence of numbers at compile time in C. I have given them names, to make
195 them stand out more clearly.
196
197 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
198 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
199 tests. Furthermore, not using longjmp() means that local dynamic variables
200 don't have indeterminate values; this has meant that the frame size can be
201 reduced because the result can be "passed back" by straight setting of the
202 variable instead of being passed in the frame.
203 ****************************************************************************
204 ***************************************************************************/
205
206
207 /* Numbers for RMATCH calls */
208
209 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
210 RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
211 RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
212 RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
213 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50 };
214
215
216 /* These versions of the macros use the stack, as normal. There are debugging
217 versions and production versions. Note that the "rw" argument of RMATCH isn't
218 actually used in this definition. */
219
220 #ifndef NO_RECURSE
221 #define REGISTER register
222
/* Stack-based "recursion": RMATCH is a genuine recursive call to match() whose
result is left in rrc, and RRETURN is a genuine return. Both rely on the names
rrc, mstart and rdepth being visible at the point of use. The final (rx_where)
argument of RMATCH is accepted but not used by these definitions. */

#ifdef DEBUG

/* Debugging versions: trace every call and every return on stdout. */

#define RMATCH(rx_eptr,rx_ecode,rx_top,rx_md,rx_ims,rx_eptrb,rx_flags,rx_where) \
  { \
  printf("match() called in line %d\n", __LINE__); \
  rrc = match(rx_eptr,rx_ecode,mstart,rx_top,rx_md,rx_ims,rx_eptrb,rx_flags, \
    rdepth+1); \
  printf("to line %d\n", __LINE__); \
  }

#define RRETURN(rx_value) \
  { \
  printf("match() returned %d from line %d ", rx_value, __LINE__); \
  return rx_value; \
  }

#else

/* Production versions: a plain call and a plain return. */

#define RMATCH(rx_eptr,rx_ecode,rx_top,rx_md,rx_ims,rx_eptrb,rx_flags,rx_where) \
  rrc = match(rx_eptr,rx_ecode,mstart,rx_top,rx_md,rx_ims,rx_eptrb,rx_flags, \
    rdepth+1)

#define RRETURN(rx_value) return rx_value

#endif
240
241 #else
242
243
/* These versions of the macros manage a private stack on the heap. Note that
the "rd" argument of RMATCH isn't actually used in this definition. It's the md
argument of match(), which never changes.

RMATCH obtains a new frame from pcre_stack_malloc, records in the current frame
(Xwhere) which call site is being left, fills in the new frame's copies of the
argument values, chains it to the current frame, makes it current, and jumps to
HEAP_RECURSE. The label L_<rw> that it defines is the place to which control is
expected to come back afterwards, which is why every use of RMATCH must supply
a different rw value. If the frame cannot be allocated, the current invocation
gives up with PCRE_ERROR_NOMEMORY instead of writing through a NULL pointer.

RRETURN frees the current frame and makes its predecessor current. If there is
a predecessor, the result is left in rrc and control goes to HEAP_RETURN;
otherwise this was the top-level frame and the function really returns. Note
that "ra" is evaluated only after the frame has been switched, so it must not
be an expression that refers to variables held in the frame being released. */

#define REGISTER

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

#define RRETURN(ra)\
  {\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
282
283
284 /* Structure for remembering the local variables in a private frame */
285
286 typedef struct heapframe {
287 struct heapframe *Xprevframe;
288
289 /* Function arguments that may change */
290
291 const uschar *Xeptr;
292 const uschar *Xecode;
293 const uschar *Xmstart;
294 int Xoffset_top;
295 long int Xims;
296 eptrblock *Xeptrb;
297 int Xflags;
298 unsigned int Xrdepth;
299
300 /* Function local variables */
301
302 const uschar *Xcallpat;
303 const uschar *Xcharptr;
304 const uschar *Xdata;
305 const uschar *Xnext;
306 const uschar *Xpp;
307 const uschar *Xprev;
308 const uschar *Xsaved_eptr;
309
310 recursion_info Xnew_recursive;
311
312 BOOL Xcur_is_word;
313 BOOL Xcondition;
314 BOOL Xprev_is_word;
315
316 unsigned long int Xoriginal_ims;
317
318 #ifdef SUPPORT_UCP
319 int Xprop_type;
320 int Xprop_value;
321 int Xprop_fail_result;
322 int Xprop_category;
323 int Xprop_chartype;
324 int Xprop_script;
325 int Xoclength;
326 uschar Xocchars[8];
327 #endif
328
329 int Xctype;
330 unsigned int Xfc;
331 int Xfi;
332 int Xlength;
333 int Xmax;
334 int Xmin;
335 int Xnumber;
336 int Xoffset;
337 int Xop;
338 int Xsave_capture_last;
339 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
340 int Xstacksave[REC_STACK_SAVE_MAX];
341
342 eptrblock Xnewptrb;
343
344 /* Where to jump back to */
345
346 int Xwhere;
347
348 } heapframe;
349
350 #endif
351
352
353 /***************************************************************************
354 ***************************************************************************/
355
356
357
358 /*************************************************
359 * Match from current position *
360 *************************************************/
361
362 /* This function is called recursively in many circumstances. Whenever it
363 returns a negative (error) response, the outer incarnation must also return the
364 same response.
365
366 Performance note: It might be tempting to extract commonly used fields from the
367 md structure (e.g. utf8, end_subject) into individual variables to improve
368 performance. Tests using gcc on a SPARC disproved this; in the first case, it
369 made performance worse.
370
371 Arguments:
372 eptr pointer to current character in subject
373 ecode pointer to current position in compiled code
374 mstart pointer to the current match start position (can be modified
375 by encountering \K)
376 offset_top current top pointer
377 md pointer to "static" info for the match
378 ims current /i, /m, and /s options
379 eptrb pointer to chain of blocks containing eptr at start of
380 brackets - for testing for empty matches
381 flags can contain
382 match_condassert - this is an assertion condition
383 match_cbegroup - this is the start of an unlimited repeat
384 group that can match an empty string
385 rdepth the recursion depth
386
387 Returns: MATCH_MATCH if matched ) these values are >= 0
388 MATCH_NOMATCH if failed to match )
389 a negative PCRE_ERROR_xxx value if aborted by an error condition
390 (e.g. stopped by repeated call or recursion limit)
391 */
392
393 static int
394 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
395 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
396 int flags, unsigned int rdepth)
397 {
398 /* These variables do not need to be preserved over recursion in this function,
399 so they can be ordinary variables in all cases. Mark some of them with
400 "register" because they are used a lot in loops. */
401
402 register int rrc; /* Returns from recursive calls */
403 register int i; /* Used for loops not involving calls to RMATCH() */
404 register unsigned int c; /* Character values not kept over RMATCH() calls */
405 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
406
407 BOOL minimize, possessive; /* Quantifier options */
408
409 /* When recursion is not being used, all "local" variables that have to be
410 preserved over calls to RMATCH() are part of a "frame" which is obtained from
411 heap storage. Set up the top-level frame here; others are obtained from the
412 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
413
414 #ifdef NO_RECURSE
415 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
416 frame->Xprevframe = NULL; /* Marks the top level */
417
418 /* Copy in the original argument variables */
419
420 frame->Xeptr = eptr;
421 frame->Xecode = ecode;
422 frame->Xmstart = mstart;
423 frame->Xoffset_top = offset_top;
424 frame->Xims = ims;
425 frame->Xeptrb = eptrb;
426 frame->Xflags = flags;
427 frame->Xrdepth = rdepth;
428
429 /* This is where control jumps back to to effect "recursion" */
430
431 HEAP_RECURSE:
432
433 /* Macros make the argument variables come from the current frame */
434
435 #define eptr frame->Xeptr
436 #define ecode frame->Xecode
437 #define mstart frame->Xmstart
438 #define offset_top frame->Xoffset_top
439 #define ims frame->Xims
440 #define eptrb frame->Xeptrb
441 #define flags frame->Xflags
442 #define rdepth frame->Xrdepth
443
444 /* Ditto for the local variables */
445
446 #ifdef SUPPORT_UTF8
447 #define charptr frame->Xcharptr
448 #endif
449 #define callpat frame->Xcallpat
450 #define data frame->Xdata
451 #define next frame->Xnext
452 #define pp frame->Xpp
453 #define prev frame->Xprev
454 #define saved_eptr frame->Xsaved_eptr
455
456 #define new_recursive frame->Xnew_recursive
457
458 #define cur_is_word frame->Xcur_is_word
459 #define condition frame->Xcondition
460 #define prev_is_word frame->Xprev_is_word
461
462 #define original_ims frame->Xoriginal_ims
463
464 #ifdef SUPPORT_UCP
465 #define prop_type frame->Xprop_type
466 #define prop_value frame->Xprop_value
467 #define prop_fail_result frame->Xprop_fail_result
468 #define prop_category frame->Xprop_category
469 #define prop_chartype frame->Xprop_chartype
470 #define prop_script frame->Xprop_script
471 #define oclength frame->Xoclength
472 #define occhars frame->Xocchars
473 #endif
474
475 #define ctype frame->Xctype
476 #define fc frame->Xfc
477 #define fi frame->Xfi
478 #define length frame->Xlength
479 #define max frame->Xmax
480 #define min frame->Xmin
481 #define number frame->Xnumber
482 #define offset frame->Xoffset
483 #define op frame->Xop
484 #define save_capture_last frame->Xsave_capture_last
485 #define save_offset1 frame->Xsave_offset1
486 #define save_offset2 frame->Xsave_offset2
487 #define save_offset3 frame->Xsave_offset3
488 #define stacksave frame->Xstacksave
489
490 #define newptrb frame->Xnewptrb
491
492 /* When recursion is being used, local variables are allocated on the stack and
493 get preserved during recursion in the normal way. In this environment, fi and
494 i, and fc and c, can be the same variables. */
495
496 #else /* NO_RECURSE not defined */
497 #define fi i
498 #define fc c
499
500
501 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
502 const uschar *charptr; /* in small blocks of the code. My normal */
503 #endif /* style of coding would have declared */
504 const uschar *callpat; /* them within each of those blocks. */
505 const uschar *data; /* However, in order to accommodate the */
506 const uschar *next; /* version of this code that uses an */
507 USPTR pp; /* external "stack" implemented on the */
508 const uschar *prev; /* heap, it is easier to declare them all */
509 USPTR saved_eptr; /* here, so the declarations can be cut */
510 /* out in a block. The only declarations */
511 recursion_info new_recursive; /* within blocks below are for variables */
512 /* that do not have to be preserved over */
513 BOOL cur_is_word; /* a recursive call to RMATCH(). */
514 BOOL condition;
515 BOOL prev_is_word;
516
517 unsigned long int original_ims;
518
519 #ifdef SUPPORT_UCP
520 int prop_type;
521 int prop_value;
522 int prop_fail_result;
523 int prop_category;
524 int prop_chartype;
525 int prop_script;
526 int oclength;
527 uschar occhars[8];
528 #endif
529
530 int ctype;
531 int length;
532 int max;
533 int min;
534 int number;
535 int offset;
536 int op;
537 int save_capture_last;
538 int save_offset1, save_offset2, save_offset3;
539 int stacksave[REC_STACK_SAVE_MAX];
540
541 eptrblock newptrb;
542 #endif /* NO_RECURSE */
543
544 /* These statements are here to stop the compiler complaining about uninitialized
545 variables. */
546
547 #ifdef SUPPORT_UCP
548 prop_value = 0;
549 prop_fail_result = 0;
550 #endif
551
552
553 /* This label is used for tail recursion, which is used in a few cases even
554 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
555 used. Thanks to Ian Taylor for noticing this possibility and sending the
556 original patch. */
557
558 TAIL_RECURSE:
559
560 /* OK, now we can get on with the real code of the function. Recursive calls
561 are specified by the macro RMATCH and RRETURN is used to return. When
562 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
563 and a "return", respectively (possibly with some debugging if DEBUG is
564 defined). However, RMATCH isn't like a function call because it's quite a
565 complicated macro. It has to be used in one particular way. This shouldn't,
566 however, impact performance when true recursion is being used. */
567
568 #ifdef SUPPORT_UTF8
569 utf8 = md->utf8; /* Local copy of the flag */
570 #else
571 utf8 = FALSE;
572 #endif
573
574 /* First check that we haven't called match() too many times, or that we
575 haven't exceeded the recursive call limit. */
576
577 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
578 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
579
580 original_ims = ims; /* Save for resetting on ')' */
581
582 /* At the start of a group with an unlimited repeat that may match an empty
583 string, the match_cbegroup flag is set. When this is the case, add the current
584 subject pointer to the chain of such remembered pointers, to be checked when we
585 hit the closing ket, in order to break infinite loops that match no characters.
586 When match() is called in other circumstances, don't add to the chain. The
587 match_cbegroup flag must NOT be used with tail recursion, because the memory
588 block that is used is on the stack, so a new one may be required for each
589 match(). */
590
591 if ((flags & match_cbegroup) != 0)
592 {
593 newptrb.epb_saved_eptr = eptr;
594 newptrb.epb_prev = eptrb;
595 eptrb = &newptrb;
596 }
597
598 /* Now start processing the opcodes. */
599
600 for (;;)
601 {
602 minimize = possessive = FALSE;
603 op = *ecode;
604
605 /* For partial matching, remember if we ever hit the end of the subject after
606 matching at least one subject character. */
607
608 if (md->partial &&
609 eptr >= md->end_subject &&
610 eptr > mstart)
611 md->hitend = TRUE;
612
613 switch(op)
614 {
615 /* Handle a capturing bracket. If there is space in the offset vector, save
616 the current subject position in the working slot at the top of the vector.
617 We mustn't change the current values of the data slot, because they may be
618 set from a previous iteration of this group, and be referred to by a
619 reference inside the group.
620
621 If the bracket fails to match, we need to restore this value and also the
622 values of the final offsets, in case they were set by a previous iteration
623 of the same bracket.
624
625 If there isn't enough space in the offset vector, treat this as if it were
626 a non-capturing bracket. Don't worry about setting the flag for the error
627 case here; that is handled in the code for KET. */
628
629 case OP_CBRA:
630 case OP_SCBRA:
631 number = GET2(ecode, 1+LINK_SIZE);
632 offset = number << 1;
633
634 #ifdef DEBUG
635 printf("start bracket %d\n", number);
636 printf("subject=");
637 pchars(eptr, 16, TRUE, md);
638 printf("\n");
639 #endif
640
641 if (offset < md->offset_max)
642 {
643 save_offset1 = md->offset_vector[offset];
644 save_offset2 = md->offset_vector[offset+1];
645 save_offset3 = md->offset_vector[md->offset_end - number];
646 save_capture_last = md->capture_last;
647
648 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
649 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
650
651 flags = (op == OP_SCBRA)? match_cbegroup : 0;
652 do
653 {
654 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
655 ims, eptrb, flags, RM1);
656 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
657 md->capture_last = save_capture_last;
658 ecode += GET(ecode, 1);
659 }
660 while (*ecode == OP_ALT);
661
662 DPRINTF(("bracket %d failed\n", number));
663
664 md->offset_vector[offset] = save_offset1;
665 md->offset_vector[offset+1] = save_offset2;
666 md->offset_vector[md->offset_end - number] = save_offset3;
667
668 RRETURN(MATCH_NOMATCH);
669 }
670
671 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
672 as a non-capturing bracket. */
673
674 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
675 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
676
677 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
678
679 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
680 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
681
682 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
683 final alternative within the brackets, we would return the result of a
684 recursive call to match() whatever happened. We can reduce stack usage by
685 turning this into a tail recursion, except in the case when match_cbegroup
686 is set.*/
687
688 case OP_BRA:
689 case OP_SBRA:
690 DPRINTF(("start non-capturing bracket\n"));
691 flags = (op >= OP_SBRA)? match_cbegroup : 0;
692 for (;;)
693 {
694 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
695 {
696 if (flags == 0) /* Not a possibly empty group */
697 {
698 ecode += _pcre_OP_lengths[*ecode];
699 DPRINTF(("bracket 0 tail recursion\n"));
700 goto TAIL_RECURSE;
701 }
702
703 /* Possibly empty group; can't use tail recursion. */
704
705 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
706 eptrb, flags, RM48);
707 RRETURN(rrc);
708 }
709
710 /* For non-final alternatives, continue the loop for a NOMATCH result;
711 otherwise return. */
712
713 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
714 eptrb, flags, RM2);
715 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
716 ecode += GET(ecode, 1);
717 }
718 /* Control never reaches here. */
719
720 /* Conditional group: compilation checked that there are no more than
721 two branches. If the condition is false, skipping the first branch takes us
722 past the end if there is only one branch, but that's OK because that is
723 exactly what going to the ket would do. As there is only one branch to be
724 obeyed, we can use tail recursion to avoid using another stack frame. */
725
726 case OP_COND:
727 case OP_SCOND:
728 if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
729 {
730 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
731 condition = md->recursive != NULL &&
732 (offset == RREF_ANY || offset == md->recursive->group_num);
733 ecode += condition? 3 : GET(ecode, 1);
734 }
735
736 else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */
737 {
738 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
739 condition = offset < offset_top && md->offset_vector[offset] >= 0;
740 ecode += condition? 3 : GET(ecode, 1);
741 }
742
743 else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */
744 {
745 condition = FALSE;
746 ecode += GET(ecode, 1);
747 }
748
749 /* The condition is an assertion. Call match() to evaluate it - setting
750 the final argument match_condassert causes it to stop at the end of an
751 assertion. */
752
753 else
754 {
755 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
756 match_condassert, RM3);
757 if (rrc == MATCH_MATCH)
758 {
759 condition = TRUE;
760 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
761 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
762 }
763 else if (rrc != MATCH_NOMATCH)
764 {
765 RRETURN(rrc); /* Need braces because of following else */
766 }
767 else
768 {
769 condition = FALSE;
770 ecode += GET(ecode, 1);
771 }
772 }
773
774 /* We are now at the branch that is to be obeyed. As there is only one,
775 we can use tail recursion to avoid using another stack frame, except when
776 match_cbegroup is required for an unlimited repeat of a possibly empty
777 group. If the second alternative doesn't exist, we can just plough on. */
778
779 if (condition || *ecode == OP_ALT)
780 {
781 ecode += 1 + LINK_SIZE;
782 if (op == OP_SCOND) /* Possibly empty group */
783 {
784 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
785 RRETURN(rrc);
786 }
787 else /* Group must match something */
788 {
789 flags = 0;
790 goto TAIL_RECURSE;
791 }
792 }
793 else /* Condition false & no 2nd alternative */
794 {
795 ecode += 1 + LINK_SIZE;
796 }
797 break;
798
799
800 /* End of the pattern. If we are in a top-level recursion, we should
801 restore the offsets appropriately and continue from after the call. */
802
803 case OP_END:
804 if (md->recursive != NULL && md->recursive->group_num == 0)
805 {
806 recursion_info *rec = md->recursive;
807 DPRINTF(("End of pattern in a (?0) recursion\n"));
808 md->recursive = rec->prevrec;
809 memmove(md->offset_vector, rec->offset_save,
810 rec->saved_max * sizeof(int));
811 mstart = rec->save_start;
812 ims = original_ims;
813 ecode = rec->after_call;
814 break;
815 }
816
817 /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
818 string - backtracking will then try other alternatives, if any. */
819
820 if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
821 md->end_match_ptr = eptr; /* Record where we ended */
822 md->end_offset_top = offset_top; /* and how many extracts were taken */
823 md->start_match_ptr = mstart; /* and the start (\K can modify) */
824 RRETURN(MATCH_MATCH);
825
826 /* Change option settings */
827
828 case OP_OPT:
829 ims = ecode[1];
830 ecode += 2;
831 DPRINTF(("ims set to %02lx\n", ims));
832 break;
833
834 /* Assertion brackets. Check the alternative branches in turn - the
835 matching won't pass the KET for an assertion. If any one branch matches,
836 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
837 start of each branch to move the current point backwards, so the code at
838 this level is identical to the lookahead case. */
839
840 case OP_ASSERT:
841 case OP_ASSERTBACK:
842 do
843 {
844 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
845 RM4);
846 if (rrc == MATCH_MATCH) break;
847 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
848 ecode += GET(ecode, 1);
849 }
850 while (*ecode == OP_ALT);
851 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
852
853 /* If checking an assertion for a condition, return MATCH_MATCH. */
854
855 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
856
857 /* Continue from after the assertion, updating the offsets high water
858 mark, since extracts may have been taken during the assertion. */
859
860 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
861 ecode += 1 + LINK_SIZE;
862 offset_top = md->end_offset_top;
863 continue;
864
865 /* Negative assertion: all branches must fail to match */
866
867 case OP_ASSERT_NOT:
868 case OP_ASSERTBACK_NOT:
869 do
870 {
871 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
872 RM5);
873 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
874 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
875 ecode += GET(ecode,1);
876 }
877 while (*ecode == OP_ALT);
878
879 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
880
881 ecode += 1 + LINK_SIZE;
882 continue;
883
884 /* Move the subject pointer back. This occurs only at the start of
885 each branch of a lookbehind assertion. If we are too close to the start to
886 move back, this match function fails. When working with UTF-8 we move
887 back a number of characters, not bytes. */
888
889 case OP_REVERSE:
890 #ifdef SUPPORT_UTF8
891 if (utf8)
892 {
893 i = GET(ecode, 1);
894 while (i-- > 0)
895 {
896 eptr--;
897 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
898 BACKCHAR(eptr);
899 }
900 }
901 else
902 #endif
903
904 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
905
906 {
907 eptr -= GET(ecode, 1);
908 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
909 }
910
911 /* Skip to next op code */
912
913 ecode += 1 + LINK_SIZE;
914 break;
915
916 /* The callout item calls an external function, if one is provided, passing
917 details of the match so far. This is mainly for debugging, though the
918 function is able to force a failure. */
919
920 case OP_CALLOUT:
921 if (pcre_callout != NULL)
922 {
923 pcre_callout_block cb;
924 cb.version = 1; /* Version 1 of the callout block */
925 cb.callout_number = ecode[1];
926 cb.offset_vector = md->offset_vector;
927 cb.subject = (PCRE_SPTR)md->start_subject;
928 cb.subject_length = md->end_subject - md->start_subject;
929 cb.start_match = mstart - md->start_subject;
930 cb.current_position = eptr - md->start_subject;
931 cb.pattern_position = GET(ecode, 2);
932 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
933 cb.capture_top = offset_top/2;
934 cb.capture_last = md->capture_last;
935 cb.callout_data = md->callout_data;
936 if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
937 if (rrc < 0) RRETURN(rrc);
938 }
939 ecode += 2 + 2*LINK_SIZE;
940 break;
941
942 /* Recursion either matches the current regex, or some subexpression. The
943 offset data is the offset to the starting bracket from the start of the
944 whole pattern. (This is so that it works from duplicated subpatterns.)
945
946 If there are any capturing brackets started but not finished, we have to
947 save their starting points and reinstate them after the recursion. However,
948 we don't know how many such there are (offset_top records the completed
949 total) so we just have to save all the potential data. There may be up to
950 65535 such values, which is too large to put on the stack, but using malloc
951 for small numbers seems expensive. As a compromise, the stack is used when
952 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
953 is used. If the malloc fails, the code below does not attempt a partial
954 save: it abandons the recursion at once by returning PCRE_ERROR_NOMEMORY
955 via RRETURN.
956
957 There are also other values that have to be saved. We use a chained
958 sequence of blocks that actually live on the stack. Thanks to Robin Houston
959 for the original version of this logic. */
960
961 case OP_RECURSE:
962 {
963 callpat = md->start_code + GET(ecode, 1);
964 new_recursive.group_num = (callpat == md->start_code)? 0 :
965 GET2(callpat, 1 + LINK_SIZE);
966
967 /* Add to "recursing stack" */
968
969 new_recursive.prevrec = md->recursive;
970 md->recursive = &new_recursive;
971
972 /* Find where to continue from afterwards */
973
974 ecode += 1 + LINK_SIZE;
975 new_recursive.after_call = ecode;
976
977 /* Now save the offset data. */
978
979 new_recursive.saved_max = md->offset_end;
980 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
981 new_recursive.offset_save = stacksave;
982 else
983 {
984 new_recursive.offset_save =
985 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
986 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
987 }
988
989 memcpy(new_recursive.offset_save, md->offset_vector,
990 new_recursive.saved_max * sizeof(int));
991 new_recursive.save_start = mstart;
992 mstart = eptr;
993
994 /* OK, now we can do the recursion. For each top-level alternative we
995 restore the offset and recursion data. */
996
997 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
998 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
999 do
1000 {
1001 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1002 md, ims, eptrb, flags, RM6);
1003 if (rrc == MATCH_MATCH)
1004 {
1005 DPRINTF(("Recursion matched\n"));
1006 md->recursive = new_recursive.prevrec;
1007 if (new_recursive.offset_save != stacksave)
1008 (pcre_free)(new_recursive.offset_save);
1009 RRETURN(MATCH_MATCH);
1010 }
1011 else if (rrc != MATCH_NOMATCH)
1012 {
1013 DPRINTF(("Recursion gave error %d\n", rrc));
1014 RRETURN(rrc);
1015 }
1016
1017 md->recursive = &new_recursive;
1018 memcpy(md->offset_vector, new_recursive.offset_save,
1019 new_recursive.saved_max * sizeof(int));
1020 callpat += GET(callpat, 1);
1021 }
1022 while (*callpat == OP_ALT);
1023
1024 DPRINTF(("Recursion didn't match\n"));
1025 md->recursive = new_recursive.prevrec;
1026 if (new_recursive.offset_save != stacksave)
1027 (pcre_free)(new_recursive.offset_save);
1028 RRETURN(MATCH_NOMATCH);
1029 }
1030 /* Control never reaches here */
1031
1032 /* "Once" brackets are like assertion brackets except that after a match,
1033 the point in the subject string is not moved back. Thus there can never be
1034 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1035 Check the alternative branches in turn - the matching won't pass the KET
1036 for this kind of subpattern. If any one branch matches, we carry on as at
1037 the end of a normal bracket, leaving the subject pointer. */
1038
1039 case OP_ONCE:
1040 prev = ecode;
1041 saved_eptr = eptr;
1042
1043 do
1044 {
1045 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1046 if (rrc == MATCH_MATCH) break;
1047 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1048 ecode += GET(ecode,1);
1049 }
1050 while (*ecode == OP_ALT);
1051
1052 /* If we hit the end of the group (which could be repeated), fail */
1053
1054 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1055
1056 /* Continue as from after the assertion, updating the offsets high water
1057 mark, since extracts may have been taken. */
1058
1059 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1060
1061 offset_top = md->end_offset_top;
1062 eptr = md->end_match_ptr;
1063
1064 /* For a non-repeating ket, just continue at this level. This also
1065 happens for a repeating ket if no characters were matched in the group.
1066 This is the forcible breaking of infinite loops as implemented in Perl
1067 5.005. If there is an options reset, it will get obeyed in the normal
1068 course of events. */
1069
1070 if (*ecode == OP_KET || eptr == saved_eptr)
1071 {
1072 ecode += 1+LINK_SIZE;
1073 break;
1074 }
1075
1076 /* The repeating kets try the rest of the pattern or restart from the
1077 preceding bracket, in the appropriate order. The second "call" of match()
1078 uses tail recursion, to avoid using another stack frame. We need to reset
1079 any options that changed within the bracket before re-running it, so
1080 check the next opcode. */
1081
1082 if (ecode[1+LINK_SIZE] == OP_OPT)
1083 {
1084 ims = (ims & ~PCRE_IMS) | ecode[4];
1085 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1086 }
1087
1088 if (*ecode == OP_KETRMIN)
1089 {
1090 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1091 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1092 ecode = prev;
1093 flags = 0;
1094 goto TAIL_RECURSE;
1095 }
1096 else /* OP_KETRMAX */
1097 {
1098 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1099 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1100 ecode += 1 + LINK_SIZE;
1101 flags = 0;
1102 goto TAIL_RECURSE;
1103 }
1104 /* Control never gets here */
1105
1106 /* An alternation is the end of a branch; scan along to find the end of the
1107 bracketed group and go to there. */
1108
1109 case OP_ALT:
1110 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1111 break;
1112
1113 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1114 that it may occur zero times. It may repeat infinitely, or not at all -
1115 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1116 repeat limits are compiled as a number of copies, with the optional ones
1117 preceded by BRAZERO or BRAMINZERO. */
1118
1119 case OP_BRAZERO:
1120 {
1121 next = ecode+1;
1122 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1123 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1124 do next += GET(next,1); while (*next == OP_ALT);
1125 ecode = next + 1 + LINK_SIZE;
1126 }
1127 break;
1128
1129 case OP_BRAMINZERO:
1130 {
1131 next = ecode+1;
1132 do next += GET(next, 1); while (*next == OP_ALT);
1133 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1134 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1135 ecode++;
1136 }
1137 break;
1138
1139 /* End of a group, repeated or non-repeating. */
1140
1141 case OP_KET:
1142 case OP_KETRMIN:
1143 case OP_KETRMAX:
1144 prev = ecode - GET(ecode, 1);
1145
1146 /* If this was a group that remembered the subject start, in order to break
1147 infinite repeats of empty string matches, retrieve the subject start from
1148 the chain. Otherwise, set it NULL. */
1149
1150 if (*prev >= OP_SBRA)
1151 {
1152 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1153 eptrb = eptrb->epb_prev; /* Backup to previous group */
1154 }
1155 else saved_eptr = NULL;
1156
1157 /* If we are at the end of an assertion group, stop matching and return
1158 MATCH_MATCH, but record the current high water mark for use by positive
1159 assertions. Do this also for the "once" (atomic) groups. */
1160
1161 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1162 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1163 *prev == OP_ONCE)
1164 {
1165 md->end_match_ptr = eptr; /* For ONCE */
1166 md->end_offset_top = offset_top;
1167 RRETURN(MATCH_MATCH);
1168 }
1169
1170 /* For capturing groups we have to check the group number back at the start
1171 and if necessary complete handling an extraction by setting the offsets and
1172 bumping the high water mark. Note that whole-pattern recursion is coded as
1173 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1174 when the OP_END is reached. Other recursion is handled here. */
1175
1176 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1177 {
1178 number = GET2(prev, 1+LINK_SIZE);
1179 offset = number << 1;
1180
1181 #ifdef DEBUG
1182 printf("end bracket %d", number);
1183 printf("\n");
1184 #endif
1185
1186 md->capture_last = number;
1187 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1188 {
1189 md->offset_vector[offset] =
1190 md->offset_vector[md->offset_end - number];
1191 md->offset_vector[offset+1] = eptr - md->start_subject;
1192 if (offset_top <= offset) offset_top = offset + 2;
1193 }
1194
1195 /* Handle a recursively called group. Restore the offsets
1196 appropriately and continue from after the call. */
1197
1198 if (md->recursive != NULL && md->recursive->group_num == number)
1199 {
1200 recursion_info *rec = md->recursive;
1201 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1202 md->recursive = rec->prevrec;
1203 mstart = rec->save_start;
1204 memcpy(md->offset_vector, rec->offset_save,
1205 rec->saved_max * sizeof(int));
1206 ecode = rec->after_call;
1207 ims = original_ims;
1208 break;
1209 }
1210 }
1211
1212 /* For both capturing and non-capturing groups, reset the value of the ims
1213 flags, in case they got changed during the group. */
1214
1215 ims = original_ims;
1216 DPRINTF(("ims reset to %02lx\n", ims));
1217
1218 /* For a non-repeating ket, just continue at this level. This also
1219 happens for a repeating ket if no characters were matched in the group.
1220 This is the forcible breaking of infinite loops as implemented in Perl
1221 5.005. If there is an options reset, it will get obeyed in the normal
1222 course of events. */
1223
1224 if (*ecode == OP_KET || eptr == saved_eptr)
1225 {
1226 ecode += 1 + LINK_SIZE;
1227 break;
1228 }
1229
1230 /* The repeating kets try the rest of the pattern or restart from the
1231 preceding bracket, in the appropriate order. In the second case, we can use
1232 tail recursion to avoid using another stack frame, unless we have an
1233 unlimited repeat of a group that can match an empty string. */
1234
1235 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1236
1237 if (*ecode == OP_KETRMIN)
1238 {
1239 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1240 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1241 if (flags != 0) /* Could match an empty string */
1242 {
1243 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1244 RRETURN(rrc);
1245 }
1246 ecode = prev;
1247 goto TAIL_RECURSE;
1248 }
1249 else /* OP_KETRMAX */
1250 {
1251 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1252 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1253 ecode += 1 + LINK_SIZE;
1254 flags = 0;
1255 goto TAIL_RECURSE;
1256 }
1257 /* Control never gets here */
1258
1259 /* Start of subject unless notbol, or after internal newline if multiline */
1260
1261 case OP_CIRC:
1262 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1263 if ((ims & PCRE_MULTILINE) != 0)
1264 {
1265 if (eptr != md->start_subject &&
1266 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1267 RRETURN(MATCH_NOMATCH);
1268 ecode++;
1269 break;
1270 }
1271 /* ... else fall through */
1272
1273 /* Start of subject assertion */
1274
1275 case OP_SOD:
1276 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1277 ecode++;
1278 break;
1279
1280 /* Start of match assertion */
1281
1282 case OP_SOM:
1283 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1284 ecode++;
1285 break;
1286
1287 /* Reset the start of match point */
1288
1289 case OP_SET_SOM:
1290 mstart = eptr;
1291 ecode++;
1292 break;
1293
1294 /* Assert before internal newline if multiline, or before a terminating
1295 newline unless endonly is set, else end of subject unless noteol is set. */
1296
1297 case OP_DOLL:
1298 if ((ims & PCRE_MULTILINE) != 0)
1299 {
1300 if (eptr < md->end_subject)
1301 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1302 else
1303 { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1304 ecode++;
1305 break;
1306 }
1307 else
1308 {
1309 if (md->noteol) RRETURN(MATCH_NOMATCH);
1310 if (!md->endonly)
1311 {
1312 if (eptr != md->end_subject &&
1313 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1314 RRETURN(MATCH_NOMATCH);
1315 ecode++;
1316 break;
1317 }
1318 }
1319 /* ... else fall through for endonly */
1320
1321 /* End of subject assertion (\z) */
1322
1323 case OP_EOD:
1324 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1325 ecode++;
1326 break;
1327
1328 /* End of subject or ending \n assertion (\Z) */
1329
1330 case OP_EODN:
1331 if (eptr != md->end_subject &&
1332 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1333 RRETURN(MATCH_NOMATCH);
1334 ecode++;
1335 break;
1336
1337 /* Word boundary assertions */
1338
1339 case OP_NOT_WORD_BOUNDARY:
1340 case OP_WORD_BOUNDARY:
1341 {
1342
1343 /* Find out if the previous and current characters are "word" characters.
1344 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1345 be "non-word" characters. */
1346
1347 #ifdef SUPPORT_UTF8
1348 if (utf8)
1349 {
1350 if (eptr == md->start_subject) prev_is_word = FALSE; else
1351 {
1352 const uschar *lastptr = eptr - 1;
1353 while((*lastptr & 0xc0) == 0x80) lastptr--;
1354 GETCHAR(c, lastptr);
1355 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1356 }
1357 if (eptr >= md->end_subject) cur_is_word = FALSE; else
1358 {
1359 GETCHAR(c, eptr);
1360 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1361 }
1362 }
1363 else
1364 #endif
1365
1366 /* More streamlined when not in UTF-8 mode */
1367
1368 {
1369 prev_is_word = (eptr != md->start_subject) &&
1370 ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1371 cur_is_word = (eptr < md->end_subject) &&
1372 ((md->ctypes[*eptr] & ctype_word) != 0);
1373 }
1374
1375 /* Now see if the situation is what we want */
1376
1377 if ((*ecode++ == OP_WORD_BOUNDARY)?
1378 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1379 RRETURN(MATCH_NOMATCH);
1380 }
1381 break;
1382
1383 /* Match a single character type; inline for speed */
1384
1385 case OP_ANY:
1386 if ((ims & PCRE_DOTALL) == 0)
1387 {
1388 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1389 }
1390 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1391 if (utf8)
1392 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1393 ecode++;
1394 break;
1395
1396 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1397 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1398
1399 case OP_ANYBYTE:
1400 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1401 ecode++;
1402 break;
1403
1404 case OP_NOT_DIGIT:
1405 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1406 GETCHARINCTEST(c, eptr);
1407 if (
1408 #ifdef SUPPORT_UTF8
1409 c < 256 &&
1410 #endif
1411 (md->ctypes[c] & ctype_digit) != 0
1412 )
1413 RRETURN(MATCH_NOMATCH);
1414 ecode++;
1415 break;
1416
1417 case OP_DIGIT:
1418 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1419 GETCHARINCTEST(c, eptr);
1420 if (
1421 #ifdef SUPPORT_UTF8
1422 c >= 256 ||
1423 #endif
1424 (md->ctypes[c] & ctype_digit) == 0
1425 )
1426 RRETURN(MATCH_NOMATCH);
1427 ecode++;
1428 break;
1429
1430 case OP_NOT_WHITESPACE:
1431 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1432 GETCHARINCTEST(c, eptr);
1433 if (
1434 #ifdef SUPPORT_UTF8
1435 c < 256 &&
1436 #endif
1437 (md->ctypes[c] & ctype_space) != 0
1438 )
1439 RRETURN(MATCH_NOMATCH);
1440 ecode++;
1441 break;
1442
1443 case OP_WHITESPACE:
1444 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1445 GETCHARINCTEST(c, eptr);
1446 if (
1447 #ifdef SUPPORT_UTF8
1448 c >= 256 ||
1449 #endif
1450 (md->ctypes[c] & ctype_space) == 0
1451 )
1452 RRETURN(MATCH_NOMATCH);
1453 ecode++;
1454 break;
1455
1456 case OP_NOT_WORDCHAR:
1457 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1458 GETCHARINCTEST(c, eptr);
1459 if (
1460 #ifdef SUPPORT_UTF8
1461 c < 256 &&
1462 #endif
1463 (md->ctypes[c] & ctype_word) != 0
1464 )
1465 RRETURN(MATCH_NOMATCH);
1466 ecode++;
1467 break;
1468
1469 case OP_WORDCHAR:
1470 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1471 GETCHARINCTEST(c, eptr);
1472 if (
1473 #ifdef SUPPORT_UTF8
1474 c >= 256 ||
1475 #endif
1476 (md->ctypes[c] & ctype_word) == 0
1477 )
1478 RRETURN(MATCH_NOMATCH);
1479 ecode++;
1480 break;
1481
1482 case OP_ANYNL:
1483 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1484 GETCHARINCTEST(c, eptr);
1485 switch(c)
1486 {
1487 default: RRETURN(MATCH_NOMATCH);
1488 case 0x000d:
1489 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1490 break;
1491 case 0x000a:
1492 case 0x000b:
1493 case 0x000c:
1494 case 0x0085:
1495 case 0x2028:
1496 case 0x2029:
1497 break;
1498 }
1499 ecode++;
1500 break;
1501
1502 case OP_NOT_HSPACE:
1503 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1504 GETCHARINCTEST(c, eptr);
1505 switch(c)
1506 {
1507 default: break;
1508 case 0x09: /* HT */
1509 case 0x20: /* SPACE */
1510 case 0xa0: /* NBSP */
1511 case 0x1680: /* OGHAM SPACE MARK */
1512 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1513 case 0x2000: /* EN QUAD */
1514 case 0x2001: /* EM QUAD */
1515 case 0x2002: /* EN SPACE */
1516 case 0x2003: /* EM SPACE */
1517 case 0x2004: /* THREE-PER-EM SPACE */
1518 case 0x2005: /* FOUR-PER-EM SPACE */
1519 case 0x2006: /* SIX-PER-EM SPACE */
1520 case 0x2007: /* FIGURE SPACE */
1521 case 0x2008: /* PUNCTUATION SPACE */
1522 case 0x2009: /* THIN SPACE */
1523 case 0x200A: /* HAIR SPACE */
1524 case 0x202f: /* NARROW NO-BREAK SPACE */
1525 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1526 case 0x3000: /* IDEOGRAPHIC SPACE */
1527 RRETURN(MATCH_NOMATCH);
1528 }
1529 ecode++;
1530 break;
1531
1532 case OP_HSPACE:
1533 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1534 GETCHARINCTEST(c, eptr);
1535 switch(c)
1536 {
1537 default: RRETURN(MATCH_NOMATCH);
1538 case 0x09: /* HT */
1539 case 0x20: /* SPACE */
1540 case 0xa0: /* NBSP */
1541 case 0x1680: /* OGHAM SPACE MARK */
1542 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1543 case 0x2000: /* EN QUAD */
1544 case 0x2001: /* EM QUAD */
1545 case 0x2002: /* EN SPACE */
1546 case 0x2003: /* EM SPACE */
1547 case 0x2004: /* THREE-PER-EM SPACE */
1548 case 0x2005: /* FOUR-PER-EM SPACE */
1549 case 0x2006: /* SIX-PER-EM SPACE */
1550 case 0x2007: /* FIGURE SPACE */
1551 case 0x2008: /* PUNCTUATION SPACE */
1552 case 0x2009: /* THIN SPACE */
1553 case 0x200A: /* HAIR SPACE */
1554 case 0x202f: /* NARROW NO-BREAK SPACE */
1555 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1556 case 0x3000: /* IDEOGRAPHIC SPACE */
1557 break;
1558 }
1559 ecode++;
1560 break;
1561
1562 case OP_NOT_VSPACE:
1563 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1564 GETCHARINCTEST(c, eptr);
1565 switch(c)
1566 {
1567 default: break;
1568 case 0x0a: /* LF */
1569 case 0x0b: /* VT */
1570 case 0x0c: /* FF */
1571 case 0x0d: /* CR */
1572 case 0x85: /* NEL */
1573 case 0x2028: /* LINE SEPARATOR */
1574 case 0x2029: /* PARAGRAPH SEPARATOR */
1575 RRETURN(MATCH_NOMATCH);
1576 }
1577 ecode++;
1578 break;
1579
1580 case OP_VSPACE:
1581 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1582 GETCHARINCTEST(c, eptr);
1583 switch(c)
1584 {
1585 default: RRETURN(MATCH_NOMATCH);
1586 case 0x0a: /* LF */
1587 case 0x0b: /* VT */
1588 case 0x0c: /* FF */
1589 case 0x0d: /* CR */
1590 case 0x85: /* NEL */
1591 case 0x2028: /* LINE SEPARATOR */
1592 case 0x2029: /* PARAGRAPH SEPARATOR */
1593 break;
1594 }
1595 ecode++;
1596 break;
1597
1598 #ifdef SUPPORT_UCP
1599 /* Check the next character by Unicode property. We will get here only
1600 if the support is in the binary; otherwise a compile-time error occurs. */
1601
1602 case OP_PROP:
1603 case OP_NOTPROP:
1604 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1605 GETCHARINCTEST(c, eptr);
1606 {
1607 int chartype, script;
1608 int category = _pcre_ucp_findprop(c, &chartype, &script);
1609
1610 switch(ecode[1])
1611 {
1612 case PT_ANY:
1613 if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1614 break;
1615
1616 case PT_LAMP:
1617 if ((chartype == ucp_Lu ||
1618 chartype == ucp_Ll ||
1619 chartype == ucp_Lt) == (op == OP_NOTPROP))
1620 RRETURN(MATCH_NOMATCH);
1621 break;
1622
1623 case PT_GC:
1624 if ((ecode[2] != category) == (op == OP_PROP))
1625 RRETURN(MATCH_NOMATCH);
1626 break;
1627
1628 case PT_PC:
1629 if ((ecode[2] != chartype) == (op == OP_PROP))
1630 RRETURN(MATCH_NOMATCH);
1631 break;
1632
1633 case PT_SC:
1634 if ((ecode[2] != script) == (op == OP_PROP))
1635 RRETURN(MATCH_NOMATCH);
1636 break;
1637
1638 default:
1639 RRETURN(PCRE_ERROR_INTERNAL);
1640 }
1641
1642 ecode += 3;
1643 }
1644 break;
1645
1646 /* Match an extended Unicode sequence. We will get here only if the support
1647 is in the binary; otherwise a compile-time error occurs. */
1648
1649 case OP_EXTUNI:
1650 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1651 GETCHARINCTEST(c, eptr);
1652 {
1653 int chartype, script;
1654 int category = _pcre_ucp_findprop(c, &chartype, &script);
1655 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1656 while (eptr < md->end_subject)
1657 {
1658 int len = 1;
1659 if (!utf8) c = *eptr; else
1660 {
1661 GETCHARLEN(c, eptr, len);
1662 }
1663 category = _pcre_ucp_findprop(c, &chartype, &script);
1664 if (category != ucp_M) break;
1665 eptr += len;
1666 }
1667 }
1668 ecode++;
1669 break;
1670 #endif
1671
1672
1673 /* Match a back reference, possibly repeatedly. Look past the end of the
1674 item to see if there is repeat information following. The code is similar
1675 to that for character classes, but repeated for efficiency. Then obey
1676 similar code to character type repeats - written out again for speed.
1677 However, if the referenced string is the empty string, always treat
1678 it as matched, any number of times (otherwise there could be infinite
1679 loops). */
1680
1681 case OP_REF:
1682 {
1683 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1684 ecode += 3; /* Advance past item */
1685
1686 /* If the reference is unset, set the length to be longer than the amount
1687 of subject left; this ensures that every attempt at a match fails. We
1688 can't just fail here, because of the possibility of quantifiers with zero
1689 minima. */
1690
1691 length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1692 md->end_subject - eptr + 1 :
1693 md->offset_vector[offset+1] - md->offset_vector[offset];
1694
1695 /* Set up for repetition, or handle the non-repeated case */
1696
1697 switch (*ecode)
1698 {
1699 case OP_CRSTAR:
1700 case OP_CRMINSTAR:
1701 case OP_CRPLUS:
1702 case OP_CRMINPLUS:
1703 case OP_CRQUERY:
1704 case OP_CRMINQUERY:
1705 c = *ecode++ - OP_CRSTAR;
1706 minimize = (c & 1) != 0;
1707 min = rep_min[c]; /* Pick up values from tables; */
1708 max = rep_max[c]; /* zero for max => infinity */
1709 if (max == 0) max = INT_MAX;
1710 break;
1711
1712 case OP_CRRANGE:
1713 case OP_CRMINRANGE:
1714 minimize = (*ecode == OP_CRMINRANGE);
1715 min = GET2(ecode, 1);
1716 max = GET2(ecode, 3);
1717 if (max == 0) max = INT_MAX;
1718 ecode += 5;
1719 break;
1720
1721 default: /* No repeat follows */
1722 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1723 eptr += length;
1724 continue; /* With the main loop */
1725 }
1726
1727 /* If the length of the reference is zero, just continue with the
1728 main loop. */
1729
1730 if (length == 0) continue;
1731
1732 /* First, ensure the minimum number of matches are present. We get back
1733 the length of the reference string explicitly rather than passing the
1734 address of eptr, so that eptr can be a register variable. */
1735
1736 for (i = 1; i <= min; i++)
1737 {
1738 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1739 eptr += length;
1740 }
1741
1742 /* If min = max, continue at the same level without recursion.
1743 They are not both allowed to be zero. */
1744
1745 if (min == max) continue;
1746
1747 /* If minimizing, keep trying and advancing the pointer */
1748
1749 if (minimize)
1750 {
1751 for (fi = min;; fi++)
1752 {
1753 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1754 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1755 if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1756 RRETURN(MATCH_NOMATCH);
1757 eptr += length;
1758 }
1759 /* Control never gets here */
1760 }
1761
1762 /* If maximizing, find the longest string and work backwards */
1763
1764 else
1765 {
1766 pp = eptr;
1767 for (i = min; i < max; i++)
1768 {
1769 if (!match_ref(offset, eptr, length, md, ims)) break;
1770 eptr += length;
1771 }
1772 while (eptr >= pp)
1773 {
1774 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1775 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1776 eptr -= length;
1777 }
1778 RRETURN(MATCH_NOMATCH);
1779 }
1780 }
1781 /* Control never gets here */
1782
1783
1784
1785 /* Match a bit-mapped character class, possibly repeatedly. This op code is
1786 used when all the characters in the class have values in the range 0-255,
1787 and either the matching is caseful, or the characters are in the range
1788 0-127 when UTF-8 processing is enabled. The only difference between
1789 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1790 encountered.
1791
1792 First, look past the end of the item to see if there is repeat information
1793 following. Then obey similar code to character type repeats - written out
1794 again for speed. */
1795
1796 case OP_NCLASS:
1797 case OP_CLASS:
1798 {
1799 data = ecode + 1; /* Save for matching */
1800 ecode += 33; /* Advance past the item */
1801
1802 switch (*ecode)
1803 {
1804 case OP_CRSTAR:
1805 case OP_CRMINSTAR:
1806 case OP_CRPLUS:
1807 case OP_CRMINPLUS:
1808 case OP_CRQUERY:
1809 case OP_CRMINQUERY:
1810 c = *ecode++ - OP_CRSTAR;
1811 minimize = (c & 1) != 0;
1812 min = rep_min[c]; /* Pick up values from tables; */
1813 max = rep_max[c]; /* zero for max => infinity */
1814 if (max == 0) max = INT_MAX;
1815 break;
1816
1817 case OP_CRRANGE:
1818 case OP_CRMINRANGE:
1819 minimize = (*ecode == OP_CRMINRANGE);
1820 min = GET2(ecode, 1);
1821 max = GET2(ecode, 3);
1822 if (max == 0) max = INT_MAX;
1823 ecode += 5;
1824 break;
1825
1826 default: /* No repeat follows */
1827 min = max = 1;
1828 break;
1829 }
1830
1831 /* First, ensure the minimum number of matches are present. */
1832
1833 #ifdef SUPPORT_UTF8
1834 /* UTF-8 mode */
1835 if (utf8)
1836 {
1837 for (i = 1; i <= min; i++)
1838 {
1839 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1840 GETCHARINC(c, eptr);
1841 if (c > 255)
1842 {
1843 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1844 }
1845 else
1846 {
1847 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1848 }
1849 }
1850 }
1851 else
1852 #endif
1853 /* Not UTF-8 mode */
1854 {
1855 for (i = 1; i <= min; i++)
1856 {
1857 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1858 c = *eptr++;
1859 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1860 }
1861 }
1862
1863 /* If max == min we can continue with the main loop without the
1864 need to recurse. */
1865
1866 if (min == max) continue;
1867
1868 /* If minimizing, keep testing the rest of the expression and advancing
1869 the pointer while it matches the class. */
1870
1871 if (minimize)
1872 {
1873 #ifdef SUPPORT_UTF8
1874 /* UTF-8 mode */
1875 if (utf8)
1876 {
1877 for (fi = min;; fi++)
1878 {
1879 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1880 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1881 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1882 GETCHARINC(c, eptr);
1883 if (c > 255)
1884 {
1885 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1886 }
1887 else
1888 {
1889 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1890 }
1891 }
1892 }
1893 else
1894 #endif
1895 /* Not UTF-8 mode */
1896 {
1897 for (fi = min;; fi++)
1898 {
1899 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1900 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1901 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1902 c = *eptr++;
1903 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1904 }
1905 }
1906 /* Control never gets here */
1907 }
1908
1909 /* If maximizing, find the longest possible run, then work backwards. */
1910
1911 else
1912 {
1913 pp = eptr;
1914
1915 #ifdef SUPPORT_UTF8
1916 /* UTF-8 mode */
1917 if (utf8)
1918 {
1919 for (i = min; i < max; i++)
1920 {
1921 int len = 1;
1922 if (eptr >= md->end_subject) break;
1923 GETCHARLEN(c, eptr, len);
1924 if (c > 255)
1925 {
1926 if (op == OP_CLASS) break;
1927 }
1928 else
1929 {
1930 if ((data[c/8] & (1 << (c&7))) == 0) break;
1931 }
1932 eptr += len;
1933 }
1934 for (;;)
1935 {
1936 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1937 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1938 if (eptr-- == pp) break; /* Stop if tried at original pos */
1939 BACKCHAR(eptr);
1940 }
1941 }
1942 else
1943 #endif
1944 /* Not UTF-8 mode */
1945 {
1946 for (i = min; i < max; i++)
1947 {
1948 if (eptr >= md->end_subject) break;
1949 c = *eptr;
1950 if ((data[c/8] & (1 << (c&7))) == 0) break;
1951 eptr++;
1952 }
1953 while (eptr >= pp)
1954 {
1955 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1956 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1957 eptr--;
1958 }
1959 }
1960
1961 RRETURN(MATCH_NOMATCH);
1962 }
1963 }
1964 /* Control never gets here */
1965
1966
1967 /* Match an extended character class. This opcode is encountered only
1968 in UTF-8 mode, because that's the only time it is compiled. */
1969
1970 #ifdef SUPPORT_UTF8
1971 case OP_XCLASS:
1972 {
1973 data = ecode + 1 + LINK_SIZE; /* Save for matching */
1974 ecode += GET(ecode, 1); /* Advance past the item */
1975
1976 switch (*ecode)
1977 {
1978 case OP_CRSTAR:
1979 case OP_CRMINSTAR:
1980 case OP_CRPLUS:
1981 case OP_CRMINPLUS:
1982 case OP_CRQUERY:
1983 case OP_CRMINQUERY:
1984 c = *ecode++ - OP_CRSTAR;
1985 minimize = (c & 1) != 0;
1986 min = rep_min[c]; /* Pick up values from tables; */
1987 max = rep_max[c]; /* zero for max => infinity */
1988 if (max == 0) max = INT_MAX;
1989 break;
1990
1991 case OP_CRRANGE:
1992 case OP_CRMINRANGE:
1993 minimize = (*ecode == OP_CRMINRANGE);
1994 min = GET2(ecode, 1);
1995 max = GET2(ecode, 3);
1996 if (max == 0) max = INT_MAX;
1997 ecode += 5;
1998 break;
1999
2000 default: /* No repeat follows */
2001 min = max = 1;
2002 break;
2003 }
2004
2005 /* First, ensure the minimum number of matches are present. */
2006
2007 for (i = 1; i <= min; i++)
2008 {
2009 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2010 GETCHARINC(c, eptr);
2011 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2012 }
2013
2014 /* If max == min we can continue with the main loop without the
2015 need to recurse. */
2016
2017 if (min == max) continue;
2018
2019 /* If minimizing, keep testing the rest of the expression and advancing
2020 the pointer while it matches the class. */
2021
2022 if (minimize)
2023 {
2024 for (fi = min;; fi++)
2025 {
2026 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2027 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2028 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2029 GETCHARINC(c, eptr);
2030 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2031 }
2032 /* Control never gets here */
2033 }
2034
2035 /* If maximizing, find the longest possible run, then work backwards. */
2036
2037 else
2038 {
2039 pp = eptr;
2040 for (i = min; i < max; i++)
2041 {
2042 int len = 1;
2043 if (eptr >= md->end_subject) break;
2044 GETCHARLEN(c, eptr, len);
2045 if (!_pcre_xclass(c, data)) break;
2046 eptr += len;
2047 }
2048 for(;;)
2049 {
2050 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2051 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2052 if (eptr-- == pp) break; /* Stop if tried at original pos */
2053 BACKCHAR(eptr);
2054 }
2055 RRETURN(MATCH_NOMATCH);
2056 }
2057
2058 /* Control never gets here */
2059 }
2060 #endif /* End of XCLASS */
2061
2062 /* Match a single character, casefully */
2063
2064 case OP_CHAR:
2065 #ifdef SUPPORT_UTF8
2066 if (utf8)
2067 {
2068 length = 1;
2069 ecode++;
2070 GETCHARLEN(fc, ecode, length);
2071 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2072 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2073 }
2074 else
2075 #endif
2076
2077 /* Non-UTF-8 mode */
2078 {
2079 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2080 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2081 ecode += 2;
2082 }
2083 break;
2084
2085 /* Match a single character, caselessly */
2086
2087 case OP_CHARNC:
2088 #ifdef SUPPORT_UTF8
2089 if (utf8)
2090 {
2091 length = 1;
2092 ecode++;
2093 GETCHARLEN(fc, ecode, length);
2094
2095 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2096
2097 /* If the pattern character's value is < 128, we have only one byte, and
2098 can use the fast lookup table. */
2099
2100 if (fc < 128)
2101 {
2102 if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2103 }
2104
2105 /* Otherwise we must pick up the subject character */
2106
2107 else
2108 {
2109 unsigned int dc;
2110 GETCHARINC(dc, eptr);
2111 ecode += length;
2112
2113 /* If we have Unicode property support, we can use it to test the other
2114 case of the character, if there is one. */
2115
2116 if (fc != dc)
2117 {
2118 #ifdef SUPPORT_UCP
2119 if (dc != _pcre_ucp_othercase(fc))
2120 #endif
2121 RRETURN(MATCH_NOMATCH);
2122 }
2123 }
2124 }
2125 else
2126 #endif /* SUPPORT_UTF8 */
2127
2128 /* Non-UTF-8 mode */
2129 {
2130 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2131 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2132 ecode += 2;
2133 }
2134 break;
2135
2136 /* Match a single character repeatedly. */
2137
2138 case OP_EXACT:
2139 min = max = GET2(ecode, 1);
2140 ecode += 3;
2141 goto REPEATCHAR;
2142
2143 case OP_POSUPTO:
2144 possessive = TRUE;
2145 /* Fall through */
2146
2147 case OP_UPTO:
2148 case OP_MINUPTO:
2149 min = 0;
2150 max = GET2(ecode, 1);
2151 minimize = *ecode == OP_MINUPTO;
2152 ecode += 3;
2153 goto REPEATCHAR;
2154
2155 case OP_POSSTAR:
2156 possessive = TRUE;
2157 min = 0;
2158 max = INT_MAX;
2159 ecode++;
2160 goto REPEATCHAR;
2161
2162 case OP_POSPLUS:
2163 possessive = TRUE;
2164 min = 1;
2165 max = INT_MAX;
2166 ecode++;
2167 goto REPEATCHAR;
2168
2169 case OP_POSQUERY:
2170 possessive = TRUE;
2171 min = 0;
2172 max = 1;
2173 ecode++;
2174 goto REPEATCHAR;
2175
2176 case OP_STAR:
2177 case OP_MINSTAR:
2178 case OP_PLUS:
2179 case OP_MINPLUS:
2180 case OP_QUERY:
2181 case OP_MINQUERY:
2182 c = *ecode++ - OP_STAR;
2183 minimize = (c & 1) != 0;
2184 min = rep_min[c]; /* Pick up values from tables; */
2185 max = rep_max[c]; /* zero for max => infinity */
2186 if (max == 0) max = INT_MAX;
2187
2188 /* Common code for all repeated single-character matches. We can give
2189 up quickly if there are fewer than the minimum number of characters left in
2190 the subject. */
2191
2192 REPEATCHAR:
2193 #ifdef SUPPORT_UTF8
2194 if (utf8)
2195 {
2196 length = 1;
2197 charptr = ecode;
2198 GETCHARLEN(fc, ecode, length);
2199 if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2200 ecode += length;
2201
2202 /* Handle multibyte character matching specially here. There is
2203 support for caseless matching if UCP support is present. */
2204
2205 if (length > 1)
2206 {
2207 #ifdef SUPPORT_UCP
2208 unsigned int othercase;
2209 if ((ims & PCRE_CASELESS) != 0 &&
2210 (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2211 oclength = _pcre_ord2utf8(othercase, occhars);
2212 else oclength = 0;
2213 #endif /* SUPPORT_UCP */
2214
2215 for (i = 1; i <= min; i++)
2216 {
2217 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2218 #ifdef SUPPORT_UCP
2219 /* Need braces because of following else */
2220 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2221 else
2222 {
2223 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2224 eptr += oclength;
2225 }
2226 #else /* without SUPPORT_UCP */
2227 else { RRETURN(MATCH_NOMATCH); }
2228 #endif /* SUPPORT_UCP */
2229 }
2230
2231 if (min == max) continue;
2232
2233 if (minimize)
2234 {
2235 for (fi = min;; fi++)
2236 {
2237 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2238 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2239 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2240 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2241 #ifdef SUPPORT_UCP
2242 /* Need braces because of following else */
2243 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2244 else
2245 {
2246 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2247 eptr += oclength;
2248 }
2249 #else /* without SUPPORT_UCP */
2250 else { RRETURN (MATCH_NOMATCH); }
2251 #endif /* SUPPORT_UCP */
2252 }
2253 /* Control never gets here */
2254 }
2255
2256 else /* Maximize */
2257 {
2258 pp = eptr;
2259 for (i = min; i < max; i++)
2260 {
2261 if (eptr > md->end_subject - length) break;
2262 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2263 #ifdef SUPPORT_UCP
2264 else if (oclength == 0) break;
2265 else
2266 {
2267 if (memcmp(eptr, occhars, oclength) != 0) break;
2268 eptr += oclength;
2269 }
2270 #else /* without SUPPORT_UCP */
2271 else break;
2272 #endif /* SUPPORT_UCP */
2273 }
2274
2275 if (possessive) continue;
2276 for(;;)
2277 {
2278 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2279 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2280 if (eptr == pp) RRETURN(MATCH_NOMATCH);
2281 #ifdef SUPPORT_UCP
2282 eptr--;
2283 BACKCHAR(eptr);
2284 #else /* without SUPPORT_UCP */
2285 eptr -= length;
2286 #endif /* SUPPORT_UCP */
2287 }
2288 }
2289 /* Control never gets here */
2290 }
2291
2292 /* If the length of a UTF-8 character is 1, we fall through here, and
2293 obey the code as for non-UTF-8 characters below, though in this case the
2294 value of fc will always be < 128. */
2295 }
2296 else
2297 #endif /* SUPPORT_UTF8 */
2298
2299 /* When not in UTF-8 mode, load a single-byte character. */
2300 {
2301 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2302 fc = *ecode++;
2303 }
2304
2305 /* The value of fc at this point is always less than 256, though we may or
2306 may not be in UTF-8 mode. The code is duplicated for the caseless and
2307 caseful cases, for speed, since matching characters is likely to be quite
2308 common. First, ensure the minimum number of matches are present. If min =
2309 max, continue at the same level without recursing. Otherwise, if
2310 minimizing, keep trying the rest of the expression and advancing one
2311 matching character if failing, up to the maximum. Alternatively, if
2312 maximizing, find the maximum number of characters and work backwards. */
2313
2314 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2315 max, eptr));
2316
2317 if ((ims & PCRE_CASELESS) != 0)
2318 {
2319 fc = md->lcc[fc];
2320 for (i = 1; i <= min; i++)
2321 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2322 if (min == max) continue;
2323 if (minimize)
2324 {
2325 for (fi = min;; fi++)
2326 {
2327 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2328 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2329 if (fi >= max || eptr >= md->end_subject ||
2330 fc != md->lcc[*eptr++])
2331 RRETURN(MATCH_NOMATCH);
2332 }
2333 /* Control never gets here */
2334 }
2335 else /* Maximize */
2336 {
2337 pp = eptr;
2338 for (i = min; i < max; i++)
2339 {
2340 if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2341 eptr++;
2342 }
2343 if (possessive) continue;
2344 while (eptr >= pp)
2345 {
2346 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2347 eptr--;
2348 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2349 }
2350 RRETURN(MATCH_NOMATCH);
2351 }
2352 /* Control never gets here */
2353 }
2354
2355 /* Caseful comparisons (includes all multi-byte characters) */
2356
2357 else
2358 {
2359 for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2360 if (min == max) continue;
2361 if (minimize)
2362 {
2363 for (fi = min;; fi++)
2364 {
2365 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2366 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2367 if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2368 RRETURN(MATCH_NOMATCH);
2369 }
2370 /* Control never gets here */
2371 }
2372 else /* Maximize */
2373 {
2374 pp = eptr;
2375 for (i = min; i < max; i++)
2376 {
2377 if (eptr >= md->end_subject || fc != *eptr) break;
2378 eptr++;
2379 }
2380 if (possessive) continue;
2381 while (eptr >= pp)
2382 {
2383 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2384 eptr--;
2385 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2386 }
2387 RRETURN(MATCH_NOMATCH);
2388 }
2389 }
2390 /* Control never gets here */
2391
2392 /* Match a negated single one-byte character. The character we are
2393 checking can be multibyte. */
2394
2395 case OP_NOT:
2396 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2397 ecode++;
2398 GETCHARINCTEST(c, eptr);
2399 if ((ims & PCRE_CASELESS) != 0)
2400 {
2401 #ifdef SUPPORT_UTF8
2402 if (c < 256)
2403 #endif
2404 c = md->lcc[c];
2405 if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2406 }
2407 else
2408 {
2409 if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2410 }
2411 break;
2412
2413 /* Match a negated single one-byte character repeatedly. This is almost a
2414 repeat of the code for a repeated single character, but I haven't found a
2415 nice way of commoning these up that doesn't require a test of the
2416 positive/negative option for each character match. Maybe that wouldn't add
2417 very much to the time taken, but character matching *is* what this is all
2418 about... */
2419
2420 case OP_NOTEXACT:
2421 min = max = GET2(ecode, 1);
2422 ecode += 3;
2423 goto REPEATNOTCHAR;
2424
2425 case OP_NOTUPTO:
2426 case OP_NOTMINUPTO:
2427 min = 0;
2428 max = GET2(ecode, 1);
2429 minimize = *ecode == OP_NOTMINUPTO;
2430 ecode += 3;
2431 goto REPEATNOTCHAR;
2432
2433 case OP_NOTPOSSTAR:
2434 possessive = TRUE;
2435 min = 0;
2436 max = INT_MAX;
2437 ecode++;
2438 goto REPEATNOTCHAR;
2439
2440 case OP_NOTPOSPLUS:
2441 possessive = TRUE;
2442 min = 1;
2443 max = INT_MAX;
2444 ecode++;
2445 goto REPEATNOTCHAR;
2446
2447 case OP_NOTPOSQUERY:
2448 possessive = TRUE;
2449 min = 0;
2450 max = 1;
2451 ecode++;
2452 goto REPEATNOTCHAR;
2453
2454 case OP_NOTPOSUPTO:
2455 possessive = TRUE;
2456 min = 0;
2457 max = GET2(ecode, 1);
2458 ecode += 3;
2459 goto REPEATNOTCHAR;
2460
2461 case OP_NOTSTAR:
2462 case OP_NOTMINSTAR:
2463 case OP_NOTPLUS:
2464 case OP_NOTMINPLUS:
2465 case OP_NOTQUERY:
2466 case OP_NOTMINQUERY:
2467 c = *ecode++ - OP_NOTSTAR;
2468 minimize = (c & 1) != 0;
2469 min = rep_min[c]; /* Pick up values from tables; */
2470 max = rep_max[c]; /* zero for max => infinity */
2471 if (max == 0) max = INT_MAX;
2472
2473 /* Common code for all repeated single-byte matches. We can give up quickly
2474 if there are fewer than the minimum number of bytes left in the
2475 subject. */
2476
2477 REPEATNOTCHAR:
2478 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2479 fc = *ecode++;
2480
2481 /* The code is duplicated for the caseless and caseful cases, for speed,
2482 since matching characters is likely to be quite common. First, ensure the
2483 minimum number of matches are present. If min = max, continue at the same
2484 level without recursing. Otherwise, if minimizing, keep trying the rest of
2485 the expression and advancing one matching character if failing, up to the
2486 maximum. Alternatively, if maximizing, find the maximum number of
2487 characters and work backwards. */
2488
2489 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2490 max, eptr));
2491
2492 if ((ims & PCRE_CASELESS) != 0)
2493 {
2494 fc = md->lcc[fc];
2495
2496 #ifdef SUPPORT_UTF8
2497 /* UTF-8 mode */
2498 if (utf8)
2499 {
2500 register unsigned int d;
2501 for (i = 1; i <= min; i++)
2502 {
2503 GETCHARINC(d, eptr);
2504 if (d < 256) d = md->lcc[d];
2505 if (fc == d) RRETURN(MATCH_NOMATCH);
2506 }
2507 }
2508 else
2509 #endif
2510
2511 /* Not UTF-8 mode */
2512 {
2513 for (i = 1; i <= min; i++)
2514 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2515 }
2516
2517 if (min == max) continue;
2518
2519 if (minimize)
2520 {
2521 #ifdef SUPPORT_UTF8
2522 /* UTF-8 mode */
2523 if (utf8)
2524 {
2525 register unsigned int d;
2526 for (fi = min;; fi++) /* Lazy negated repeat: try the rest of the pattern first, then take one more character */
2527 {
2528 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2529 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2530 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); /* Test the limits BEFORE reading, as the non-UTF-8 loop does, so nothing is fetched at end_subject */
2531 GETCHARINC(d, eptr);
2532 if (d < 256) d = md->lcc[d]; /* The lcc table is only indexed for values below 256 */
2533 if (fc == d) RRETURN(MATCH_NOMATCH); /* Reached the excluded character */
2534 }
2535 }
2536 else
2537 #endif
2538 /* Not UTF-8 mode */
2539 {
2540 for (fi = min;; fi++)
2541 {
2542 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2543 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2544 if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2545 RRETURN(MATCH_NOMATCH);
2546 }
2547 }
2548 /* Control never gets here */
2549 }
2550
2551 /* Maximize case */
2552
2553 else
2554 {
2555 pp = eptr;
2556
2557 #ifdef SUPPORT_UTF8
2558 /* UTF-8 mode */
2559 if (utf8)
2560 {
2561 register unsigned int d;
2562 for (i = min; i < max; i++)
2563 {
2564 int len = 1;
2565 if (eptr >= md->end_subject) break;
2566 GETCHARLEN(d, eptr, len);
2567 if (d < 256) d = md->lcc[d];
2568 if (fc == d) break;
2569 eptr += len;
2570 }
2571 if (possessive) continue;
2572 for(;;)
2573 {
2574 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2575 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2576 if (eptr-- == pp) break; /* Stop if tried at original pos */
2577 BACKCHAR(eptr);
2578 }
2579 }
2580 else
2581 #endif
2582 /* Not UTF-8 mode */
2583 {
2584 for (i = min; i < max; i++)
2585 {
2586 if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2587 eptr++;
2588 }
2589 if (possessive) continue;
2590 while (eptr >= pp)
2591 {
2592 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2593 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2594 eptr--;
2595 }
2596 }
2597
2598 RRETURN(MATCH_NOMATCH);
2599 }
2600 /* Control never gets here */
2601 }
2602
2603 /* Caseful comparisons */
2604
2605 else
2606 {
2607 #ifdef SUPPORT_UTF8
2608 /* UTF-8 mode */
2609 if (utf8)
2610 {
2611 register unsigned int d;
2612 for (i = 1; i <= min; i++)
2613 {
2614 GETCHARINC(d, eptr);
2615 if (fc == d) RRETURN(MATCH_NOMATCH);
2616 }
2617 }
2618 else
2619 #endif
2620 /* Not UTF-8 mode */
2621 {
2622 for (i = 1; i <= min; i++)
2623 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2624 }
2625
2626 if (min == max) continue;
2627
2628 if (minimize)
2629 {
2630 #ifdef SUPPORT_UTF8
2631 /* UTF-8 mode */
2632 if (utf8)
2633 {
2634 register unsigned int d;
2635 for (fi = min;; fi++) /* Lazy negated repeat: try the rest of the pattern first, then take one more character */
2636 {
2637 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2638 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2639 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); /* Test the limits BEFORE reading, as the non-UTF-8 loop does, so nothing is fetched at end_subject */
2640 GETCHARINC(d, eptr);
2641 if (fc == d) RRETURN(MATCH_NOMATCH); /* Reached the excluded character */
2642 }
2643 }
2644 else
2645 #endif
2646 /* Not UTF-8 mode */
2647 {
2648 for (fi = min;; fi++)
2649 {
2650 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2651 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2652 if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2653 RRETURN(MATCH_NOMATCH);
2654 }
2655 }
2656 /* Control never gets here */
2657 }
2658
2659 /* Maximize case */
2660
2661 else
2662 {
2663 pp = eptr;
2664
2665 #ifdef SUPPORT_UTF8
2666 /* UTF-8 mode */
2667 if (utf8)
2668 {
2669 register unsigned int d;
2670 for (i = min; i < max; i++)
2671 {
2672 int len = 1;
2673 if (eptr >= md->end_subject) break;
2674 GETCHARLEN(d, eptr, len);
2675 if (fc == d) break;
2676 eptr += len;
2677 }
2678 if (possessive) continue;
2679 for(;;)
2680 {
2681 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2682 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2683 if (eptr-- == pp) break; /* Stop if tried at original pos */
2684 BACKCHAR(eptr);
2685 }
2686 }
2687 else
2688 #endif
2689 /* Not UTF-8 mode */
2690 {
2691 for (i = min; i < max; i++)
2692 {
2693 if (eptr >= md->end_subject || fc == *eptr) break;
2694 eptr++;
2695 }
2696 if (possessive) continue;
2697 while (eptr >= pp)
2698 {
2699 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2700 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2701 eptr--;
2702 }
2703 }
2704
2705 RRETURN(MATCH_NOMATCH);
2706 }
2707 }
2708 /* Control never gets here */
2709
2710 /* Match a single character type repeatedly; several different opcodes
2711 share code. This is very similar to the code for single characters, but we
2712 repeat it in the interests of efficiency. */
2713
2714 case OP_TYPEEXACT:
2715 min = max = GET2(ecode, 1);
2716 minimize = TRUE;
2717 ecode += 3;
2718 goto REPEATTYPE;
2719
2720 case OP_TYPEUPTO:
2721 case OP_TYPEMINUPTO:
2722 min = 0;
2723 max = GET2(ecode, 1);
2724 minimize = *ecode == OP_TYPEMINUPTO;
2725 ecode += 3;
2726 goto REPEATTYPE;
2727
2728 case OP_TYPEPOSSTAR:
2729 possessive = TRUE;
2730 min = 0;
2731 max = INT_MAX;
2732 ecode++;
2733 goto REPEATTYPE;
2734
2735 case OP_TYPEPOSPLUS:
2736 possessive = TRUE;
2737 min = 1;
2738 max = INT_MAX;
2739 ecode++;
2740 goto REPEATTYPE;
2741
2742 case OP_TYPEPOSQUERY:
2743 possessive = TRUE;
2744 min = 0;
2745 max = 1;
2746 ecode++;
2747 goto REPEATTYPE;
2748
2749 case OP_TYPEPOSUPTO:
2750 possessive = TRUE;
2751 min = 0;
2752 max = GET2(ecode, 1);
2753 ecode += 3;
2754 goto REPEATTYPE;
2755
2756 case OP_TYPESTAR:
2757 case OP_TYPEMINSTAR:
2758 case OP_TYPEPLUS:
2759 case OP_TYPEMINPLUS:
2760 case OP_TYPEQUERY:
2761 case OP_TYPEMINQUERY:
2762 c = *ecode++ - OP_TYPESTAR;
2763 minimize = (c & 1) != 0;
2764 min = rep_min[c]; /* Pick up values from tables; */
2765 max = rep_max[c]; /* zero for max => infinity */
2766 if (max == 0) max = INT_MAX;
2767
2768 /* Common code for all repeated single character type matches. Note that
2769 in UTF-8 mode, '.' matches a character of any length, but for the other
2770 character types, the valid characters are all one-byte long. */
2771
2772 REPEATTYPE:
2773 ctype = *ecode++; /* Code for the character type */
2774
2775 #ifdef SUPPORT_UCP
2776 if (ctype == OP_PROP || ctype == OP_NOTPROP)
2777 {
2778 prop_fail_result = ctype == OP_NOTPROP;
2779 prop_type = *ecode++;
2780 prop_value = *ecode++;
2781 }
2782 else prop_type = -1;
2783 #endif
2784
2785 /* First, ensure the minimum number of matches are present. Use inline
2786 code for maximizing the speed, and do the type test once at the start
2787 (i.e. keep it out of the loop). Also we can test that there are at least
2788 the minimum number of bytes before we start. This isn't as effective in
2789 UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2790 is tidier. Also separate the UCP code, which can be the same for both UTF-8
2791 and single-bytes. */
2792
2793 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2794 if (min > 0)
2795 {
2796 #ifdef SUPPORT_UCP
2797 if (prop_type >= 0)
2798 {
2799 switch(prop_type)
2800 {
2801 case PT_ANY:
2802 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2803 for (i = 1; i <= min; i++)
2804 {
2805 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2806 GETCHARINCTEST(c, eptr);
2807 }
2808 break;
2809
2810 case PT_LAMP:
2811 for (i = 1; i <= min; i++)
2812 {
2813 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2814 GETCHARINCTEST(c, eptr);
2815 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2816 if ((prop_chartype == ucp_Lu ||
2817 prop_chartype == ucp_Ll ||
2818 prop_chartype == ucp_Lt) == prop_fail_result)
2819 RRETURN(MATCH_NOMATCH);
2820 }
2821 break;
2822
2823 case PT_GC:
2824 for (i = 1; i <= min; i++)
2825 {
2826 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2827 GETCHARINCTEST(c, eptr);
2828 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2829 if ((prop_category == prop_value) == prop_fail_result)
2830 RRETURN(MATCH_NOMATCH);
2831 }
2832 break;
2833
2834 case PT_PC:
2835 for (i = 1; i <= min; i++)
2836 {
2837 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2838 GETCHARINCTEST(c, eptr);
2839 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2840 if ((prop_chartype == prop_value) == prop_fail_result)
2841 RRETURN(MATCH_NOMATCH);
2842 }
2843 break;
2844
2845 case PT_SC:
2846 for (i = 1; i <= min; i++)
2847 {
2848 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2849 GETCHARINCTEST(c, eptr);
2850 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2851 if ((prop_script == prop_value) == prop_fail_result)
2852 RRETURN(MATCH_NOMATCH);
2853 }
2854 break;
2855
2856 default:
2857 RRETURN(PCRE_ERROR_INTERNAL);
2858 }
2859 }
2860
2861 /* Match extended Unicode sequences. We will get here only if the
2862 support is in the binary; otherwise a compile-time error occurs. */
2863
2864 else if (ctype == OP_EXTUNI)
2865 {
2866 for (i = 1; i <= min; i++)
2867 {
2868 GETCHARINCTEST(c, eptr);
2869 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2870 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2871 while (eptr < md->end_subject)
2872 {
2873 int len = 1;
2874 if (!utf8) c = *eptr; else
2875 {
2876 GETCHARLEN(c, eptr, len);
2877 }
2878 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2879 if (prop_category != ucp_M) break;
2880 eptr += len;
2881 }
2882 }
2883 }
2884
2885 else
2886 #endif /* SUPPORT_UCP */
2887
2888 /* Handle all other cases when the coding is UTF-8 */
2889
2890 #ifdef SUPPORT_UTF8
2891 if (utf8) switch(ctype)
2892 {
2893 case OP_ANY:
2894 for (i = 1; i <= min; i++)
2895 {
2896 if (eptr >= md->end_subject ||
2897 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2898 RRETURN(MATCH_NOMATCH);
2899 eptr++;
2900 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2901 }
2902 break;
2903
2904 case OP_ANYBYTE:
2905 eptr += min;
2906 break;
2907
2908 case OP_ANYNL:
2909 for (i = 1; i <= min; i++)
2910 {
2911 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2912 GETCHARINC(c, eptr);
2913 switch(c)
2914 {
2915 default: RRETURN(MATCH_NOMATCH);
2916 case 0x000d:
2917 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2918 break;
2919 case 0x000a:
2920 case 0x000b:
2921 case 0x000c:
2922 case 0x0085:
2923 case 0x2028:
2924 case 0x2029:
2925 break;
2926 }
2927 }
2928 break;
2929
2930 case OP_NOT_HSPACE:
2931 for (i = 1; i <= min; i++)
2932 {
2933 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2934 GETCHARINC(c, eptr);
2935 switch(c)
2936 {
2937 default: break;
2938 case 0x09: /* HT */
2939 case 0x20: /* SPACE */
2940 case 0xa0: /* NBSP */
2941 case 0x1680: /* OGHAM SPACE MARK */
2942 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2943 case 0x2000: /* EN QUAD */
2944 case 0x2001: /* EM QUAD */
2945 case 0x2002: /* EN SPACE */
2946 case 0x2003: /* EM SPACE */
2947 case 0x2004: /* THREE-PER-EM SPACE */
2948 case 0x2005: /* FOUR-PER-EM SPACE */
2949 case 0x2006: /* SIX-PER-EM SPACE */
2950 case 0x2007: /* FIGURE SPACE */
2951 case 0x2008: /* PUNCTUATION SPACE */
2952 case 0x2009: /* THIN SPACE */
2953 case 0x200A: /* HAIR SPACE */
2954 case 0x202f: /* NARROW NO-BREAK SPACE */
2955 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2956 case 0x3000: /* IDEOGRAPHIC SPACE */
2957 RRETURN(MATCH_NOMATCH);
2958 }
2959 }
2960 break;
2961
2962 case OP_HSPACE:
2963 for (i = 1; i <= min; i++)
2964 {
2965 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2966 GETCHARINC(c, eptr);
2967 switch(c)
2968 {
2969 default: RRETURN(MATCH_NOMATCH);
2970 case 0x09: /* HT */
2971 case 0x20: /* SPACE */
2972 case 0xa0: /* NBSP */
2973 case 0x1680: /* OGHAM SPACE MARK */
2974 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2975 case 0x2000: /* EN QUAD */
2976 case 0x2001: /* EM QUAD */
2977 case 0x2002: /* EN SPACE */
2978 case 0x2003: /* EM SPACE */
2979 case 0x2004: /* THREE-PER-EM SPACE */
2980 case 0x2005: /* FOUR-PER-EM SPACE */
2981 case 0x2006: /* SIX-PER-EM SPACE */
2982 case 0x2007: /* FIGURE SPACE */
2983 case 0x2008: /* PUNCTUATION SPACE */
2984 case 0x2009: /* THIN SPACE */
2985 case 0x200A: /* HAIR SPACE */
2986 case 0x202f: /* NARROW NO-BREAK SPACE */
2987 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2988 case 0x3000: /* IDEOGRAPHIC SPACE */
2989 break;
2990 }
2991 }
2992 break;
2993
2994 case OP_NOT_VSPACE:
2995 for (i = 1; i <= min; i++)
2996 {
2997 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2998 GETCHARINC(c, eptr);
2999 switch(c)
3000 {
3001 default: break;
3002 case 0x0a: /* LF */
3003 case 0x0b: /* VT */
3004 case 0x0c: /* FF */
3005 case 0x0d: /* CR */
3006 case 0x85: /* NEL */
3007 case 0x2028: /* LINE SEPARATOR */
3008 case 0x2029: /* PARAGRAPH SEPARATOR */
3009 RRETURN(MATCH_NOMATCH);
3010 }
3011 }
3012 break;
3013
3014 case OP_VSPACE:
3015 for (i = 1; i <= min; i++)
3016 {
3017 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3018 GETCHARINC(c, eptr);
3019 switch(c)
3020 {
3021 default: RRETURN(MATCH_NOMATCH);
3022 case 0x0a: /* LF */
3023 case 0x0b: /* VT */
3024 case 0x0c: /* FF */
3025 case 0x0d: /* CR */
3026 case 0x85: /* NEL */
3027 case 0x2028: /* LINE SEPARATOR */
3028 case 0x2029: /* PARAGRAPH SEPARATOR */
3029 break;
3030 }
3031 }
3032 break;
3033
3034 case OP_NOT_DIGIT:
3035 for (i = 1; i <= min; i++)
3036 {
3037 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3038 GETCHARINC(c, eptr);
3039 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3040 RRETURN(MATCH_NOMATCH);
3041 }
3042 break;
3043
3044 case OP_DIGIT:
3045 for (i = 1; i <= min; i++)
3046 {
3047 if (eptr >= md->end_subject ||
3048 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3049 RRETURN(MATCH_NOMATCH);
3050 /* No need to skip more bytes - we know it's a 1-byte character */
3051 }
3052 break;
3053
3054 case OP_NOT_WHITESPACE: /* UTF-8 mode: only bytes below 128 are looked up in ctypes; anything else passes */
3055 for (i = 1; i <= min; i++)
3056 {
3057 if (eptr >= md->end_subject ||
3058 (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)) /* No increment here: it would be skipped by the short-circuit for bytes >= 128 */
3059 RRETURN(MATCH_NOMATCH);
3060 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); /* Always step over the first byte, then over any 10xxxxxx continuation bytes */
3061 }
3062 break;
3063
3064 case OP_WHITESPACE:
3065 for (i = 1; i <= min; i++)
3066 {
3067 if (eptr >= md->end_subject ||
3068 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3069 RRETURN(MATCH_NOMATCH);
3070 /* No need to skip more bytes - we know it's a 1-byte character */
3071 }
3072 break;
3073
3074 case OP_NOT_WORDCHAR: /* UTF-8 mode: only bytes below 128 are looked up in ctypes; anything else passes */
3075 for (i = 1; i <= min; i++)
3076 {
3077 if (eptr >= md->end_subject ||
3078 (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)) /* No increment here: it would be skipped by the short-circuit for bytes >= 128 */
3079 RRETURN(MATCH_NOMATCH);
3080 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80); /* Always step over the first byte, then over any 10xxxxxx continuation bytes */
3081 }
3082 break;
3083
3084 case OP_WORDCHAR:
3085 for (i = 1; i <= min; i++)
3086 {
3087 if (eptr >= md->end_subject ||
3088 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3089 RRETURN(MATCH_NOMATCH);
3090 /* No need to skip more bytes - we know it's a 1-byte character */
3091 }
3092 break;
3093
3094 default:
3095 RRETURN(PCRE_ERROR_INTERNAL);
3096 } /* End switch(ctype) */
3097
3098 else
3099 #endif /* SUPPORT_UTF8 */
3100
3101 /* Code for the non-UTF-8 case for minimum matching of operators other
3102 than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3103 number of bytes present, as this was tested above. */
3104
3105 switch(ctype)
3106 {
3107 case OP_ANY:
3108 if ((ims & PCRE_DOTALL) == 0)
3109 {
3110 for (i = 1; i <= min; i++)
3111 {
3112 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3113 eptr++;
3114 }
3115 }
3116 else eptr += min;
3117 break;
3118
3119 case OP_ANYBYTE:
3120 eptr += min;
3121 break;
3122
3123 /* Because of the CRLF case, we can't assume the minimum number of
3124 bytes are present in this case. */
3125
3126 case OP_ANYNL:
3127 for (i = 1; i <= min; i++)
3128 {
3129 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3130 switch(*eptr++)
3131 {
3132 default: RRETURN(MATCH_NOMATCH);
3133 case 0x000d:
3134 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3135 break;
3136 case 0x000a:
3137 case 0x000b:
3138 case 0x000c:
3139 case 0x0085:
3140 break;
3141 }
3142 }
3143 break;
3144
3145 case OP_NOT_HSPACE:
3146 for (i = 1; i <= min; i++)
3147 {
3148 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3149 switch(*eptr++)
3150 {
3151 default: break;
3152 case 0x09: /* HT */
3153 case 0x20: /* SPACE */
3154 case 0xa0: /* NBSP */
3155 RRETURN(MATCH_NOMATCH);
3156 }
3157 }
3158 break;
3159
3160 case OP_HSPACE:
3161 for (i = 1; i <= min; i++)
3162 {
3163 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3164 switch(*eptr++)
3165 {
3166 default: RRETURN(MATCH_NOMATCH);
3167 case 0x09: /* HT */
3168 case 0x20: /* SPACE */
3169 case 0xa0: /* NBSP */
3170 break;
3171 }
3172 }
3173 break;
3174
3175 case OP_NOT_VSPACE:
3176 for (i = 1; i <= min; i++)
3177 {
3178 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3179 switch(*eptr++)
3180 {
3181 default: break;
3182 case 0x0a: /* LF */
3183 case 0x0b: /* VT */
3184 case 0x0c: /* FF */
3185 case 0x0d: /* CR */
3186 case 0x85: /* NEL */
3187 RRETURN(MATCH_NOMATCH);
3188 }
3189 }
3190 break;
3191
3192 case OP_VSPACE:
3193 for (i = 1; i <= min; i++)
3194 {
3195 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3196 switch(*eptr++)
3197 {
3198 default: RRETURN(MATCH_NOMATCH);
3199 case 0x0a: /* LF */
3200 case 0x0b: /* VT */
3201 case 0x0c: /* FF */
3202 case 0x0d: /* CR */
3203 case 0x85: /* NEL */
3204 break;
3205 }
3206 }
3207 break;
3208
3209 case OP_NOT_DIGIT:
3210 for (i = 1; i <= min; i++)
3211 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3212 break;
3213
3214 case OP_DIGIT:
3215 for (i = 1; i <= min; i++)
3216 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3217 break;
3218
3219 case OP_NOT_WHITESPACE:
3220 for (i = 1; i <= min; i++)
3221 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3222 break;
3223
3224 case OP_WHITESPACE:
3225 for (i = 1; i <= min; i++)
3226 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3227 break;
3228
3229 case OP_NOT_WORDCHAR:
3230 for (i = 1; i <= min; i++)
3231 if ((md->ctypes[*eptr++] & ctype_word) != 0)
3232 RRETURN(MATCH_NOMATCH);
3233 break;
3234
3235 case OP_WORDCHAR:
3236 for (i = 1; i <= min; i++)
3237 if ((md->ctypes[*eptr++] & ctype_word) == 0)
3238 RRETURN(MATCH_NOMATCH);
3239 break;
3240
3241 default:
3242 RRETURN(PCRE_ERROR_INTERNAL);
3243 }
3244 }
3245
3246 /* If min = max, continue at the same level without recursing */
3247
3248 if (min == max) continue;
3249
3250 /* If minimizing, we have to test the rest of the pattern before each
3251 subsequent match. Again, separate the UTF-8 case for speed, and also
3252 separate the UCP cases. */
3253
3254 if (minimize)
3255 {
3256 #ifdef SUPPORT_UCP
3257 if (prop_type >= 0)
3258 {
3259 switch(prop_type) /* Lazy repeat of a property test: after each failed continuation, consume one more character and test it */
3260 {
3261 case PT_ANY:
3262 for (fi = min;; fi++)
3263 {
3264 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3265 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3266 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3267 GETCHARINCTEST(c, eptr); /* Same fetch macro as the minimum-count loops, since this code serves both UTF-8 and single-byte subjects */
3268 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3269 }
3270 /* Control never gets here */
3271
3272 case PT_LAMP:
3273 for (fi = min;; fi++)
3274 {
3275 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3276 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3277 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3278 GETCHARINCTEST(c, eptr);
3279 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3280 if ((prop_chartype == ucp_Lu ||
3281 prop_chartype == ucp_Ll ||
3282 prop_chartype == ucp_Lt) == prop_fail_result)
3283 RRETURN(MATCH_NOMATCH);
3284 }
3285 /* Control never gets here */
3286
3287 case PT_GC:
3288 for (fi = min;; fi++)
3289 {
3290 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3291 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3292 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3293 GETCHARINCTEST(c, eptr);
3294 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3295 if ((prop_category == prop_value) == prop_fail_result)
3296 RRETURN(MATCH_NOMATCH);
3297 }
3298 /* Control never gets here */
3299
3300 case PT_PC:
3301 for (fi = min;; fi++)
3302 {
3303 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3304 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3305 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3306 GETCHARINCTEST(c, eptr);
3307 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3308 if ((prop_chartype == prop_value) == prop_fail_result)
3309 RRETURN(MATCH_NOMATCH);
3310 }
3311 /* Control never gets here */
3312
3313 case PT_SC:
3314 for (fi = min;; fi++)
3315 {
3316 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3317 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3318 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3319 GETCHARINCTEST(c, eptr);
3320 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3321 if ((prop_script == prop_value) == prop_fail_result)
3322 RRETURN(MATCH_NOMATCH);
3323 }
3324 /* Control never gets here */
3325
3326 default:
3327 RRETURN(PCRE_ERROR_INTERNAL);
3328 }
3329 }
3330
3331 /* Match extended Unicode sequences. We will get here only if the
3332 support is in the binary; otherwise a compile-time error occurs. */
3333
3334 else if (ctype == OP_EXTUNI)
3335 {
3336 for (fi = min;; fi++)
3337 {
3338 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3339 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3340 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3341 GETCHARINCTEST(c, eptr);
3342 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3343 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3344 while (eptr < md->end_subject)
3345 {
3346 int len = 1;
3347 if (!utf8) c = *eptr; else
3348 {
3349 GETCHARLEN(c, eptr, len);
3350 }
3351 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3352 if (prop_category != ucp_M) break;
3353 eptr += len;
3354 }
3355 }
3356 }
3357
3358 else
3359 #endif /* SUPPORT_UCP */
3360
3361 #ifdef SUPPORT_UTF8
3362 /* UTF-8 mode */
3363 if (utf8)
3364 {
3365 for (fi = min;; fi++)
3366 {
3367 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3368 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3369 if (fi >= max || eptr >= md->end_subject ||
3370 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3371 IS_NEWLINE(eptr)))
3372 RRETURN(MATCH_NOMATCH);
3373
3374 GETCHARINC(c, eptr);
3375 switch(ctype)
3376 {
3377 case OP_ANY: /* This is the DOTALL case */
3378 break;
3379
3380 case OP_ANYBYTE:
3381 break;
3382
3383 case OP_ANYNL:
3384 switch(c)
3385 {
3386 default: RRETURN(MATCH_NOMATCH);
3387 case 0x000d:
3388 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3389 break;
3390 case 0x000a:
3391 case 0x000b:
3392 case 0x000c:
3393 case 0x0085:
3394 case 0x2028:
3395 case 0x2029:
3396 break;
3397 }
3398 break;
3399
3400 case OP_NOT_HSPACE:
3401 switch(c)
3402 {
3403 default: break;
3404 case 0x09: /* HT */
3405 case 0x20: /* SPACE */
3406 case 0xa0: /* NBSP */
3407 case 0x1680: /* OGHAM SPACE MARK */
3408 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3409 case 0x2000: /* EN QUAD */
3410 case 0x2001: /* EM QUAD */
3411 case 0x2002: /* EN SPACE */
3412 case 0x2003: /* EM SPACE */
3413 case 0x2004: /* THREE-PER-EM SPACE */
3414 case 0x2005: /* FOUR-PER-EM SPACE */
3415 case 0x2006: /* SIX-PER-EM SPACE */
3416 case 0x2007: /* FIGURE SPACE */
3417 case 0x2008: /* PUNCTUATION SPACE */
3418 case 0x2009: /* THIN SPACE */
3419 case 0x200A: /* HAIR SPACE */
3420 case 0x202f: /* NARROW NO-BREAK SPACE */
3421 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3422 case 0x3000: /* IDEOGRAPHIC SPACE */
3423 RRETURN(MATCH_NOMATCH);
3424 }
3425 break;
3426
3427 case OP_HSPACE:
3428 switch(c)
3429 {
3430 default: RRETURN(MATCH_NOMATCH);
3431 case 0x09: /* HT */
3432 case 0x20: /* SPACE */
3433 case 0xa0: /* NBSP */
3434 case 0x1680: /* OGHAM SPACE MARK */
3435 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3436 case 0x2000: /* EN QUAD */
3437 case 0x2001: /* EM QUAD */
3438 case 0x2002: /* EN SPACE */
3439 case 0x2003: /* EM SPACE */
3440 case 0x2004: /* THREE-PER-EM SPACE */
3441 case 0x2005: /* FOUR-PER-EM SPACE */
3442 case 0x2006: /* SIX-PER-EM SPACE */
3443 case 0x2007: /* FIGURE SPACE */
3444 case 0x2008: /* PUNCTUATION SPACE */
3445 case 0x2009: /* THIN SPACE */
3446 case 0x200A: /* HAIR SPACE */
3447 case 0x202f: /* NARROW NO-BREAK SPACE */
3448 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3449 case 0x3000: /* IDEOGRAPHIC SPACE */
3450 break;
3451 }
3452 break;
3453
3454 case OP_NOT_VSPACE:
3455 switch(c)
3456 {
3457 default: break;
3458 case 0x0a: /* LF */
3459 case 0x0b: /* VT */
3460 case 0x0c: /* FF */
3461 case 0x0d: /* CR */
3462 case 0x85: /* NEL */
3463 case 0x2028: /* LINE SEPARATOR */
3464 case 0x2029: /* PARAGRAPH SEPARATOR */
3465 RRETURN(MATCH_NOMATCH);
3466 }
3467 break;
3468
3469 case OP_VSPACE:
3470 switch(c)
3471 {
3472 default: RRETURN(MATCH_NOMATCH);
3473 case 0x0a: /* LF */
3474 case 0x0b: /* VT */
3475 case 0x0c: /* FF */
3476 case 0x0d: /* CR */
3477 case 0x85: /* NEL */
3478 case 0x2028: /* LINE SEPARATOR */
3479 case 0x2029: /* PARAGRAPH SEPARATOR */
3480 break;
3481 }
3482 break;
3483
3484 case OP_NOT_DIGIT:
3485 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3486 RRETURN(MATCH_NOMATCH);
3487 break;
3488
3489 case OP_DIGIT:
3490 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3491 RRETURN(MATCH_NOMATCH);
3492 break;
3493
3494 case OP_NOT_WHITESPACE:
3495 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3496 RRETURN(MATCH_NOMATCH);
3497 break;
3498
3499 case OP_WHITESPACE:
3500 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3501 RRETURN(MATCH_NOMATCH);
3502 break;
3503
3504 case OP_NOT_WORDCHAR:
3505 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3506 RRETURN(MATCH_NOMATCH);
3507 break;
3508
3509 case OP_WORDCHAR:
3510 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3511 RRETURN(MATCH_NOMATCH);
3512 break;
3513
3514 default:
3515 RRETURN(PCRE_ERROR_INTERNAL);
3516 }
3517 }
3518 }
3519 else
3520 #endif
3521 /* Not UTF-8 mode */
3522 {
3523 for (fi = min;; fi++)
3524 {
3525 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3526 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3527 if (fi >= max || eptr >= md->end_subject ||
3528 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3529 RRETURN(MATCH_NOMATCH);
3530
3531 c = *eptr++;
3532 switch(ctype)
3533 {
3534 case OP_ANY: /* This is the DOTALL case */
3535 break;
3536
3537 case OP_ANYBYTE:
3538 break;
3539
3540 case OP_ANYNL:
3541 switch(c)
3542 {
3543 default: RRETURN(MATCH_NOMATCH);
3544 case 0x000d:
3545 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3546 break;
3547 case 0x000a:
3548 case 0x000b:
3549 case 0x000c:
3550 case 0x0085:
3551 break;
3552 }
3553 break;
3554
3555 case OP_NOT_HSPACE:
3556 switch(c)
3557 {
3558 default: break;
3559 case 0x09: /* HT */
3560 case 0x20: /* SPACE */
3561 case 0xa0: /* NBSP */
3562 RRETURN(MATCH_NOMATCH);
3563 }
3564 break;
3565
3566 case OP_HSPACE:
3567 switch(c)
3568 {
3569 default: RRETURN(MATCH_NOMATCH);
3570 case 0x09: /* HT */
3571 case 0x20: /* SPACE */
3572 case 0xa0: /* NBSP */
3573 break;
3574 }
3575 break;
3576
3577 case OP_NOT_VSPACE:
3578 switch(c)
3579 {
3580 default: break;
3581 case 0x0a: /* LF */
3582 case 0x0b: /* VT */
3583 case 0x0c: /* FF */
3584 case 0x0d: /* CR */
3585 case 0x85: /* NEL */
3586 RRETURN(MATCH_NOMATCH);
3587 }
3588 break;
3589
3590 case OP_VSPACE:
3591 switch(c)
3592 {
3593 default: RRETURN(MATCH_NOMATCH);
3594 case 0x0a: /* LF */
3595 case 0x0b: /* VT */
3596 case 0x0c: /* FF */
3597 case 0x0d: /* CR */
3598 case 0x85: /* NEL */
3599 break;
3600 }
3601 break;
3602
3603 case OP_NOT_DIGIT:
3604 if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3605 break;
3606
3607 case OP_DIGIT:
3608 if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3609 break;
3610
3611 case OP_NOT_WHITESPACE:
3612 if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3613 break;
3614
3615 case OP_WHITESPACE:
3616 if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3617 break;
3618
3619 case OP_NOT_WORDCHAR:
3620 if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3621 break;
3622
3623 case OP_WORDCHAR:
3624 if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3625 break;
3626
3627 default:
3628 RRETURN(PCRE_ERROR_INTERNAL);
3629 }
3630 }
3631 }
3632 /* Control never gets here */
3633 }
3634
3635 /* If maximizing, it is worth using inline code for speed, doing the type
3636 test once at the start (i.e. keep it out of the loop). Again, keep the
3637 UTF-8 and UCP stuff separate. */
3638
3639 else
3640 {
3641 pp = eptr; /* Remember where we started */
3642
3643 #ifdef SUPPORT_UCP
3644 if (prop_type >= 0)
3645 {
3646 switch(prop_type)
3647 {
3648 case PT_ANY:
3649 for (i = min; i < max; i++)
3650 {
3651 int len = 1;
3652 if (eptr >= md->end_subject) break;
3653 GETCHARLEN(c, eptr, len);
3654 if (prop_fail_result) break;
3655 eptr+= len;
3656 }
3657 break;
3658
3659 case PT_LAMP:
3660 for (i = min; i < max; i++)
3661 {
3662 int len = 1;
3663 if (eptr >= md->end_subject) break;
3664 GETCHARLEN(c, eptr, len);
3665 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3666 if ((prop_chartype == ucp_Lu ||
3667 prop_chartype == ucp_Ll ||
3668 prop_chartype == ucp_Lt) == prop_fail_result)
3669 break;
3670 eptr+= len;
3671 }
3672 break;
3673
3674 case PT_GC:
3675 for (i = min; i < max; i++)
3676 {
3677 int len = 1;
3678 if (eptr >= md->end_subject) break;
3679 GETCHARLEN(c, eptr, len);
3680 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3681 if ((prop_category == prop_value) == prop_fail_result)
3682 break;
3683 eptr+= len;
3684 }
3685 break;
3686
3687 case PT_PC:
3688 for (i = min; i < max; i++)
3689 {
3690 int len = 1;
3691 if (eptr >= md->end_subject) break;
3692 GETCHARLEN(c, eptr, len);
3693 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3694 if ((prop_chartype == prop_value) == prop_fail_result)
3695 break;
3696 eptr+= len;
3697 }
3698 break;
3699
3700 case PT_SC:
3701 for (i = min; i < max; i++)
3702 {
3703 int len = 1;
3704 if (eptr >= md->end_subject) break;
3705 GETCHARLEN(c, eptr, len);
3706 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3707 if ((prop_script == prop_value) == prop_fail_result)
3708 break;
3709 eptr+= len;
3710 }
3711 break;
3712 }
3713
3714 /* eptr is now past the end of the maximum run */
3715
3716 if (possessive) continue;
3717 for(;;)
3718 {
3719 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3720 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3721 if (eptr-- == pp) break; /* Stop if tried at original pos */
3722 if (utf8) BACKCHAR(eptr);
3723 }
3724 }
3725
3726 /* Match extended Unicode sequences. We will get here only if the
3727 support is in the binary; otherwise a compile-time error occurs. */
3728
3729 else if (ctype == OP_EXTUNI)
3730 {
3731 for (i = min; i < max; i++)
3732 {
3733 if (eptr >= md->end_subject) break;
3734 GETCHARINCTEST(c, eptr);
3735 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3736 if (prop_category == ucp_M) break;
3737 while (eptr < md->end_subject)
3738 {
3739 int len = 1;
3740 if (!utf8) c = *eptr; else
3741 {
3742 GETCHARLEN(c, eptr, len);
3743 }
3744 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3745 if (prop_category != ucp_M) break;
3746 eptr += len;
3747 }
3748 }
3749
3750 /* eptr is now past the end of the maximum run */
3751
3752 if (possessive) continue;
3753 for(;;)
3754 {
3755 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3756 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3757 if (eptr-- == pp) break; /* Stop if tried at original pos */
3758 for (;;) /* Move back over one extended */
3759 {
3760 int len = 1;
3761 if (!utf8) c = *eptr; else
3762 {
3763 BACKCHAR(eptr);
3764 GETCHARLEN(c, eptr, len);
3765 }
3766 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3767 if (prop_category != ucp_M) break;
3768 eptr--;
3769 }
3770 }
3771 }
3772
3773 else
3774 #endif /* SUPPORT_UCP */
3775
3776 #ifdef SUPPORT_UTF8
3777 /* UTF-8 mode */
3778
3779 if (utf8)
3780 {
3781 switch(ctype)
3782 {
3783 case OP_ANY:
3784 if (max < INT_MAX)
3785 {
3786 if ((ims & PCRE_DOTALL) == 0)
3787 {
3788 for (i = min; i < max; i++)
3789 {
3790 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3791 eptr++;
3792 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3793 }
3794 }
3795 else
3796 {
3797 for (i = min; i < max; i++)
3798 {
3799 if (eptr >= md->end_subject) break;
3800 eptr++;
3801 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3802 }
3803 }
3804 }
3805
3806 /* Handle unlimited UTF-8 repeat */
3807
3808 else
3809 {
3810 if ((ims & PCRE_DOTALL) == 0)
3811 {
3812 for (i = min; i < max; i++)
3813 {
3814 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3815 eptr++;
3816 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3817 }
3818 }
3819 else
3820 {
3821 eptr = md->end_subject;
3822 }
3823 }
3824 break;
3825
3826 /* The byte case is the same as non-UTF8 */
3827
3828 case OP_ANYBYTE:
3829 c = max - min;
3830 if (c > (unsigned int)(md->end_subject - eptr))
3831 c = md->end_subject - eptr;
3832 eptr += c;
3833 break;
3834
3835 case OP_ANYNL:
3836 for (i = min; i < max; i++)
3837 {
3838 int len = 1;
3839 if (eptr >= md->end_subject) break;
3840 GETCHARLEN(c, eptr, len);
3841 if (c == 0x000d)
3842 {
3843 if (++eptr >= md->end_subject) break;
3844 if (*eptr == 0x000a) eptr++;
3845 }
3846 else
3847 {
3848 if (c != 0x000a && c != 0x000b && c != 0x000c &&
3849 c != 0x0085 && c != 0x2028 && c != 0x2029)
3850 break;
3851 eptr += len;
3852 }
3853 }
3854 break;
3855
3856 case OP_NOT_HSPACE:
3857 case OP_HSPACE:
3858 for (i = min; i < max; i++)
3859 {
3860 BOOL gotspace;
3861 int len = 1;
3862 if (eptr >= md->end_subject) break;
3863 GETCHARLEN(c, eptr, len);
3864 switch(c)
3865 {
3866 default: gotspace = FALSE; break;
3867 case 0x09: /* HT */
3868 case 0x20: /* SPACE */
3869 case 0xa0: /* NBSP */
3870 case 0x1680: /* OGHAM SPACE MARK */
3871 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3872 case 0x2000: /* EN QUAD */
3873 case 0x2001: /* EM QUAD */
3874 case 0x2002: /* EN SPACE */
3875 case 0x2003: /* EM SPACE */
3876 case 0x2004: /* THREE-PER-EM SPACE */
3877 case 0x2005: /* FOUR-PER-EM SPACE */
3878 case 0x2006: /* SIX-PER-EM SPACE */
3879 case 0x2007: /* FIGURE SPACE */
3880 case 0x2008: /* PUNCTUATION SPACE */
3881 case 0x2009: /* THIN SPACE */
3882 case 0x200A: /* HAIR SPACE */
3883 case 0x202f: /* NARROW NO-BREAK SPACE */
3884 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3885 case 0x3000: /* IDEOGRAPHIC SPACE */
3886 gotspace = TRUE;
3887 break;
3888 }
3889 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3890 eptr += len;
3891 }
3892 break;
3893
3894 case OP_NOT_VSPACE:
3895 case OP_VSPACE:
3896 for (i = min; i < max; i++)
3897 {
3898 BOOL gotspace;
3899 int len = 1;
3900 if (eptr >= md->end_subject) break;
3901 GETCHARLEN(c, eptr, len);
3902 switch(c)
3903 {
3904 default: gotspace = FALSE; break;
3905 case 0x0a: /* LF */
3906 case 0x0b: /* VT */
3907 case 0x0c: /* FF */
3908 case 0x0d: /* CR */
3909 case 0x85: /* NEL */
3910 case 0x2028: /* LINE SEPARATOR */
3911 case 0x2029: /* PARAGRAPH SEPARATOR */
3912 gotspace = TRUE;
3913 break;
3914 }
3915 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3916 eptr += len;
3917 }
3918 break;
3919
3920 case OP_NOT_DIGIT:
3921 for (i = min; i < max; i++)
3922 {
3923 int len = 1;
3924 if (eptr >= md->end_subject) break;
3925 GETCHARLEN(c, eptr, len);
3926 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3927 eptr+= len;
3928 }
3929 break;
3930
3931 case OP_DIGIT:
3932 for (i = min; i < max; i++)
3933 {
3934 int len = 1;
3935 if (eptr >= md->end_subject) break;
3936 GETCHARLEN(c, eptr, len);
3937 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3938 eptr+= len;
3939 }
3940 break;
3941
3942 case OP_NOT_WHITESPACE:
3943 for (i = min; i < max; i++)
3944 {
3945 int len = 1;
3946 if (eptr >= md->end_subject) break;
3947 GETCHARLEN(c, eptr, len);
3948 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3949 eptr+= len;
3950 }
3951 break;
3952
3953 case OP_WHITESPACE:
3954 for (i = min; i < max; i++)
3955 {
3956 int len = 1;
3957 if (eptr >= md->end_subject) break;
3958 GETCHARLEN(c, eptr, len);
3959 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3960 eptr+= len;
3961 }
3962 break;
3963
3964 case OP_NOT_WORDCHAR:
3965 for (i = min; i < max; i++)
3966 {
3967 int len = 1;
3968 if (eptr >= md->end_subject) break;
3969 GETCHARLEN(c, eptr, len);
3970 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3971 eptr+= len;
3972 }
3973 break;
3974
3975 case OP_WORDCHAR:
3976 for (i = min; i < max; i++)
3977 {
3978 int len = 1;
3979 if (eptr >= md->end_subject) break;
3980 GETCHARLEN(c, eptr, len);
3981 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3982 eptr+= len;
3983 }
3984 break;
3985
3986 default:
3987 RRETURN(PCRE_ERROR_INTERNAL);
3988 }
3989
3990 /* eptr is now past the end of the maximum run */
3991
3992 if (possessive) continue;
3993 for(;;)
3994 {
3995 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3996 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3997 if (eptr-- == pp) break; /* Stop if tried at original pos */
3998 BACKCHAR(eptr);
3999 }
4000 }
4001 else
4002 #endif /* SUPPORT_UTF8 */
4003
4004 /* Not UTF-8 mode */
4005 {
4006 switch(ctype)
4007 {
4008 case OP_ANY:
4009 if ((ims & PCRE_DOTALL) == 0)
4010 {
4011 for (i = min; i < max; i++)
4012 {
4013 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4014 eptr++;
4015 }
4016 break;
4017 }
4018 /* For DOTALL case, fall through and treat as \C */
4019
4020 case OP_ANYBYTE:
4021 c = max - min;
4022 if (c > (unsigned int)(md->end_subject - eptr))
4023 c = md->end_subject - eptr;
4024 eptr += c;
4025 break;
4026
4027 case OP_ANYNL:
4028 for (i = min; i < max; i++)
4029 {
4030 if (eptr >= md->end_subject) break;
4031 c = *eptr;
4032 if (c == 0x000d)
4033 {
4034 if (++eptr >= md->end_subject) break;
4035 if (*eptr == 0x000a) eptr++;
4036 }
4037 else
4038 {
4039 if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
4040 break;
4041 eptr++;
4042 }
4043 }
4044 break;
4045
4046 case OP_NOT_HSPACE:
4047 for (i = min; i < max; i++)
4048 {
4049 if (eptr >= md->end_subject) break;
4050 c = *eptr;
4051 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4052 eptr++;
4053 }
4054 break;
4055
4056 case OP_HSPACE:
4057 for (i = min; i < max; i++)
4058 {
4059 if (eptr >= md->end_subject) break;
4060 c = *eptr;
4061 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4062 eptr++;
4063 }
4064 break;
4065
4066 case OP_NOT_VSPACE:
4067 for (i = min; i < max; i++)
4068 {
4069 if (eptr >= md->end_subject) break;
4070 c = *eptr;
4071 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4072 break;
4073 eptr++;
4074 }
4075 break;
4076
4077 case OP_VSPACE:
4078 for (i = min; i < max; i++)
4079 {
4080 if (eptr >= md->end_subject) break;
4081 c = *eptr;
4082 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4083 break;
4084 eptr++;
4085 }
4086 break;
4087
4088 case OP_NOT_DIGIT:
4089 for (i = min; i < max; i++)
4090 {
4091 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4092 break;
4093 eptr++;
4094 }
4095 break;
4096
4097 case OP_DIGIT:
4098 for (i = min; i < max; i++)
4099 {
4100 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4101 break;
4102 eptr++;
4103 }
4104 break;
4105
4106 case OP_NOT_WHITESPACE:
4107 for (i = min; i < max; i++)
4108 {
4109 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4110 break;
4111 eptr++;
4112 }
4113 break;
4114
4115 case OP_WHITESPACE:
4116 for (i = min; i < max; i++)
4117 {
4118 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4119 break;
4120 eptr++;
4121 }
4122 break;
4123
4124 case OP_NOT_WORDCHAR:
4125 for (i = min; i < max; i++)
4126 {
4127 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4128 break;
4129 eptr++;
4130 }
4131 break;
4132
4133 case OP_WORDCHAR:
4134 for (i = min; i < max; i++)
4135 {
4136 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4137 break;
4138 eptr++;
4139 }
4140 break;
4141
4142 default:
4143 RRETURN(PCRE_ERROR_INTERNAL);
4144 }
4145
4146 /* eptr is now past the end of the maximum run */
4147
4148 if (possessive) continue;
4149 while (eptr >= pp)
4150 {
4151 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4152 eptr--;
4153 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4154 }
4155 }
4156
4157 /* Get here if we can't make it match with any permitted repetitions */
4158
4159 RRETURN(MATCH_NOMATCH);
4160 }
4161 /* Control never gets here */
4162
4163 /* There's been some horrible disaster. Arrival here can only mean there is
4164 something seriously wrong in the code above or the OP_xxx definitions. */
4165
4166 default:
4167 DPRINTF(("Unknown opcode %d\n", *ecode));
4168 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4169 }
4170
4171 /* Do not stick any code in here without much thought; it is assumed
4172 that "continue" in the code above comes out to here to repeat the main
4173 loop. */
4174
4175 } /* End of main loop */
4176 /* Control never reaches here */
4177
4178
4179 /* When compiling to use the heap rather than the stack for recursive calls to
4180 match(), the RRETURN() macro jumps here. The number that is saved in
4181 frame->Xwhere indicates which label we actually want to return to. */
4182
4183 #ifdef NO_RECURSE
4184 #define LBL(val) case val: goto L_RM##val;
4185 HEAP_RETURN:
4186 switch (frame->Xwhere)
4187 {
4188 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4189 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4190 LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4191 LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4192 LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4193 LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4194 default:
4195 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4196 return PCRE_ERROR_INTERNAL;
4197 }
4198 #undef LBL
4199 #endif /* NO_RECURSE */
4200 }
4201
4202
4203 /***************************************************************************
4204 ****************************************************************************
4205 RECURSION IN THE match() FUNCTION
4206
4207 Undefine all the macros that were defined above to handle this. */
4208
4209 #ifdef NO_RECURSE /* Build that uses the heap, not the stack, for recursive calls to match() */
4210 #undef eptr
4211 #undef ecode
4212 #undef mstart
4213 #undef offset_top
4214 #undef ims
4215 #undef eptrb
4216 #undef flags
4217
4218 #undef callpat
4219 #undef charptr
4220 #undef data
4221 #undef next
4222 #undef pp
4223 #undef prev
4224 #undef saved_eptr
4225
4226 #undef new_recursive
4227
4228 #undef cur_is_word
4229 #undef condition
4230 #undef prev_is_word
4231
4232 #undef original_ims
4233
4234 #undef ctype
4235 #undef length
4236 #undef max
4237 #undef min
4238 #undef number
4239 #undef offset
4240 #undef op
4241 #undef save_capture_last
4242 #undef save_offset1
4243 #undef save_offset2
4244 #undef save_offset3
4245 #undef stacksave
4246
4247 #undef newptrb
4248
4249 #endif /* NO_RECURSE */
4250
4251 /* fc and fi are defined as macros whether or not NO_RECURSE is set, so they
   are undefined unconditionally here, outside the #ifdef above. Removing all of
   these macro names lets the code that follows (for example the "length" and
   "ims" identifiers in pcre_exec()) use them as ordinary names again. */
4252
4253 #undef fc
4254 #undef fi
4255
4256 /***************************************************************************
4257 ***************************************************************************/
4258
4259
4260
4261 /*************************************************
4262 * Execute a Regular Expression *
4263 *************************************************/
4264
4265 /* This function applies a compiled re to a subject string and picks out
4266 portions of the string if it matches. Two elements in the vector are set for
4267 each substring: the offsets to the start and end of the substring.
4268
4269 Arguments:
4270 argument_re points to the compiled expression
4271 extra_data points to extra data or is NULL
4272 subject points to the subject string
4273 length length of subject string (may contain binary zeros)
4274 start_offset where to start in the subject string
4275 options option bits
4276 offsets points to a vector of ints to be filled in with offsets
4277 offsetcount the number of elements in the vector
4278
4279 Returns: > 0 => success; value is the number of elements filled in
4280 = 0 => success, but offsets is not big enough
4281 -1 => failed to match
4282 < -1 => some kind of unexpected problem
4283 */
4284
4285 PCRE_EXP_DEFN int
4286 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4287 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4288 int offsetcount)
4289 {
4290 int rc, resetcount, ocount;
4291 int first_byte = -1;
4292 int req_byte = -1;
4293 int req_byte2 = -1;
4294 int newline;
4295 unsigned long int ims;
4296 BOOL using_temporary_offsets = FALSE;
4297 BOOL anchored;
4298 BOOL startline;
4299 BOOL firstline;
4300 BOOL first_byte_caseless = FALSE;
4301 BOOL req_byte_caseless = FALSE;
4302 BOOL utf8;
4303 match_data match_block;
4304 match_data *md = &match_block;
4305 const uschar *tables;
4306 const uschar *start_bits = NULL;
4307 USPTR start_match = (USPTR)subject + start_offset;
4308 USPTR end_subject;
4309 USPTR req_byte_ptr = start_match - 1;
4310
4311 pcre_study_data internal_study;
4312 const pcre_study_data *study;
4313
4314 real_pcre internal_re;
4315 const real_pcre *external_re = (const real_pcre *)argument_re;
4316 const real_pcre *re = external_re;
4317
4318 /* Plausibility checks */
4319
4320 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4321 if (re == NULL || subject == NULL ||
4322 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4323 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
4324
4325 /* Fish out the optional data from the extra_data structure, first setting
4326 the default values. */
4327
4328 study = NULL;
4329 md->match_limit = MATCH_LIMIT;
4330 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4331 md->callout_data = NULL;
4332
4333 /* The table pointer is always in native byte order. */
4334
4335 tables = external_re->tables;
4336
4337 if (extra_data != NULL)
4338 {
4339 register unsigned int flags = extra_data->flags;
4340 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4341 study = (const pcre_study_data *)extra_data->study_data;
4342 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4343 md->match_limit = extra_data->match_limit;
4344 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4345 md->match_limit_recursion = extra_data->match_limit_recursion;
4346 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4347 md->callout_data = extra_data->callout_data;
4348 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4349 }
4350
4351 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
4352 is a feature that makes it possible to save compiled regex and re-use them
4353 in other programs later. */
4354
4355 if (tables == NULL) tables = _pcre_default_tables;
4356
4357 /* Check that the first field in the block is the magic number. If it is not,
4358 test for a regex that was compiled on a host of opposite endianness. If this is
4359 the case, flipped values are put in internal_re and internal_study if there was
4360 study data too. */
4361
4362 if (re->magic_number != MAGIC_NUMBER)
4363 {
4364 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
4365 if (re == NULL) return PCRE_ERROR_BADMAGIC;
4366 if (study != NULL) study = &internal_study;
4367 }
4368
4369 /* Set up other data */
4370
4371 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4372 startline = (re->options & PCRE_STARTLINE) != 0;
4373 firstline = (re->options & PCRE_FIRSTLINE) != 0;
4374
4375 /* The code starts after the real_pcre block and the capture name table. */
4376
4377 md->start_code = (const uschar *)external_re + re->name_table_offset +
4378 re->name_count * re->name_entry_size;
4379
4380 md->start_subject = (USPTR)subject;
4381 md->start_offset = start_offset;
4382 md->end_subject = md->start_subject + length;
4383 end_subject = md->end_subject;
4384
4385 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4386 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4387
4388 md->notbol = (options & PCRE_NOTBOL) != 0;
4389 md->noteol = (options & PCRE_NOTEOL) != 0;
4390 md->notempty = (options & PCRE_NOTEMPTY) != 0;
4391 md->partial = (options & PCRE_PARTIAL) != 0;
4392 md->hitend = FALSE;
4393
4394 md->recursive = NULL; /* No recursion at top level */
4395
4396 md->lcc = tables + lcc_offset;
4397 md->ctypes = tables + ctypes_offset;
4398
4399 /* Handle different types of newline. The three bits give eight cases. If
4400 nothing is set at run time, whatever was used at compile time applies. */
4401
4402 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4403 PCRE_NEWLINE_BITS)
4404 {
4405 case 0: newline = NEWLINE; break; /* Compile-time default */
4406 case PCRE_NEWLINE_CR: newline = '\r'; break;
4407 case PCRE_NEWLINE_LF: newline = '\n'; break;
4408 case PCRE_NEWLINE_CR+
4409 PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4410 case PCRE_NEWLINE_ANY: newline = -1; break;
4411 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4412 default: return PCRE_ERROR_BADNEWLINE;
4413 }
4414
4415 if (newline == -2)
4416 {
4417 md->nltype = NLTYPE_ANYCRLF;
4418 }
4419 else if (newline < 0)
4420 {
4421 md->nltype = NLTYPE_ANY;
4422 }
4423 else
4424 {
4425 md->nltype = NLTYPE_FIXED;
4426 if (newline > 255)
4427 {
4428 md->nllen = 2;
4429 md->nl[0] = (newline >> 8) & 255;
4430 md->nl[1] = newline & 255;
4431 }
4432 else
4433 {
4434 md->nllen = 1;
4435 md->nl[0] = newline;
4436 }
4437 }
4438
4439 /* Partial matching is supported only for a restricted set of regexes at the
4440 moment. */
4441
4442 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
4443 return PCRE_ERROR_BADPARTIAL;
4444
4445 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4446 back the character offset. */
4447
4448 #ifdef SUPPORT_UTF8
4449 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4450 {
4451 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4452 return PCRE_ERROR_BADUTF8;
4453 if (start_offset > 0 && start_offset < length)
4454 {
4455 int tb = ((uschar *)subject)[start_offset];
4456 if (tb > 127)
4457 {
4458 tb &= 0xc0;
4459 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
4460 }
4461 }
4462 }
4463 #endif
4464
4465 /* The ims options can vary during the matching as a result of the presence
4466 of (?ims) items in the pattern. They are kept in a local variable so that
4467 restoring at the exit of a group is easy. */
4468
4469 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4470
4471 /* If the expression has got more back references than the offsets supplied can
4472 hold, we get a temporary chunk of working store to use during the matching.
4473 Otherwise, we can use the vector supplied, rounding down its size to a multiple
4474 of 3. */
4475
4476 ocount = offsetcount - (offsetcount % 3);
4477
4478 if (re->top_backref > 0 && re->top_backref >= ocount/3)
4479 {
4480 ocount = re->top_backref * 3 + 3;
4481 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4482 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4483 using_temporary_offsets = TRUE;
4484 DPRINTF(("Got memory to hold back references\n"));
4485 }
4486 else md->offset_vector = offsets;
4487
4488 md->offset_end = ocount;
4489 md->offset_max = (2*ocount)/3;
4490 md->offset_overflow = FALSE;
4491 md->capture_last = -1;
4492
4493 /* Compute the minimum number of offsets that we need to reset each time. Doing
4494 this makes a huge difference to execution time when there aren't many brackets
4495 in the pattern. */
4496
4497 resetcount = 2 + re->top_bracket * 2;
4498 if (resetcount > offsetcount) resetcount = ocount;
4499
4500 /* Reset the working variables associated with each extraction. These should
4501 never be used unless previously set, but they get saved and restored, and so we
4502 initialize them to avoid reading uninitialized locations. */
4503
4504 if (md->offset_vector != NULL)
4505 {
4506 register int *iptr = md->offset_vector + ocount;
4507 register int *iend = iptr - resetcount/2 + 1;
4508 while (--iptr >= iend) *iptr = -1;
4509 }
4510
4511 /* Set up the first character to match, if available. The first_byte value is
4512 never set for an anchored regular expression, but the anchoring may be forced
4513 at run time, so we have to test for anchoring. The first char may be unset for
4514 an unanchored pattern, of course. If there's no first char and the pattern was
4515 studied, there may be a bitmap of possible first characters. */
4516
4517 if (!anchored)
4518 {
4519 if ((re->options & PCRE_FIRSTSET) != 0)
4520 {
4521 first_byte = re->first_byte & 255;
4522 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4523 first_byte = md->lcc[first_byte];
4524 }
4525 else
4526 if (!startline && study != NULL &&
4527 (study->options & PCRE_STUDY_MAPPED) != 0)
4528 start_bits = study->start_bits;
4529 }
4530
4531 /* For anchored or unanchored matches, there may be a "last known required
4532 character" set. */
4533
4534 if ((re->options & PCRE_REQCHSET) != 0)
4535 {
4536 req_byte = re->req_byte & 255;
4537 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
4538 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
4539 }
4540
4541
4542 /* ==========================================================================*/
4543
4544 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4545 the loop runs just once. */
4546
4547 for(;;)
4548 {
4549 USPTR save_end_subject = end_subject;
4550
4551 /* Reset the maximum number of extractions we might see. */
4552
4553 if (md->offset_vector != NULL)
4554 {
4555 register int *iptr = md->offset_vector;
4556 register int *iend = iptr + resetcount;
4557 while (iptr < iend) *iptr++ = -1;
4558 }
4559
4560 /* Advance to a unique first char if possible. If firstline is TRUE, the
4561 start of the match is constrained to the first line of a multiline string.
4562 That is, the match must be before or at the first newline. Implement this by
4563 temporarily adjusting end_subject so that we stop scanning at a newline. If
4564 the match fails at the newline, later code breaks this loop. */
4565
4566 if (firstline)
4567 {
4568 USPTR t = start_match;
4569 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4570 end_subject = t;
4571 }
4572
4573 /* Now test for a unique first byte */
4574
4575 if (first_byte >= 0)
4576 {
4577 if (first_byte_caseless)
4578 while (start_match < end_subject &&
4579 md->lcc[*start_match] != first_byte)
4580 start_match++;
4581 else
4582 while (start_match < end_subject && *start_match != first_byte)
4583 start_match++;
4584 }
4585
4586 /* Or to just after a linebreak for a multiline match if possible */
4587
4588 else if (startline)
4589 {
4590 if (start_match > md->start_subject + start_offset)
4591 {
4592 while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4593 start_match++;
4594
4595 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4596 and we are now at a LF, advance the match position by one more character.
4597 */
4598
4599 if (start_match[-1] == '\r' &&
4600 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4601 start_match < end_subject &&
4602 *start_match == '\n')
4603 start_match++;
4604 }
4605 }
4606
4607 /* Or to a non-unique first char after study */
4608
4609 else if (start_bits != NULL)
4610 {
4611 while (start_match < end_subject)
4612 {
4613 register unsigned int c = *start_match;
4614 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4615 }
4616 }
4617
4618 /* Restore fudged end_subject */
4619
4620 end_subject = save_end_subject;
4621
4622 #ifdef DEBUG /* Sigh. Some compilers never learn. */
4623 printf(">>>> Match against: ");
4624 pchars(start_match, end_subject - start_match, TRUE, md);
4625 printf("\n");
4626 #endif
4627
4628 /* If req_byte is set, we know that that character must appear in the subject
4629 for the match to succeed. If the first character is set, req_byte must be
4630 later in the subject; otherwise the test starts at the match point. This
4631 optimization can save a huge amount of backtracking in patterns with nested
4632 unlimited repeats that aren't going to match. Writing separate code for
4633 cased/caseless versions makes it go faster, as does using an autoincrement
4634 and backing off on a match.
4635
4636 HOWEVER: when the subject string is very, very long, searching to its end can
4637 take a long time, and give bad performance on quite ordinary patterns. This
4638 showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4639 string... so we don't do this when the string is sufficiently long.
4640
4641 ALSO: this processing is disabled when partial matching is requested.
4642 */
4643
4644 if (req_byte >= 0 &&
4645 end_subject - start_match < REQ_BYTE_MAX &&
4646 !md->partial)
4647 {
4648 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4649
4650 /* We don't need to repeat the search if we haven't yet reached the
4651 place we found it at last time. */
4652
4653 if (p > req_byte_ptr)
4654 {
4655 if (req_byte_caseless)
4656 {
4657 while (p < end_subject)
4658 {
4659 register int pp = *p++;
4660 if (pp == req_byte || pp == req_byte2) { p--; break; }
4661 }
4662 }
4663 else
4664 {
4665 while (p < end_subject)
4666 {
4667 if (*p++ == req_byte) { p--; break; }
4668 }
4669 }
4670
4671 /* If we can't find the required character, break the matching loop,
4672 forcing a match failure. */
4673
4674 if (p >= end_subject)
4675 {
4676 rc = MATCH_NOMATCH;
4677 break;
4678 }
4679
4680 /* If we have found the required character, save the point where we
4681 found it, so that we don't search again next time round the loop if
4682 the start hasn't passed this character yet. */
4683
4684 req_byte_ptr = p;
4685 }
4686 }
4687
4688 /* OK, we can now run the match. */
4689
4690 md->start_match_ptr = start_match; /* Insurance */
4691 md->match_call_count = 0;
4692 rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4693
4694 /* Any return other than MATCH_NOMATCH breaks the loop. */
4695
4696 if (rc != MATCH_NOMATCH) break;
4697
4698 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4699 newline in the subject (though it may continue over the newline). Therefore,
4700 if we have just failed to match, starting at a newline, do not continue. */
4701
4702 if (firstline && IS_NEWLINE(start_match)) break;
4703
4704 /* Advance the match position by one character. */
4705
4706 start_match++;
4707 #ifdef SUPPORT_UTF8
4708 if (utf8)
4709 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4710 start_match++;
4711 #endif
4712
4713 /* Break the loop if the pattern is anchored or if we have passed the end of
4714 the subject. */
4715
4716 if (anchored || start_match > end_subject) break;
4717
4718 /* If we have just passed a CR and the newline option is CRLF or ANY or
4719 ANYCRLF, and we are now at a LF, advance the match position by one more
4720 character. */
4721
4722 if (start_match[-1] == '\r' &&
4723 (md->nltype == NLTYPE_ANY ||
4724 md->nltype == NLTYPE_ANYCRLF ||
4725 md->nllen == 2) &&
4726 start_match < end_subject &&
4727 *start_match == '\n')
4728 start_match++;
4729
4730 } /* End of for(;;) "bumpalong" loop */
4731
4732 /* ==========================================================================*/
4733
4734 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4735 conditions is true:
4736
4737 (1) The pattern is anchored;
4738
4739 (2) We are past the end of the subject;
4740
4741 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4742 this option requests that a match occur at or before the first newline in
4743 the subject.
4744
4745 When we have a match and the offset vector is big enough to deal with any
4746 backreferences, captured substring offsets will already be set up. In the case
4747 where we had to get some local store to hold offsets for backreference
4748 processing, copy those that we can. In this case there need not be overflow if
4749 certain parts of the pattern were not used, even though there are more
4750 capturing parentheses than vector slots. */
4751
4752 if (rc == MATCH_MATCH)
4753 {
4754 if (using_temporary_offsets)
4755 {
4756 if (offsetcount >= 4)
4757 {
4758 memcpy(offsets + 2, md->offset_vector + 2,
4759 (offsetcount - 2) * sizeof(int));
4760 DPRINTF(("Copied offsets from temporary memory\n"));
4761 }
4762 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4763 DPRINTF(("Freeing temporary memory\n"));
4764 (pcre_free)(md->offset_vector);
4765 }
4766
4767 /* Set the return code to the number of captured strings, or 0 if there are
4768 too many to fit into the vector. */
4769
4770 rc = md->offset_overflow? 0 : md->end_offset_top/2;
4771
4772 /* If there is space, set up the whole thing as substring 0. The value of
4773 md->start_match_ptr might be modified if \K was encountered on the successful
4774 matching path. */
4775
4776 if (offsetcount < 2) rc = 0; else
4777 {
4778 offsets[0] = md->start_match_ptr - md->start_subject;
4779 offsets[1] = md->end_match_ptr - md->start_subject;
4780 }
4781
4782 DPRINTF((">>>> returning %d\n", rc));
4783 return rc;
4784 }
4785
4786 /* Control gets here if there has been an error, or if the overall match
4787 attempt has failed at all permitted starting positions. */
4788
4789 if (using_temporary_offsets)
4790 {
4791 DPRINTF(("Freeing temporary memory\n"));
4792 (pcre_free)(md->offset_vector);
4793 }
4794
4795 if (rc != MATCH_NOMATCH)
4796 {
4797 DPRINTF((">>>> error: returning %d\n", rc));
4798 return rc;
4799 }
4800 else if (md->partial && md->hitend)
4801 {
4802 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4803 return PCRE_ERROR_PARTIAL;
4804 }
4805 else
4806 {
4807 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4808 return PCRE_ERROR_NOMATCH;
4809 }
4810 }
4811
4812 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12