/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 197 - (show annotations) (download)
Tue Jul 31 10:50:18 2007 UTC (7 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 146712 byte(s)
Abolish the NULLWSLIMIT error at the expense of using more stack when an 
unlimited repeat could match an empty string. Also, doc tidies for a test 
release.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #define NLBLOCK md /* Block containing newline information */
46 #define PSSTART start_subject /* Field containing processed string start */
47 #define PSEND end_subject /* Field containing processed string end */
48
49 #include "pcre_internal.h"
50
51 /* Undefine some potentially clashing cpp symbols */
52
53 #undef min
54 #undef max
55
56 /* Flag bits for the match() function */
57
58 #define match_condassert 0x01 /* Called to check a condition assertion */
59 #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
60
61 /* Non-error returns from the match() function. Error returns are externally
62 defined PCRE_ERROR_xxx codes, which are all negative. */
63
64 #define MATCH_MATCH 1
65 #define MATCH_NOMATCH 0
66
67 /* Maximum number of ints of offset to save on the stack for recursive calls.
68 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
69 because the offset vector is always a multiple of 3 long. */
70
71 #define REC_STACK_SAVE_MAX 30
72
73 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
74
/* The two tables have six entries each and are read in parallel: entry i of
rep_min is the minimum for the same repeat whose maximum is entry i of rep_max.
NOTE(review): the value pairs (0,inf) (0,inf) (1,inf) (1,inf) (0,1) (0,1) look
like the greedy/minimizing forms of *, + and ? in that order - confirm against
the code in match() that indexes these tables. */
75 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
76 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
77
78
79
80 #ifdef DEBUG
81 /*************************************************
82 * Debugging function to print chars *
83 *************************************************/
84
85 /* Print a sequence of chars in printable format, stopping at the end of the
86 subject if requested.
87
88 Arguments:
89 p points to characters
90 length number to print
91 is_subject TRUE if printing from within md->start_subject
92 md pointer to matching data block, if is_subject is TRUE
93
94 Returns: nothing
95 */
96
97 static void
98 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
99 {
100 unsigned int c;
101 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
102 while (length-- > 0)
103 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
104 }
105 #endif
106
107
108
109 /*************************************************
110 * Match a back-reference *
111 *************************************************/
112
113 /* If a back reference hasn't been set, the length that is passed is greater
114 than the number of characters left in the string, so the match fails.
115
116 Arguments:
117 offset index into the offset vector
118 eptr points into the subject
119 length length to be matched
120 md points to match data block
121 ims the ims flags
122
123 Returns: TRUE if matched
124 */
125
126 static BOOL
127 match_ref(int offset, register USPTR eptr, int length, match_data *md,
128 unsigned long int ims)
129 {
130 USPTR p = md->start_subject + md->offset_vector[offset];
131
132 #ifdef DEBUG
133 if (eptr >= md->end_subject)
134 printf("matching subject <null>");
135 else
136 {
137 printf("matching subject ");
138 pchars(eptr, length, TRUE, md);
139 }
140 printf(" against backref ");
141 pchars(p, length, FALSE, md);
142 printf("\n");
143 #endif
144
145 /* Always fail if not enough characters left */
146
147 if (length > md->end_subject - eptr) return FALSE;
148
149 /* Separate the caselesss case for speed */
150
151 if ((ims & PCRE_CASELESS) != 0)
152 {
153 while (length-- > 0)
154 if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
155 }
156 else
157 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
158
159 return TRUE;
160 }
161
162
163
164 /***************************************************************************
165 ****************************************************************************
166 RECURSION IN THE match() FUNCTION
167
168 The match() function is highly recursive, though not every recursive call
169 increases the recursive depth. Nevertheless, some regular expressions can cause
170 it to recurse to a great depth. I was writing for Unix, so I just let it call
171 itself recursively. This uses the stack for saving everything that has to be
172 saved for a recursive call. On Unix, the stack can be large, and this works
173 fine.
174
175 It turns out that on some non-Unix-like systems there are problems with
176 programs that use a lot of stack. (This despite the fact that every last chip
177 has oodles of memory these days, and techniques for extending the stack have
178 been known for decades.) So....
179
180 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
181 calls by keeping local variables that need to be preserved in blocks of memory
182 obtained from malloc() instead of on the stack. Macros are used to
183 achieve this so that the actual code doesn't look very different to what it
184 always used to.
185
186 The original heap-recursive code used longjmp(). However, it seems that this
187 can be very slow on some operating systems. Following a suggestion from Stan
188 Switzer, the use of longjmp() has been abolished, at the cost of having to
189 provide a unique number for each call to RMATCH. There is no way of generating
190 a sequence of numbers at compile time in C. I have given them names, to make
191 them stand out more clearly.
192
193 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
194 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
195 tests. Furthermore, not using longjmp() means that local dynamic variables
196 don't have indeterminate values; this has meant that the frame size can be
197 reduced because the result can be "passed back" by straight setting of the
198 variable instead of being passed in the frame.
199 ****************************************************************************
200 ***************************************************************************/
201
202
203 /* Numbers for RMATCH calls */
204
/* One distinct value per RMATCH call site, starting at 1. The heap-frame
version of RMATCH stores the value in the calling frame's Xwhere field and
pastes it into the name of the label (L_RMn) that it places after the call
site, so every use of RMATCH must be given a different RMn. */
205 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
206 RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
207 RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
208 RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
209 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50 };
210
211
212 /* These versions of the macros use the stack, as normal. There are debugging
213 versions and production versions. Note that the "rw" argument of RMATCH isn't
214 actually used in this definition. */
215
216 #ifndef NO_RECURSE
217 #define REGISTER register
218
219 #ifdef DEBUG
/* Debugging versions. RMATCH makes a genuine recursive call to match(),
passing the enclosing invocation's mstart unchanged and rdepth+1, and leaves
the result in rrc; it reports the source line before and after the call. The
rw argument is accepted but ignored. Both macros expand to a brace-enclosed
block, so a use that is followed by "else" must itself be wrapped in braces. */
220 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
221 { \
222 printf("match() called in line %d\n", __LINE__); \
223 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
224 printf("to line %d\n", __LINE__); \
225 }
/* Report the value being returned and the line it is returned from, then
return it. The message deliberately has no trailing newline. NOTE(review):
presumably so that the caller's "to line" message completes the same output
line - confirm. The argument is expanded twice, so it must have no side
effects. */
226 #define RRETURN(ra) \
227 { \
228 printf("match() returned %d from line %d ", ra, __LINE__); \
229 return ra; \
230 }
231 #else
/* Production versions: a plain recursive call whose result is left in rrc,
and a plain return. The rw argument of RMATCH is ignored. */
232 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
233 rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
234 #define RRETURN(ra) return ra
235 #endif
236
237 #else
238
239
240 /* These versions of the macros manage a private stack on the heap. Note that
241 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
242 argument of match(), which never changes. */
243
244 #define REGISTER
245
/* RMATCH simulates a recursive call of match(): it obtains a new frame, records
in the current frame which call site is "calling" (rw), fills in the new
frame's argument fields, links the new frame to the current one, and jumps back
to the top of match(). Control comes back to the label L_<rw> with the result
in rrc. The rd argument is not used.

If the frame cannot be allocated, the current invocation is abandoned with
PCRE_ERROR_NOMEMORY. This is done with RRETURN so that the current frame is
released and the error is passed back through the enclosing frames in the same
way as any other negative result. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw;\
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }

/* RRETURN simulates a return from match(): it releases the current frame and
makes the previous one current. If there is a previous frame, the result is
handed back in rrc and control goes to HEAP_RETURN; otherwise this was the
top-level frame and the function really returns. */

#define RRETURN(ra)\
  {\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    rrc = ra;\
    goto HEAP_RETURN;\
    }\
  return ra;\
  }
278
279
280 /* Structure for remembering the local variables in a private frame */
281
282 typedef struct heapframe {
283 struct heapframe *Xprevframe;
284
285 /* Function arguments that may change */
286
287 const uschar *Xeptr;
288 const uschar *Xecode;
289 const uschar *Xmstart;
290 int Xoffset_top;
291 long int Xims;
292 eptrblock *Xeptrb;
293 int Xflags;
294 unsigned int Xrdepth;
295
296 /* Function local variables */
297
298 const uschar *Xcallpat;
299 const uschar *Xcharptr;
300 const uschar *Xdata;
301 const uschar *Xnext;
302 const uschar *Xpp;
303 const uschar *Xprev;
304 const uschar *Xsaved_eptr;
305
306 recursion_info Xnew_recursive;
307
308 BOOL Xcur_is_word;
309 BOOL Xcondition;
310 BOOL Xprev_is_word;
311
312 unsigned long int Xoriginal_ims;
313
314 #ifdef SUPPORT_UCP
315 int Xprop_type;
316 int Xprop_value;
317 int Xprop_fail_result;
318 int Xprop_category;
319 int Xprop_chartype;
320 int Xprop_script;
321 int Xoclength;
322 uschar Xocchars[8];
323 #endif
324
325 int Xctype;
326 unsigned int Xfc;
327 int Xfi;
328 int Xlength;
329 int Xmax;
330 int Xmin;
331 int Xnumber;
332 int Xoffset;
333 int Xop;
334 int Xsave_capture_last;
335 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
336 int Xstacksave[REC_STACK_SAVE_MAX];
337
338 eptrblock Xnewptrb;
339
340 /* Where to jump back to */
341
342 int Xwhere;
343
344 } heapframe;
345
346 #endif
347
348
349 /***************************************************************************
350 ***************************************************************************/
351
352
353
354 /*************************************************
355 * Match from current position *
356 *************************************************/
357
358 /* This function is called recursively in many circumstances. Whenever it
359 returns a negative (error) response, the outer incarnation must also return the
360 same response.
361
362 Performance note: It might be tempting to extract commonly used fields from the
363 md structure (e.g. utf8, end_subject) into individual variables to improve
364 performance. Tests using gcc on a SPARC disproved this; in the first case, it
365 made performance worse.
366
367 Arguments:
368 eptr pointer to current character in subject
369 ecode pointer to current position in compiled code
370 mstart pointer to the current match start position (can be modified
371 by encountering \K)
372 offset_top current top pointer
373 md pointer to "static" info for the match
374 ims current /i, /m, and /s options
375 eptrb pointer to chain of blocks containing eptr at start of
376 brackets - for testing for empty matches
377 flags can contain
378 match_condassert - this is an assertion condition
379 match_cbegroup - this is the start of an unlimited repeat
380 group that can match an empty string
381 rdepth the recursion depth
382
383 Returns: MATCH_MATCH if matched ) these values are >= 0
384 MATCH_NOMATCH if failed to match )
385 a negative PCRE_ERROR_xxx value if aborted by an error condition
386 (e.g. stopped by repeated call or recursion limit)
387 */
388
389 static int
390 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
391 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
392 int flags, unsigned int rdepth)
393 {
394 /* These variables do not need to be preserved over recursion in this function,
395 so they can be ordinary variables in all cases. Mark some of them with
396 "register" because they are used a lot in loops. */
397
398 register int rrc; /* Returns from recursive calls */
399 register int i; /* Used for loops not involving calls to RMATCH() */
400 register unsigned int c; /* Character values not kept over RMATCH() calls */
401 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
402
403 BOOL minimize, possessive; /* Quantifier options */
404
405 /* When recursion is not being used, all "local" variables that have to be
406 preserved over calls to RMATCH() are part of a "frame" which is obtained from
407 heap storage. Set up the top-level frame here; others are obtained from the
408 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
409
410 #ifdef NO_RECURSE
411 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
412 frame->Xprevframe = NULL; /* Marks the top level */
413
414 /* Copy in the original argument variables */
415
416 frame->Xeptr = eptr;
417 frame->Xecode = ecode;
418 frame->Xmstart = mstart;
419 frame->Xoffset_top = offset_top;
420 frame->Xims = ims;
421 frame->Xeptrb = eptrb;
422 frame->Xflags = flags;
423 frame->Xrdepth = rdepth;
424
425 /* This is where control jumps back to to effect "recursion" */
426
427 HEAP_RECURSE:
428
429 /* Macros make the argument variables come from the current frame */
430
431 #define eptr frame->Xeptr
432 #define ecode frame->Xecode
433 #define mstart frame->Xmstart
434 #define offset_top frame->Xoffset_top
435 #define ims frame->Xims
436 #define eptrb frame->Xeptrb
437 #define flags frame->Xflags
438 #define rdepth frame->Xrdepth
439
440 /* Ditto for the local variables */
441
442 #ifdef SUPPORT_UTF8
443 #define charptr frame->Xcharptr
444 #endif
445 #define callpat frame->Xcallpat
446 #define data frame->Xdata
447 #define next frame->Xnext
448 #define pp frame->Xpp
449 #define prev frame->Xprev
450 #define saved_eptr frame->Xsaved_eptr
451
452 #define new_recursive frame->Xnew_recursive
453
454 #define cur_is_word frame->Xcur_is_word
455 #define condition frame->Xcondition
456 #define prev_is_word frame->Xprev_is_word
457
458 #define original_ims frame->Xoriginal_ims
459
460 #ifdef SUPPORT_UCP
461 #define prop_type frame->Xprop_type
462 #define prop_value frame->Xprop_value
463 #define prop_fail_result frame->Xprop_fail_result
464 #define prop_category frame->Xprop_category
465 #define prop_chartype frame->Xprop_chartype
466 #define prop_script frame->Xprop_script
467 #define oclength frame->Xoclength
468 #define occhars frame->Xocchars
469 #endif
470
471 #define ctype frame->Xctype
472 #define fc frame->Xfc
473 #define fi frame->Xfi
474 #define length frame->Xlength
475 #define max frame->Xmax
476 #define min frame->Xmin
477 #define number frame->Xnumber
478 #define offset frame->Xoffset
479 #define op frame->Xop
480 #define save_capture_last frame->Xsave_capture_last
481 #define save_offset1 frame->Xsave_offset1
482 #define save_offset2 frame->Xsave_offset2
483 #define save_offset3 frame->Xsave_offset3
484 #define stacksave frame->Xstacksave
485
486 #define newptrb frame->Xnewptrb
487
488 /* When recursion is being used, local variables are allocated on the stack and
489 get preserved during recursion in the normal way. In this environment, fi and
490 i, and fc and c, can be the same variables. */
491
492 #else /* NO_RECURSE not defined */
493 #define fi i
494 #define fc c
495
496
497 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
498 const uschar *charptr; /* in small blocks of the code. My normal */
499 #endif /* style of coding would have declared */
500 const uschar *callpat; /* them within each of those blocks. */
501 const uschar *data; /* However, in order to accommodate the */
502 const uschar *next; /* version of this code that uses an */
503 USPTR pp; /* external "stack" implemented on the */
504 const uschar *prev; /* heap, it is easier to declare them all */
505 USPTR saved_eptr; /* here, so the declarations can be cut */
506 /* out in a block. The only declarations */
507 recursion_info new_recursive; /* within blocks below are for variables */
508 /* that do not have to be preserved over */
509 BOOL cur_is_word; /* a recursive call to RMATCH(). */
510 BOOL condition;
511 BOOL prev_is_word;
512
513 unsigned long int original_ims;
514
515 #ifdef SUPPORT_UCP
516 int prop_type;
517 int prop_value;
518 int prop_fail_result;
519 int prop_category;
520 int prop_chartype;
521 int prop_script;
522 int oclength;
523 uschar occhars[8];
524 #endif
525
526 int ctype;
527 int length;
528 int max;
529 int min;
530 int number;
531 int offset;
532 int op;
533 int save_capture_last;
534 int save_offset1, save_offset2, save_offset3;
535 int stacksave[REC_STACK_SAVE_MAX];
536
537 eptrblock newptrb;
538 #endif /* NO_RECURSE */
539
540 /* These statements are here to stop the compiler complaining about uninitialized
541 variables. */
542
543 #ifdef SUPPORT_UCP
544 prop_value = 0;
545 prop_fail_result = 0;
546 #endif
547
548
549 /* This label is used for tail recursion, which is used in a few cases even
550 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
551 used. Thanks to Ian Taylor for noticing this possibility and sending the
552 original patch. */
553
554 TAIL_RECURSE:
555
556 /* OK, now we can get on with the real code of the function. Recursive calls
557 are specified by the macro RMATCH and RRETURN is used to return. When
558 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
559 and a "return", respectively (possibly with some debugging if DEBUG is
560 defined). However, RMATCH isn't like a function call because it's quite a
561 complicated macro. It has to be used in one particular way. This shouldn't,
562 however, impact performance when true recursion is being used. */
563
564 #ifdef SUPPORT_UTF8
565 utf8 = md->utf8; /* Local copy of the flag */
566 #else
567 utf8 = FALSE;
568 #endif
569
570 /* First check that we haven't called match() too many times, or that we
571 haven't exceeded the recursive call limit. */
572
573 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
574 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
575
576 original_ims = ims; /* Save for resetting on ')' */
577
578 /* At the start of a group with an unlimited repeat that may match an empty
579 string, the match_cbegroup flag is set. When this is the case, add the current
580 subject pointer to the chain of such remembered pointers, to be checked when we
581 hit the closing ket, in order to break infinite loops that match no characters.
582 When match() is called in other circumstances, don't add to the chain. The
583 match_cbegroup flag must NOT be used with tail recursion, because the memory
584 block that is used is on the stack, so a new one may be required for each
585 match(). */
586
587 if ((flags & match_cbegroup) != 0)
588 {
589 newptrb.epb_saved_eptr = eptr;
590 newptrb.epb_prev = eptrb;
591 eptrb = &newptrb;
592 }
593
594 /* Now start processing the opcodes. */
595
596 for (;;)
597 {
598 minimize = possessive = FALSE;
599 op = *ecode;
600
601 /* For partial matching, remember if we ever hit the end of the subject after
602 matching at least one subject character. */
603
604 if (md->partial &&
605 eptr >= md->end_subject &&
606 eptr > mstart)
607 md->hitend = TRUE;
608
609 switch(op)
610 {
611 /* Handle a capturing bracket. If there is space in the offset vector, save
612 the current subject position in the working slot at the top of the vector.
613 We mustn't change the current values of the data slot, because they may be
614 set from a previous iteration of this group, and be referred to by a
615 reference inside the group.
616
617 If the bracket fails to match, we need to restore this value and also the
618 values of the final offsets, in case they were set by a previous iteration
619 of the same bracket.
620
621 If there isn't enough space in the offset vector, treat this as if it were
622 a non-capturing bracket. Don't worry about setting the flag for the error
623 case here; that is handled in the code for KET. */
624
625 case OP_CBRA:
626 case OP_SCBRA:
627 number = GET2(ecode, 1+LINK_SIZE);
628 offset = number << 1;
629
630 #ifdef DEBUG
631 printf("start bracket %d\n", number);
632 printf("subject=");
633 pchars(eptr, 16, TRUE, md);
634 printf("\n");
635 #endif
636
637 if (offset < md->offset_max)
638 {
639 save_offset1 = md->offset_vector[offset];
640 save_offset2 = md->offset_vector[offset+1];
641 save_offset3 = md->offset_vector[md->offset_end - number];
642 save_capture_last = md->capture_last;
643
644 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
645 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
646
647 flags = (op == OP_SCBRA)? match_cbegroup : 0;
648 do
649 {
650 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
651 ims, eptrb, flags, RM1);
652 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
653 md->capture_last = save_capture_last;
654 ecode += GET(ecode, 1);
655 }
656 while (*ecode == OP_ALT);
657
658 DPRINTF(("bracket %d failed\n", number));
659
660 md->offset_vector[offset] = save_offset1;
661 md->offset_vector[offset+1] = save_offset2;
662 md->offset_vector[md->offset_end - number] = save_offset3;
663
664 RRETURN(MATCH_NOMATCH);
665 }
666
667 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
668 as a non-capturing bracket. */
669
670 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
671 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
672
673 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
674
675 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
676 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
677
678 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
679 final alternative within the brackets, we would return the result of a
680 recursive call to match() whatever happened. We can reduce stack usage by
681 turning this into a tail recursion, except in the case when match_cbegroup
682 is set.*/
683
684 case OP_BRA:
685 case OP_SBRA:
686 DPRINTF(("start non-capturing bracket\n"));
687 flags = (op >= OP_SBRA)? match_cbegroup : 0;
688 for (;;)
689 {
690 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
691 {
692 if (flags == 0) /* Not a possibly empty group */
693 {
694 ecode += _pcre_OP_lengths[*ecode];
695 DPRINTF(("bracket 0 tail recursion\n"));
696 goto TAIL_RECURSE;
697 }
698
699 /* Possibly empty group; can't use tail recursion. */
700
701 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
702 eptrb, flags, RM48);
703 RRETURN(rrc);
704 }
705
706 /* For non-final alternatives, continue the loop for a NOMATCH result;
707 otherwise return. */
708
709 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
710 eptrb, flags, RM2);
711 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
712 ecode += GET(ecode, 1);
713 }
714 /* Control never reaches here. */
715
716 /* Conditional group: compilation checked that there are no more than
717 two branches. If the condition is false, skipping the first branch takes us
718 past the end if there is only one branch, but that's OK because that is
719 exactly what going to the ket would do. As there is only one branch to be
720 obeyed, we can use tail recursion to avoid using another stack frame. */
721
722 case OP_COND:
723 case OP_SCOND:
724 if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
725 {
726 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
727 condition = md->recursive != NULL &&
728 (offset == RREF_ANY || offset == md->recursive->group_num);
729 ecode += condition? 3 : GET(ecode, 1);
730 }
731
732 else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */
733 {
734 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
735 condition = offset < offset_top && md->offset_vector[offset] >= 0;
736 ecode += condition? 3 : GET(ecode, 1);
737 }
738
739 else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */
740 {
741 condition = FALSE;
742 ecode += GET(ecode, 1);
743 }
744
745 /* The condition is an assertion. Call match() to evaluate it - setting
746 the final argument match_condassert causes it to stop at the end of an
747 assertion. */
748
749 else
750 {
751 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
752 match_condassert, RM3);
753 if (rrc == MATCH_MATCH)
754 {
755 condition = TRUE;
756 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
757 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
758 }
759 else if (rrc != MATCH_NOMATCH)
760 {
761 RRETURN(rrc); /* Need braces because of following else */
762 }
763 else
764 {
765 condition = FALSE;
766 ecode += GET(ecode, 1);
767 }
768 }
769
770 /* We are now at the branch that is to be obeyed. As there is only one,
771 we can use tail recursion to avoid using another stack frame, except when
772 match_cbegroup is required for an unlimited repeat of a possibly empty
773 group. If the second alternative doesn't exist, we can just plough on. */
774
775 if (condition || *ecode == OP_ALT)
776 {
777 ecode += 1 + LINK_SIZE;
778 if (op == OP_SCOND) /* Possibly empty group */
779 {
780 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
781 RRETURN(rrc);
782 }
783 else /* Group must match something */
784 {
785 flags = 0;
786 goto TAIL_RECURSE;
787 }
788 }
789 else /* Condition false & no 2nd alternative */
790 {
791 ecode += 1 + LINK_SIZE;
792 }
793 break;
794
795
796 /* End of the pattern. If we are in a top-level recursion, we should
797 restore the offsets appropriately and continue from after the call. */
798
799 case OP_END:
800 if (md->recursive != NULL && md->recursive->group_num == 0)
801 {
802 recursion_info *rec = md->recursive;
803 DPRINTF(("End of pattern in a (?0) recursion\n"));
804 md->recursive = rec->prevrec;
805 memmove(md->offset_vector, rec->offset_save,
806 rec->saved_max * sizeof(int));
807 mstart = rec->save_start;
808 ims = original_ims;
809 ecode = rec->after_call;
810 break;
811 }
812
813 /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
814 string - backtracking will then try other alternatives, if any. */
815
816 if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
817 md->end_match_ptr = eptr; /* Record where we ended */
818 md->end_offset_top = offset_top; /* and how many extracts were taken */
819 md->start_match_ptr = mstart; /* and the start (\K can modify) */
820 RRETURN(MATCH_MATCH);
821
822 /* Change option settings */
823
824 case OP_OPT:
825 ims = ecode[1];
826 ecode += 2;
827 DPRINTF(("ims set to %02lx\n", ims));
828 break;
829
830 /* Assertion brackets. Check the alternative branches in turn - the
831 matching won't pass the KET for an assertion. If any one branch matches,
832 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
833 start of each branch to move the current point backwards, so the code at
834 this level is identical to the lookahead case. */
835
836 case OP_ASSERT:
837 case OP_ASSERTBACK:
838 do
839 {
840 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
841 RM4);
842 if (rrc == MATCH_MATCH) break;
843 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
844 ecode += GET(ecode, 1);
845 }
846 while (*ecode == OP_ALT);
847 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
848
849 /* If checking an assertion for a condition, return MATCH_MATCH. */
850
851 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
852
853 /* Continue from after the assertion, updating the offsets high water
854 mark, since extracts may have been taken during the assertion. */
855
856 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
857 ecode += 1 + LINK_SIZE;
858 offset_top = md->end_offset_top;
859 continue;
860
861 /* Negative assertion: all branches must fail to match */
862
863 case OP_ASSERT_NOT:
864 case OP_ASSERTBACK_NOT:
865 do
866 {
867 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
868 RM5);
869 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
870 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
871 ecode += GET(ecode,1);
872 }
873 while (*ecode == OP_ALT);
874
875 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
876
877 ecode += 1 + LINK_SIZE;
878 continue;
879
880 /* Move the subject pointer back. This occurs only at the start of
881 each branch of a lookbehind assertion. If we are too close to the start to
882 move back, this match function fails. When working with UTF-8 we move
883 back a number of characters, not bytes. */
884
885 case OP_REVERSE:
886 #ifdef SUPPORT_UTF8
887 if (utf8)
888 {
889 i = GET(ecode, 1);
890 while (i-- > 0)
891 {
892 eptr--;
893 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
894 BACKCHAR(eptr)
895 }
896 }
897 else
898 #endif
899
900 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
901
902 {
903 eptr -= GET(ecode, 1);
904 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
905 }
906
907 /* Skip to next op code */
908
909 ecode += 1 + LINK_SIZE;
910 break;
911
912 /* The callout item calls an external function, if one is provided, passing
913 details of the match so far. This is mainly for debugging, though the
914 function is able to force a failure. */
915
916 case OP_CALLOUT:
917 if (pcre_callout != NULL)
918 {
919 pcre_callout_block cb;
920 cb.version = 1; /* Version 1 of the callout block */
921 cb.callout_number = ecode[1];
922 cb.offset_vector = md->offset_vector;
923 cb.subject = (PCRE_SPTR)md->start_subject;
924 cb.subject_length = md->end_subject - md->start_subject;
925 cb.start_match = mstart - md->start_subject;
926 cb.current_position = eptr - md->start_subject;
927 cb.pattern_position = GET(ecode, 2);
928 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
929 cb.capture_top = offset_top/2;
930 cb.capture_last = md->capture_last;
931 cb.callout_data = md->callout_data;
932 if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
933 if (rrc < 0) RRETURN(rrc);
934 }
935 ecode += 2 + 2*LINK_SIZE;
936 break;
937
938 /* Recursion either matches the current regex, or some subexpression. The
939 offset data is the offset to the starting bracket from the start of the
940 whole pattern. (This is so that it works from duplicated subpatterns.)
941
942 If there are any capturing brackets started but not finished, we have to
943 save their starting points and reinstate them after the recursion. However,
944 we don't know how many such there are (offset_top records the completed
945 total) so we just have to save all the potential data. There may be up to
946 65535 such values, which is too large to put on the stack, but using malloc
947 for small numbers seems expensive. As a compromise, the stack is used when
948 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
949 is used. If the malloc fails, this call of match() gives up and returns
950 PCRE_ERROR_NOMEMORY; callers pass on any result other than MATCH_NOMATCH,
951 so the error is not silently ignored.
952
953 There are also other values that have to be saved. We use a chained
954 sequence of blocks that actually live on the stack. Thanks to Robin Houston
955 for the original version of this logic. */
956
957 case OP_RECURSE:
958 {
959 callpat = md->start_code + GET(ecode, 1);
960 new_recursive.group_num = (callpat == md->start_code)? 0 :
961 GET2(callpat, 1 + LINK_SIZE);
962
963 /* Add to "recursing stack" */
964
965 new_recursive.prevrec = md->recursive;
966 md->recursive = &new_recursive;
967
968 /* Find where to continue from afterwards */
969
970 ecode += 1 + LINK_SIZE;
971 new_recursive.after_call = ecode;
972
973 /* Now save the offset data. */
974
975 new_recursive.saved_max = md->offset_end;
976 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
977 new_recursive.offset_save = stacksave;
978 else
979 {
980 new_recursive.offset_save =
981 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
982 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
983 }
984
985 memcpy(new_recursive.offset_save, md->offset_vector,
986 new_recursive.saved_max * sizeof(int));
987 new_recursive.save_start = mstart;
988 mstart = eptr;
989
990 /* OK, now we can do the recursion. For each top-level alternative we
991 restore the offset and recursion data. */
992
993 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
994 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
995 do
996 {
997 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
998 md, ims, eptrb, flags, RM6);
999 if (rrc == MATCH_MATCH)
1000 {
1001 DPRINTF(("Recursion matched\n"));
1002 md->recursive = new_recursive.prevrec;
1003 if (new_recursive.offset_save != stacksave)
1004 (pcre_free)(new_recursive.offset_save);
1005 RRETURN(MATCH_MATCH);
1006 }
1007 else if (rrc != MATCH_NOMATCH)
1008 {
1009 DPRINTF(("Recursion gave error %d\n", rrc));
1010 RRETURN(rrc);
1011 }
1012
1013 md->recursive = &new_recursive;
1014 memcpy(md->offset_vector, new_recursive.offset_save,
1015 new_recursive.saved_max * sizeof(int));
1016 callpat += GET(callpat, 1);
1017 }
1018 while (*callpat == OP_ALT);
1019
1020 DPRINTF(("Recursion didn't match\n"));
1021 md->recursive = new_recursive.prevrec;
1022 if (new_recursive.offset_save != stacksave)
1023 (pcre_free)(new_recursive.offset_save);
1024 RRETURN(MATCH_NOMATCH);
1025 }
1026 /* Control never reaches here */
1027
1028 /* "Once" brackets are like assertion brackets except that after a match,
1029 the point in the subject string is not moved back. Thus there can never be
1030 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1031 Check the alternative branches in turn - the matching won't pass the KET
1032 for this kind of subpattern. If any one branch matches, we carry on as at
1033 the end of a normal bracket, leaving the subject pointer. */
1034
1035 case OP_ONCE:
1036 prev = ecode;
1037 saved_eptr = eptr;
1038
1039 do
1040 {
1041 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1042 if (rrc == MATCH_MATCH) break;
1043 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1044 ecode += GET(ecode,1);
1045 }
1046 while (*ecode == OP_ALT);
1047
1048 /* If we hit the end of the group (which could be repeated), fail */
1049
1050 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1051
1052 /* Continue as from after the assertion, updating the offsets high water
1053 mark, since extracts may have been taken. */
1054
1055 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1056
1057 offset_top = md->end_offset_top;
1058 eptr = md->end_match_ptr;
1059
1060 /* For a non-repeating ket, just continue at this level. This also
1061 happens for a repeating ket if no characters were matched in the group.
1062 This is the forcible breaking of infinite loops as implemented in Perl
1063 5.005. If there is an options reset, it will get obeyed in the normal
1064 course of events. */
1065
1066 if (*ecode == OP_KET || eptr == saved_eptr)
1067 {
1068 ecode += 1+LINK_SIZE;
1069 break;
1070 }
1071
1072 /* The repeating kets try the rest of the pattern or restart from the
1073 preceding bracket, in the appropriate order. The second "call" of match()
1074 uses tail recursion, to avoid using another stack frame. We need to reset
1075 any options that changed within the bracket before re-running it, so
1076 check the next opcode. */
1077
1078 if (ecode[1+LINK_SIZE] == OP_OPT)
1079 {
1080 ims = (ims & ~PCRE_IMS) | ecode[4];
1081 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1082 }
1083
1084 if (*ecode == OP_KETRMIN)
1085 {
1086 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1087 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1088 ecode = prev;
1089 flags = 0;
1090 goto TAIL_RECURSE;
1091 }
1092 else /* OP_KETRMAX */
1093 {
1094 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1095 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1096 ecode += 1 + LINK_SIZE;
1097 flags = 0;
1098 goto TAIL_RECURSE;
1099 }
1100 /* Control never gets here */
1101
1102 /* An alternation is the end of a branch; scan along to find the end of the
1103 bracketed group and go to there. */
1104
1105 case OP_ALT:
1106 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1107 break;
1108
1109 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1110 that it may occur zero times. It may repeat infinitely, or not at all -
1111 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1112 repeat limits are compiled as a number of copies, with the optional ones
1113 preceded by BRAZERO or BRAMINZERO. */
1114
1115 case OP_BRAZERO:
1116 {
1117 next = ecode+1;
1118 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1119 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1120 do next += GET(next,1); while (*next == OP_ALT);
1121 ecode = next + 1 + LINK_SIZE;
1122 }
1123 break;
1124
1125 case OP_BRAMINZERO:
1126 {
1127 next = ecode+1;
1128 do next += GET(next, 1); while (*next == OP_ALT);
1129 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1130 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1131 ecode++;
1132 }
1133 break;
1134
1135 /* End of a group, repeated or non-repeating. */
1136
1137 case OP_KET:
1138 case OP_KETRMIN:
1139 case OP_KETRMAX:
1140 prev = ecode - GET(ecode, 1);
1141
1142 /* If this was a group that remembered the subject start, in order to break
1143 infinite repeats of empty string matches, retrieve the subject start from
1144 the chain. Otherwise, set it NULL. */
1145
1146 if (*prev >= OP_SBRA)
1147 {
1148 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1149 eptrb = eptrb->epb_prev; /* Backup to previous group */
1150 }
1151 else saved_eptr = NULL;
1152
1153 /* If we are at the end of an assertion group, stop matching and return
1154 MATCH_MATCH, but record the current high water mark for use by positive
1155 assertions. Do this also for the "once" (atomic) groups. */
1156
1157 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1158 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1159 *prev == OP_ONCE)
1160 {
1161 md->end_match_ptr = eptr; /* For ONCE */
1162 md->end_offset_top = offset_top;
1163 RRETURN(MATCH_MATCH);
1164 }
1165
1166 /* For capturing groups we have to check the group number back at the start
1167 and if necessary complete handling an extraction by setting the offsets and
1168 bumping the high water mark. Note that whole-pattern recursion is coded as
1169 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1170 when the OP_END is reached. Other recursion is handled here. */
1171
1172 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1173 {
1174 number = GET2(prev, 1+LINK_SIZE);
1175 offset = number << 1;
1176
1177 #ifdef DEBUG
1178 printf("end bracket %d", number);
1179 printf("\n");
1180 #endif
1181
1182 md->capture_last = number;
1183 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1184 {
1185 md->offset_vector[offset] =
1186 md->offset_vector[md->offset_end - number];
1187 md->offset_vector[offset+1] = eptr - md->start_subject;
1188 if (offset_top <= offset) offset_top = offset + 2;
1189 }
1190
1191 /* Handle a recursively called group. Restore the offsets
1192 appropriately and continue from after the call. */
1193
1194 if (md->recursive != NULL && md->recursive->group_num == number)
1195 {
1196 recursion_info *rec = md->recursive;
1197 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1198 md->recursive = rec->prevrec;
1199 mstart = rec->save_start;
1200 memcpy(md->offset_vector, rec->offset_save,
1201 rec->saved_max * sizeof(int));
1202 ecode = rec->after_call;
1203 ims = original_ims;
1204 break;
1205 }
1206 }
1207
1208 /* For both capturing and non-capturing groups, reset the value of the ims
1209 flags, in case they got changed during the group. */
1210
1211 ims = original_ims;
1212 DPRINTF(("ims reset to %02lx\n", ims));
1213
1214 /* For a non-repeating ket, just continue at this level. This also
1215 happens for a repeating ket if no characters were matched in the group.
1216 This is the forcible breaking of infinite loops as implemented in Perl
1217 5.005. If there is an options reset, it will get obeyed in the normal
1218 course of events. */
1219
1220 if (*ecode == OP_KET || eptr == saved_eptr)
1221 {
1222 ecode += 1 + LINK_SIZE;
1223 break;
1224 }
1225
1226 /* The repeating kets try the rest of the pattern or restart from the
1227 preceding bracket, in the appropriate order. In the second case, we can use
1228 tail recursion to avoid using another stack frame, unless we have an
1229 unlimited repeat of a group that can match an empty string. */
1230
1231 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1232
1233 if (*ecode == OP_KETRMIN)
1234 {
1235 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1236 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1237 if (flags != 0) /* Could match an empty string */
1238 {
1239 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1240 RRETURN(rrc);
1241 }
1242 ecode = prev;
1243 goto TAIL_RECURSE;
1244 }
1245 else /* OP_KETRMAX */
1246 {
1247 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1248 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1249 ecode += 1 + LINK_SIZE;
1250 flags = 0;
1251 goto TAIL_RECURSE;
1252 }
1253 /* Control never gets here */
1254
1255 /* Start of subject unless notbol, or after internal newline if multiline */
1256
1257 case OP_CIRC:
1258 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1259 if ((ims & PCRE_MULTILINE) != 0)
1260 {
1261 if (eptr != md->start_subject &&
1262 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1263 RRETURN(MATCH_NOMATCH);
1264 ecode++;
1265 break;
1266 }
1267 /* ... else fall through */
1268
1269 /* Start of subject assertion */
1270
1271 case OP_SOD:
1272 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1273 ecode++;
1274 break;
1275
1276 /* Start of match assertion */
1277
1278 case OP_SOM:
1279 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1280 ecode++;
1281 break;
1282
1283 /* Reset the start of match point */
1284
1285 case OP_SET_SOM:
1286 mstart = eptr;
1287 ecode++;
1288 break;
1289
1290 /* Assert before internal newline if multiline, or before a terminating
1291 newline unless endonly is set, else end of subject unless noteol is set. */
1292
1293 case OP_DOLL:
1294 if ((ims & PCRE_MULTILINE) != 0)
1295 {
1296 if (eptr < md->end_subject)
1297 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1298 else
1299 { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1300 ecode++;
1301 break;
1302 }
1303 else
1304 {
1305 if (md->noteol) RRETURN(MATCH_NOMATCH);
1306 if (!md->endonly)
1307 {
1308 if (eptr != md->end_subject &&
1309 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1310 RRETURN(MATCH_NOMATCH);
1311 ecode++;
1312 break;
1313 }
1314 }
1315 /* ... else fall through for endonly */
1316
1317 /* End of subject assertion (\z) */
1318
1319 case OP_EOD:
1320 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1321 ecode++;
1322 break;
1323
1324 /* End of subject or ending \n assertion (\Z) */
1325
1326 case OP_EODN:
1327 if (eptr != md->end_subject &&
1328 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1329 RRETURN(MATCH_NOMATCH);
1330 ecode++;
1331 break;
1332
1333 /* Word boundary assertions */
1334
1335 case OP_NOT_WORD_BOUNDARY:
1336 case OP_WORD_BOUNDARY:
1337 {
1338
1339 /* Find out if the previous and current characters are "word" characters.
1340 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1341 be "non-word" characters. */
1342
1343 #ifdef SUPPORT_UTF8
1344 if (utf8)
1345 {
1346 if (eptr == md->start_subject) prev_is_word = FALSE; else
1347 {
1348 const uschar *lastptr = eptr - 1;
1349 while((*lastptr & 0xc0) == 0x80) lastptr--;
1350 GETCHAR(c, lastptr);
1351 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1352 }
1353 if (eptr >= md->end_subject) cur_is_word = FALSE; else
1354 {
1355 GETCHAR(c, eptr);
1356 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1357 }
1358 }
1359 else
1360 #endif
1361
1362 /* More streamlined when not in UTF-8 mode */
1363
1364 {
1365 prev_is_word = (eptr != md->start_subject) &&
1366 ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1367 cur_is_word = (eptr < md->end_subject) &&
1368 ((md->ctypes[*eptr] & ctype_word) != 0);
1369 }
1370
1371 /* Now see if the situation is what we want */
1372
1373 if ((*ecode++ == OP_WORD_BOUNDARY)?
1374 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1375 RRETURN(MATCH_NOMATCH);
1376 }
1377 break;
1378
1379 /* Match a single character type; inline for speed */
1380
1381 case OP_ANY:
1382 if ((ims & PCRE_DOTALL) == 0)
1383 {
1384 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1385 }
1386 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1387 if (utf8)
1388 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1389 ecode++;
1390 break;
1391
1392 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1393 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1394
1395 case OP_ANYBYTE:
1396 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1397 ecode++;
1398 break;
1399
1400 case OP_NOT_DIGIT:
1401 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1402 GETCHARINCTEST(c, eptr);
1403 if (
1404 #ifdef SUPPORT_UTF8
1405 c < 256 &&
1406 #endif
1407 (md->ctypes[c] & ctype_digit) != 0
1408 )
1409 RRETURN(MATCH_NOMATCH);
1410 ecode++;
1411 break;
1412
1413 case OP_DIGIT:
1414 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1415 GETCHARINCTEST(c, eptr);
1416 if (
1417 #ifdef SUPPORT_UTF8
1418 c >= 256 ||
1419 #endif
1420 (md->ctypes[c] & ctype_digit) == 0
1421 )
1422 RRETURN(MATCH_NOMATCH);
1423 ecode++;
1424 break;
1425
1426 case OP_NOT_WHITESPACE:
1427 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1428 GETCHARINCTEST(c, eptr);
1429 if (
1430 #ifdef SUPPORT_UTF8
1431 c < 256 &&
1432 #endif
1433 (md->ctypes[c] & ctype_space) != 0
1434 )
1435 RRETURN(MATCH_NOMATCH);
1436 ecode++;
1437 break;
1438
1439 case OP_WHITESPACE:
1440 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1441 GETCHARINCTEST(c, eptr);
1442 if (
1443 #ifdef SUPPORT_UTF8
1444 c >= 256 ||
1445 #endif
1446 (md->ctypes[c] & ctype_space) == 0
1447 )
1448 RRETURN(MATCH_NOMATCH);
1449 ecode++;
1450 break;
1451
1452 case OP_NOT_WORDCHAR:
1453 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1454 GETCHARINCTEST(c, eptr);
1455 if (
1456 #ifdef SUPPORT_UTF8
1457 c < 256 &&
1458 #endif
1459 (md->ctypes[c] & ctype_word) != 0
1460 )
1461 RRETURN(MATCH_NOMATCH);
1462 ecode++;
1463 break;
1464
1465 case OP_WORDCHAR:
1466 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1467 GETCHARINCTEST(c, eptr);
1468 if (
1469 #ifdef SUPPORT_UTF8
1470 c >= 256 ||
1471 #endif
1472 (md->ctypes[c] & ctype_word) == 0
1473 )
1474 RRETURN(MATCH_NOMATCH);
1475 ecode++;
1476 break;
1477
1478 case OP_ANYNL:
1479 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1480 GETCHARINCTEST(c, eptr);
1481 switch(c)
1482 {
1483 default: RRETURN(MATCH_NOMATCH);
1484 case 0x000d:
1485 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1486 break;
1487 case 0x000a:
1488 case 0x000b:
1489 case 0x000c:
1490 case 0x0085:
1491 case 0x2028:
1492 case 0x2029:
1493 break;
1494 }
1495 ecode++;
1496 break;
1497
1498 case OP_NOT_HSPACE:
1499 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1500 GETCHARINCTEST(c, eptr);
1501 switch(c)
1502 {
1503 default: break;
1504 case 0x09: /* HT */
1505 case 0x20: /* SPACE */
1506 case 0xa0: /* NBSP */
1507 case 0x1680: /* OGHAM SPACE MARK */
1508 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1509 case 0x2000: /* EN QUAD */
1510 case 0x2001: /* EM QUAD */
1511 case 0x2002: /* EN SPACE */
1512 case 0x2003: /* EM SPACE */
1513 case 0x2004: /* THREE-PER-EM SPACE */
1514 case 0x2005: /* FOUR-PER-EM SPACE */
1515 case 0x2006: /* SIX-PER-EM SPACE */
1516 case 0x2007: /* FIGURE SPACE */
1517 case 0x2008: /* PUNCTUATION SPACE */
1518 case 0x2009: /* THIN SPACE */
1519 case 0x200A: /* HAIR SPACE */
1520 case 0x202f: /* NARROW NO-BREAK SPACE */
1521 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1522 case 0x3000: /* IDEOGRAPHIC SPACE */
1523 RRETURN(MATCH_NOMATCH);
1524 }
1525 ecode++;
1526 break;
1527
1528 case OP_HSPACE:
1529 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1530 GETCHARINCTEST(c, eptr);
1531 switch(c)
1532 {
1533 default: RRETURN(MATCH_NOMATCH);
1534 case 0x09: /* HT */
1535 case 0x20: /* SPACE */
1536 case 0xa0: /* NBSP */
1537 case 0x1680: /* OGHAM SPACE MARK */
1538 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1539 case 0x2000: /* EN QUAD */
1540 case 0x2001: /* EM QUAD */
1541 case 0x2002: /* EN SPACE */
1542 case 0x2003: /* EM SPACE */
1543 case 0x2004: /* THREE-PER-EM SPACE */
1544 case 0x2005: /* FOUR-PER-EM SPACE */
1545 case 0x2006: /* SIX-PER-EM SPACE */
1546 case 0x2007: /* FIGURE SPACE */
1547 case 0x2008: /* PUNCTUATION SPACE */
1548 case 0x2009: /* THIN SPACE */
1549 case 0x200A: /* HAIR SPACE */
1550 case 0x202f: /* NARROW NO-BREAK SPACE */
1551 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1552 case 0x3000: /* IDEOGRAPHIC SPACE */
1553 break;
1554 }
1555 ecode++;
1556 break;
1557
1558 case OP_NOT_VSPACE:
1559 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1560 GETCHARINCTEST(c, eptr);
1561 switch(c)
1562 {
1563 default: break;
1564 case 0x0a: /* LF */
1565 case 0x0b: /* VT */
1566 case 0x0c: /* FF */
1567 case 0x0d: /* CR */
1568 case 0x85: /* NEL */
1569 case 0x2028: /* LINE SEPARATOR */
1570 case 0x2029: /* PARAGRAPH SEPARATOR */
1571 RRETURN(MATCH_NOMATCH);
1572 }
1573 ecode++;
1574 break;
1575
1576 case OP_VSPACE:
1577 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1578 GETCHARINCTEST(c, eptr);
1579 switch(c)
1580 {
1581 default: RRETURN(MATCH_NOMATCH);
1582 case 0x0a: /* LF */
1583 case 0x0b: /* VT */
1584 case 0x0c: /* FF */
1585 case 0x0d: /* CR */
1586 case 0x85: /* NEL */
1587 case 0x2028: /* LINE SEPARATOR */
1588 case 0x2029: /* PARAGRAPH SEPARATOR */
1589 break;
1590 }
1591 ecode++;
1592 break;
1593
1594 #ifdef SUPPORT_UCP
1595 /* Check the next character by Unicode property. We will get here only
1596 if the support is in the binary; otherwise a compile-time error occurs. */
1597
1598 case OP_PROP:
1599 case OP_NOTPROP:
1600 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1601 GETCHARINCTEST(c, eptr);
1602 {
1603 int chartype, script;
1604 int category = _pcre_ucp_findprop(c, &chartype, &script);
1605
1606 switch(ecode[1])
1607 {
1608 case PT_ANY:
1609 if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1610 break;
1611
1612 case PT_LAMP:
1613 if ((chartype == ucp_Lu ||
1614 chartype == ucp_Ll ||
1615 chartype == ucp_Lt) == (op == OP_NOTPROP))
1616 RRETURN(MATCH_NOMATCH);
1617 break;
1618
1619 case PT_GC:
1620 if ((ecode[2] != category) == (op == OP_PROP))
1621 RRETURN(MATCH_NOMATCH);
1622 break;
1623
1624 case PT_PC:
1625 if ((ecode[2] != chartype) == (op == OP_PROP))
1626 RRETURN(MATCH_NOMATCH);
1627 break;
1628
1629 case PT_SC:
1630 if ((ecode[2] != script) == (op == OP_PROP))
1631 RRETURN(MATCH_NOMATCH);
1632 break;
1633
1634 default:
1635 RRETURN(PCRE_ERROR_INTERNAL);
1636 }
1637
1638 ecode += 3;
1639 }
1640 break;
1641
1642 /* Match an extended Unicode sequence. We will get here only if the support
1643 is in the binary; otherwise a compile-time error occurs. */
1644
1645 case OP_EXTUNI:
1646 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1647 GETCHARINCTEST(c, eptr);
1648 {
1649 int chartype, script;
1650 int category = _pcre_ucp_findprop(c, &chartype, &script);
1651 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1652 while (eptr < md->end_subject)
1653 {
1654 int len = 1;
1655 if (!utf8) c = *eptr; else
1656 {
1657 GETCHARLEN(c, eptr, len);
1658 }
1659 category = _pcre_ucp_findprop(c, &chartype, &script);
1660 if (category != ucp_M) break;
1661 eptr += len;
1662 }
1663 }
1664 ecode++;
1665 break;
1666 #endif
1667
1668
1669 /* Match a back reference, possibly repeatedly. Look past the end of the
1670 item to see if there is repeat information following. The code is similar
1671 to that for character classes, but repeated for efficiency. Then obey
1672 similar code to character type repeats - written out again for speed.
1673 However, if the referenced string is the empty string, always treat
1674 it as matched, any number of times (otherwise there could be infinite
1675 loops). */
1676
1677 case OP_REF:
1678 {
1679 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1680 ecode += 3; /* Advance past item */
1681
1682 /* If the reference is unset, set the length to be longer than the amount
1683 of subject left; this ensures that every attempt at a match fails. We
1684 can't just fail here, because of the possibility of quantifiers with zero
1685 minima. */
1686
1687 length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1688 md->end_subject - eptr + 1 :
1689 md->offset_vector[offset+1] - md->offset_vector[offset];
1690
1691 /* Set up for repetition, or handle the non-repeated case */
1692
1693 switch (*ecode)
1694 {
1695 case OP_CRSTAR:
1696 case OP_CRMINSTAR:
1697 case OP_CRPLUS:
1698 case OP_CRMINPLUS:
1699 case OP_CRQUERY:
1700 case OP_CRMINQUERY:
1701 c = *ecode++ - OP_CRSTAR;
1702 minimize = (c & 1) != 0;
1703 min = rep_min[c]; /* Pick up values from tables; */
1704 max = rep_max[c]; /* zero for max => infinity */
1705 if (max == 0) max = INT_MAX;
1706 break;
1707
1708 case OP_CRRANGE:
1709 case OP_CRMINRANGE:
1710 minimize = (*ecode == OP_CRMINRANGE);
1711 min = GET2(ecode, 1);
1712 max = GET2(ecode, 3);
1713 if (max == 0) max = INT_MAX;
1714 ecode += 5;
1715 break;
1716
1717 default: /* No repeat follows */
1718 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1719 eptr += length;
1720 continue; /* With the main loop */
1721 }
1722
1723 /* If the length of the reference is zero, just continue with the
1724 main loop. */
1725
1726 if (length == 0) continue;
1727
1728 /* First, ensure the minimum number of matches are present. We get back
1729 the length of the reference string explicitly rather than passing the
1730 address of eptr, so that eptr can be a register variable. */
1731
1732 for (i = 1; i <= min; i++)
1733 {
1734 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1735 eptr += length;
1736 }
1737
1738 /* If min = max, continue at the same level without recursion.
1739 They are not both allowed to be zero. */
1740
1741 if (min == max) continue;
1742
1743 /* If minimizing, keep trying and advancing the pointer */
1744
1745 if (minimize)
1746 {
1747 for (fi = min;; fi++)
1748 {
1749 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1750 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1751 if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1752 RRETURN(MATCH_NOMATCH);
1753 eptr += length;
1754 }
1755 /* Control never gets here */
1756 }
1757
1758 /* If maximizing, find the longest string and work backwards */
1759
1760 else
1761 {
1762 pp = eptr;
1763 for (i = min; i < max; i++)
1764 {
1765 if (!match_ref(offset, eptr, length, md, ims)) break;
1766 eptr += length;
1767 }
1768 while (eptr >= pp)
1769 {
1770 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1771 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1772 eptr -= length;
1773 }
1774 RRETURN(MATCH_NOMATCH);
1775 }
1776 }
1777 /* Control never gets here */
1778
1779
1780
1781 /* Match a bit-mapped character class, possibly repeatedly. This op code is
1782 used when all the characters in the class have values in the range 0-255,
1783 and either the matching is caseful, or the characters are in the range
1784 0-127 when UTF-8 processing is enabled. The only difference between
1785 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1786 encountered.
1787
1788 First, look past the end of the item to see if there is repeat information
1789 following. Then obey similar code to character type repeats - written out
1790 again for speed. */
1791
1792 case OP_NCLASS:
1793 case OP_CLASS:
1794 {
1795 data = ecode + 1; /* Save for matching */
1796 ecode += 33; /* Advance past the item */
1797
1798 switch (*ecode)
1799 {
1800 case OP_CRSTAR:
1801 case OP_CRMINSTAR:
1802 case OP_CRPLUS:
1803 case OP_CRMINPLUS:
1804 case OP_CRQUERY:
1805 case OP_CRMINQUERY:
1806 c = *ecode++ - OP_CRSTAR;
1807 minimize = (c & 1) != 0;
1808 min = rep_min[c]; /* Pick up values from tables; */
1809 max = rep_max[c]; /* zero for max => infinity */
1810 if (max == 0) max = INT_MAX;
1811 break;
1812
1813 case OP_CRRANGE:
1814 case OP_CRMINRANGE:
1815 minimize = (*ecode == OP_CRMINRANGE);
1816 min = GET2(ecode, 1);
1817 max = GET2(ecode, 3);
1818 if (max == 0) max = INT_MAX;
1819 ecode += 5;
1820 break;
1821
1822 default: /* No repeat follows */
1823 min = max = 1;
1824 break;
1825 }
1826
1827 /* First, ensure the minimum number of matches are present. */
1828
1829 #ifdef SUPPORT_UTF8
1830 /* UTF-8 mode */
1831 if (utf8)
1832 {
1833 for (i = 1; i <= min; i++)
1834 {
1835 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1836 GETCHARINC(c, eptr);
1837 if (c > 255)
1838 {
1839 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1840 }
1841 else
1842 {
1843 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1844 }
1845 }
1846 }
1847 else
1848 #endif
1849 /* Not UTF-8 mode */
1850 {
1851 for (i = 1; i <= min; i++)
1852 {
1853 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1854 c = *eptr++;
1855 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1856 }
1857 }
1858
1859 /* If max == min we can continue with the main loop without the
1860 need to recurse. */
1861
1862 if (min == max) continue;
1863
1864 /* If minimizing, keep testing the rest of the expression and advancing
1865 the pointer while it matches the class. */
1866
1867 if (minimize)
1868 {
1869 #ifdef SUPPORT_UTF8
1870 /* UTF-8 mode */
1871 if (utf8)
1872 {
1873 for (fi = min;; fi++)
1874 {
1875 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1876 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1877 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1878 GETCHARINC(c, eptr);
1879 if (c > 255)
1880 {
1881 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1882 }
1883 else
1884 {
1885 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1886 }
1887 }
1888 }
1889 else
1890 #endif
1891 /* Not UTF-8 mode */
1892 {
1893 for (fi = min;; fi++)
1894 {
1895 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1896 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1897 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1898 c = *eptr++;
1899 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1900 }
1901 }
1902 /* Control never gets here */
1903 }
1904
1905 /* If maximizing, find the longest possible run, then work backwards. */
1906
1907 else
1908 {
1909 pp = eptr;
1910
1911 #ifdef SUPPORT_UTF8
1912 /* UTF-8 mode */
1913 if (utf8)
1914 {
1915 for (i = min; i < max; i++)
1916 {
1917 int len = 1;
1918 if (eptr >= md->end_subject) break;
1919 GETCHARLEN(c, eptr, len);
1920 if (c > 255)
1921 {
1922 if (op == OP_CLASS) break;
1923 }
1924 else
1925 {
1926 if ((data[c/8] & (1 << (c&7))) == 0) break;
1927 }
1928 eptr += len;
1929 }
1930 for (;;)
1931 {
1932 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1933 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1934 if (eptr-- == pp) break; /* Stop if tried at original pos */
1935 BACKCHAR(eptr);
1936 }
1937 }
1938 else
1939 #endif
1940 /* Not UTF-8 mode */
1941 {
1942 for (i = min; i < max; i++)
1943 {
1944 if (eptr >= md->end_subject) break;
1945 c = *eptr;
1946 if ((data[c/8] & (1 << (c&7))) == 0) break;
1947 eptr++;
1948 }
1949 while (eptr >= pp)
1950 {
1951 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1952 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1953 eptr--;
1954 }
1955 }
1956
1957 RRETURN(MATCH_NOMATCH);
1958 }
1959 }
1960 /* Control never gets here */
1961
1962
1963 /* Match an extended character class. This opcode is encountered only
1964 in UTF-8 mode, because that's the only time it is compiled. */
1965
1966 #ifdef SUPPORT_UTF8
1967 case OP_XCLASS:
1968 {
1969 data = ecode + 1 + LINK_SIZE; /* Save for matching */
1970 ecode += GET(ecode, 1); /* Advance past the item */
1971
1972 switch (*ecode)
1973 {
1974 case OP_CRSTAR:
1975 case OP_CRMINSTAR:
1976 case OP_CRPLUS:
1977 case OP_CRMINPLUS:
1978 case OP_CRQUERY:
1979 case OP_CRMINQUERY:
1980 c = *ecode++ - OP_CRSTAR;
1981 minimize = (c & 1) != 0;
1982 min = rep_min[c]; /* Pick up values from tables; */
1983 max = rep_max[c]; /* zero for max => infinity */
1984 if (max == 0) max = INT_MAX;
1985 break;
1986
1987 case OP_CRRANGE:
1988 case OP_CRMINRANGE:
1989 minimize = (*ecode == OP_CRMINRANGE);
1990 min = GET2(ecode, 1);
1991 max = GET2(ecode, 3);
1992 if (max == 0) max = INT_MAX;
1993 ecode += 5;
1994 break;
1995
1996 default: /* No repeat follows */
1997 min = max = 1;
1998 break;
1999 }
2000
2001 /* First, ensure the minimum number of matches are present. */
2002
2003 for (i = 1; i <= min; i++)
2004 {
2005 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2006 GETCHARINC(c, eptr);
2007 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2008 }
2009
2010 /* If max == min we can continue with the main loop without the
2011 need to recurse. */
2012
2013 if (min == max) continue;
2014
2015 /* If minimizing, keep testing the rest of the expression and advancing
2016 the pointer while it matches the class. */
2017
2018 if (minimize)
2019 {
2020 for (fi = min;; fi++)
2021 {
2022 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2023 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2024 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2025 GETCHARINC(c, eptr);
2026 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2027 }
2028 /* Control never gets here */
2029 }
2030
2031 /* If maximizing, find the longest possible run, then work backwards. */
2032
2033 else
2034 {
2035 pp = eptr;
2036 for (i = min; i < max; i++)
2037 {
2038 int len = 1;
2039 if (eptr >= md->end_subject) break;
2040 GETCHARLEN(c, eptr, len);
2041 if (!_pcre_xclass(c, data)) break;
2042 eptr += len;
2043 }
2044 for(;;)
2045 {
2046 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2047 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2048 if (eptr-- == pp) break; /* Stop if tried at original pos */
2049 BACKCHAR(eptr)
2050 }
2051 RRETURN(MATCH_NOMATCH);
2052 }
2053
2054 /* Control never gets here */
2055 }
2056 #endif /* End of XCLASS */
2057
2058 /* Match a single character, casefully */
2059
2060 case OP_CHAR:
2061 #ifdef SUPPORT_UTF8
2062 if (utf8)
2063 {
2064 length = 1;
2065 ecode++;
2066 GETCHARLEN(fc, ecode, length);
2067 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2068 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2069 }
2070 else
2071 #endif
2072
2073 /* Non-UTF-8 mode */
2074 {
2075 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2076 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2077 ecode += 2;
2078 }
2079 break;
2080
2081 /* Match a single character, caselessly */
2082
2083 case OP_CHARNC:
2084 #ifdef SUPPORT_UTF8
2085 if (utf8)
2086 {
2087 length = 1;
2088 ecode++;
2089 GETCHARLEN(fc, ecode, length);
2090
2091 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2092
2093 /* If the pattern character's value is < 128, we have only one byte, and
2094 can use the fast lookup table. */
2095
2096 if (fc < 128)
2097 {
2098 if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2099 }
2100
2101 /* Otherwise we must pick up the subject character */
2102
2103 else
2104 {
2105 unsigned int dc;
2106 GETCHARINC(dc, eptr);
2107 ecode += length;
2108
2109 /* If we have Unicode property support, we can use it to test the other
2110 case of the character, if there is one. */
2111
2112 if (fc != dc)
2113 {
2114 #ifdef SUPPORT_UCP
2115 if (dc != _pcre_ucp_othercase(fc))
2116 #endif
2117 RRETURN(MATCH_NOMATCH);
2118 }
2119 }
2120 }
2121 else
2122 #endif /* SUPPORT_UTF8 */
2123
2124 /* Non-UTF-8 mode */
2125 {
2126 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2127 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2128 ecode += 2;
2129 }
2130 break;
2131
2132 /* Match a single character repeatedly. */
2133
2134 case OP_EXACT:
2135 min = max = GET2(ecode, 1);
2136 ecode += 3;
2137 goto REPEATCHAR;
2138
2139 case OP_POSUPTO:
2140 possessive = TRUE;
2141 /* Fall through */
2142
2143 case OP_UPTO:
2144 case OP_MINUPTO:
2145 min = 0;
2146 max = GET2(ecode, 1);
2147 minimize = *ecode == OP_MINUPTO;
2148 ecode += 3;
2149 goto REPEATCHAR;
2150
2151 case OP_POSSTAR:
2152 possessive = TRUE;
2153 min = 0;
2154 max = INT_MAX;
2155 ecode++;
2156 goto REPEATCHAR;
2157
2158 case OP_POSPLUS:
2159 possessive = TRUE;
2160 min = 1;
2161 max = INT_MAX;
2162 ecode++;
2163 goto REPEATCHAR;
2164
2165 case OP_POSQUERY:
2166 possessive = TRUE;
2167 min = 0;
2168 max = 1;
2169 ecode++;
2170 goto REPEATCHAR;
2171
2172 case OP_STAR:
2173 case OP_MINSTAR:
2174 case OP_PLUS:
2175 case OP_MINPLUS:
2176 case OP_QUERY:
2177 case OP_MINQUERY:
2178 c = *ecode++ - OP_STAR;
2179 minimize = (c & 1) != 0;
2180 min = rep_min[c]; /* Pick up values from tables; */
2181 max = rep_max[c]; /* zero for max => infinity */
2182 if (max == 0) max = INT_MAX;
2183
2184 /* Common code for all repeated single-character matches. We can give
2185 up quickly if there are fewer than the minimum number of characters left in
2186 the subject. */
2187
2188 REPEATCHAR:
2189 #ifdef SUPPORT_UTF8
2190 if (utf8)
2191 {
2192 length = 1;
2193 charptr = ecode;
2194 GETCHARLEN(fc, ecode, length);
2195 if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2196 ecode += length;
2197
2198 /* Handle multibyte character matching specially here. There is
2199 support for caseless matching if UCP support is present. */
2200
2201 if (length > 1)
2202 {
2203 #ifdef SUPPORT_UCP
2204 unsigned int othercase;
2205 if ((ims & PCRE_CASELESS) != 0 &&
2206 (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2207 oclength = _pcre_ord2utf8(othercase, occhars);
2208 else oclength = 0;
2209 #endif /* SUPPORT_UCP */
2210
2211 for (i = 1; i <= min; i++)
2212 {
2213 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2214 #ifdef SUPPORT_UCP
2215 /* Need braces because of following else */
2216 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2217 else
2218 {
2219 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2220 eptr += oclength;
2221 }
2222 #else /* without SUPPORT_UCP */
2223 else { RRETURN(MATCH_NOMATCH); }
2224 #endif /* SUPPORT_UCP */
2225 }
2226
2227 if (min == max) continue;
2228
2229 if (minimize)
2230 {
2231 for (fi = min;; fi++)
2232 {
2233 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2234 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2235 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2236 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2237 #ifdef SUPPORT_UCP
2238 /* Need braces because of following else */
2239 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2240 else
2241 {
2242 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2243 eptr += oclength;
2244 }
2245 #else /* without SUPPORT_UCP */
2246 else { RRETURN (MATCH_NOMATCH); }
2247 #endif /* SUPPORT_UCP */
2248 }
2249 /* Control never gets here */
2250 }
2251
2252 else /* Maximize */
2253 {
2254 pp = eptr;
2255 for (i = min; i < max; i++)
2256 {
2257 if (eptr > md->end_subject - length) break;
2258 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2259 #ifdef SUPPORT_UCP
2260 else if (oclength == 0) break;
2261 else
2262 {
2263 if (memcmp(eptr, occhars, oclength) != 0) break;
2264 eptr += oclength;
2265 }
2266 #else /* without SUPPORT_UCP */
2267 else break;
2268 #endif /* SUPPORT_UCP */
2269 }
2270
2271 if (possessive) continue;
2272 for(;;)
2273 {
2274 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2275 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2276 if (eptr == pp) RRETURN(MATCH_NOMATCH);
2277 #ifdef SUPPORT_UCP
2278 eptr--;
2279 BACKCHAR(eptr);
2280 #else /* without SUPPORT_UCP */
2281 eptr -= length;
2282 #endif /* SUPPORT_UCP */
2283 }
2284 }
2285 /* Control never gets here */
2286 }
2287
2288 /* If the length of a UTF-8 character is 1, we fall through here, and
2289 obey the code as for non-UTF-8 characters below, though in this case the
2290 value of fc will always be < 128. */
2291 }
2292 else
2293 #endif /* SUPPORT_UTF8 */
2294
2295 /* When not in UTF-8 mode, load a single-byte character. */
2296 {
2297 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2298 fc = *ecode++;
2299 }
2300
2301 /* The value of fc at this point is always less than 256, though we may or
2302 may not be in UTF-8 mode. The code is duplicated for the caseless and
2303 caseful cases, for speed, since matching characters is likely to be quite
2304 common. First, ensure the minimum number of matches are present. If min =
2305 max, continue at the same level without recursing. Otherwise, if
2306 minimizing, keep trying the rest of the expression and advancing one
2307 matching character if failing, up to the maximum. Alternatively, if
2308 maximizing, find the maximum number of characters and work backwards. */
2309
2310 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2311 max, eptr));
2312
2313 if ((ims & PCRE_CASELESS) != 0)
2314 {
2315 fc = md->lcc[fc];
2316 for (i = 1; i <= min; i++)
2317 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2318 if (min == max) continue;
2319 if (minimize)
2320 {
2321 for (fi = min;; fi++)
2322 {
2323 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2324 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2325 if (fi >= max || eptr >= md->end_subject ||
2326 fc != md->lcc[*eptr++])
2327 RRETURN(MATCH_NOMATCH);
2328 }
2329 /* Control never gets here */
2330 }
2331 else /* Maximize */
2332 {
2333 pp = eptr;
2334 for (i = min; i < max; i++)
2335 {
2336 if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2337 eptr++;
2338 }
2339 if (possessive) continue;
2340 while (eptr >= pp)
2341 {
2342 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2343 eptr--;
2344 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2345 }
2346 RRETURN(MATCH_NOMATCH);
2347 }
2348 /* Control never gets here */
2349 }
2350
2351 /* Caseful comparisons (single-byte only; multi-byte characters were handled above) */
2352
2353 else
2354 {
2355 for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2356 if (min == max) continue;
2357 if (minimize)
2358 {
2359 for (fi = min;; fi++)
2360 {
2361 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2362 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2363 if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2364 RRETURN(MATCH_NOMATCH);
2365 }
2366 /* Control never gets here */
2367 }
2368 else /* Maximize */
2369 {
2370 pp = eptr;
2371 for (i = min; i < max; i++)
2372 {
2373 if (eptr >= md->end_subject || fc != *eptr) break;
2374 eptr++;
2375 }
2376 if (possessive) continue;
2377 while (eptr >= pp)
2378 {
2379 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2380 eptr--;
2381 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2382 }
2383 RRETURN(MATCH_NOMATCH);
2384 }
2385 }
2386 /* Control never gets here */
2387
2388 /* Match a negated single one-byte character. The subject character we are
2389 checking against it can be multibyte. */
2390
2391 case OP_NOT:
2392 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2393 ecode++;
2394 GETCHARINCTEST(c, eptr);
2395 if ((ims & PCRE_CASELESS) != 0)
2396 {
2397 #ifdef SUPPORT_UTF8
2398 if (c < 256)
2399 #endif
2400 c = md->lcc[c];
2401 if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2402 }
2403 else
2404 {
2405 if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2406 }
2407 break;
2408
2409 /* Match a negated single one-byte character repeatedly. This is almost a
2410 repeat of the code for a repeated single character, but I haven't found a
2411 nice way of commoning these up that doesn't require a test of the
2412 positive/negative option for each character match. Maybe that wouldn't add
2413 very much to the time taken, but character matching *is* what this is all
2414 about... */
2415
2416 case OP_NOTEXACT:
2417 min = max = GET2(ecode, 1);
2418 ecode += 3;
2419 goto REPEATNOTCHAR;
2420
2421 case OP_NOTUPTO:
2422 case OP_NOTMINUPTO:
2423 min = 0;
2424 max = GET2(ecode, 1);
2425 minimize = *ecode == OP_NOTMINUPTO;
2426 ecode += 3;
2427 goto REPEATNOTCHAR;
2428
2429 case OP_NOTPOSSTAR:
2430 possessive = TRUE;
2431 min = 0;
2432 max = INT_MAX;
2433 ecode++;
2434 goto REPEATNOTCHAR;
2435
2436 case OP_NOTPOSPLUS:
2437 possessive = TRUE;
2438 min = 1;
2439 max = INT_MAX;
2440 ecode++;
2441 goto REPEATNOTCHAR;
2442
2443 case OP_NOTPOSQUERY:
2444 possessive = TRUE;
2445 min = 0;
2446 max = 1;
2447 ecode++;
2448 goto REPEATNOTCHAR;
2449
2450 case OP_NOTPOSUPTO:
2451 possessive = TRUE;
2452 min = 0;
2453 max = GET2(ecode, 1);
2454 ecode += 3;
2455 goto REPEATNOTCHAR;
2456
2457 case OP_NOTSTAR:
2458 case OP_NOTMINSTAR:
2459 case OP_NOTPLUS:
2460 case OP_NOTMINPLUS:
2461 case OP_NOTQUERY:
2462 case OP_NOTMINQUERY:
2463 c = *ecode++ - OP_NOTSTAR;
2464 minimize = (c & 1) != 0;
2465 min = rep_min[c]; /* Pick up values from tables; */
2466 max = rep_max[c]; /* zero for max => infinity */
2467 if (max == 0) max = INT_MAX;
2468
2469 /* Common code for all repeated single-byte matches. We can give up quickly
2470 if there are fewer than the minimum number of bytes left in the
2471 subject. */
2472
2473 REPEATNOTCHAR:
2474 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2475 fc = *ecode++;
2476
2477 /* The code is duplicated for the caseless and caseful cases, for speed,
2478 since matching characters is likely to be quite common. First, ensure the
2479 minimum number of matches are present. If min = max, continue at the same
2480 level without recursing. Otherwise, if minimizing, keep trying the rest of
2481 the expression and advancing one matching character if failing, up to the
2482 maximum. Alternatively, if maximizing, find the maximum number of
2483 characters and work backwards. */
2484
2485 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2486 max, eptr));
2487
2488 if ((ims & PCRE_CASELESS) != 0)
2489 {
2490 fc = md->lcc[fc];
2491
2492 #ifdef SUPPORT_UTF8
2493 /* UTF-8 mode */
2494 if (utf8)
2495 {
2496 register unsigned int d;
2497 for (i = 1; i <= min; i++)
2498 {
2499 GETCHARINC(d, eptr);
2500 if (d < 256) d = md->lcc[d];
2501 if (fc == d) RRETURN(MATCH_NOMATCH);
2502 }
2503 }
2504 else
2505 #endif
2506
2507 /* Not UTF-8 mode */
2508 {
2509 for (i = 1; i <= min; i++)
2510 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2511 }
2512
2513 if (min == max) continue;
2514
2515 if (minimize)
2516 {
2517 #ifdef SUPPORT_UTF8
2518 /* UTF-8 mode */
2519 if (utf8)
2520 {
2521 register unsigned int d;
2522 for (fi = min;; fi++)
2523 {
2524 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2525 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2526 GETCHARINC(d, eptr);
2527 if (d < 256) d = md->lcc[d];
2528 if (fi >= max || eptr >= md->end_subject || fc == d)
2529 RRETURN(MATCH_NOMATCH);
2530 }
2531 }
2532 else
2533 #endif
2534 /* Not UTF-8 mode */
2535 {
2536 for (fi = min;; fi++)
2537 {
2538 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2539 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2540 if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2541 RRETURN(MATCH_NOMATCH);
2542 }
2543 }
2544 /* Control never gets here */
2545 }
2546
2547 /* Maximize case */
2548
2549 else
2550 {
2551 pp = eptr;
2552
2553 #ifdef SUPPORT_UTF8
2554 /* UTF-8 mode */
2555 if (utf8)
2556 {
2557 register unsigned int d;
2558 for (i = min; i < max; i++)
2559 {
2560 int len = 1;
2561 if (eptr >= md->end_subject) break;
2562 GETCHARLEN(d, eptr, len);
2563 if (d < 256) d = md->lcc[d];
2564 if (fc == d) break;
2565 eptr += len;
2566 }
2567 if (possessive) continue;
2568 for(;;)
2569 {
2570 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2571 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2572 if (eptr-- == pp) break; /* Stop if tried at original pos */
2573 BACKCHAR(eptr);
2574 }
2575 }
2576 else
2577 #endif
2578 /* Not UTF-8 mode */
2579 {
2580 for (i = min; i < max; i++)
2581 {
2582 if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2583 eptr++;
2584 }
2585 if (possessive) continue;
2586 while (eptr >= pp)
2587 {
2588 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2589 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2590 eptr--;
2591 }
2592 }
2593
2594 RRETURN(MATCH_NOMATCH);
2595 }
2596 /* Control never gets here */
2597 }
2598
2599 /* Caseful comparisons */
2600
2601 else
2602 {
2603 #ifdef SUPPORT_UTF8
2604 /* UTF-8 mode */
2605 if (utf8)
2606 {
2607 register unsigned int d;
2608 for (i = 1; i <= min; i++)
2609 {
2610 GETCHARINC(d, eptr);
2611 if (fc == d) RRETURN(MATCH_NOMATCH);
2612 }
2613 }
2614 else
2615 #endif
2616 /* Not UTF-8 mode */
2617 {
2618 for (i = 1; i <= min; i++)
2619 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2620 }
2621
2622 if (min == max) continue;
2623
2624 if (minimize)
2625 {
2626 #ifdef SUPPORT_UTF8
2627 /* UTF-8 mode */
2628 if (utf8)
2629 {
2630 register unsigned int d;
2631 for (fi = min;; fi++)
2632 {
2633 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2634 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2635 GETCHARINC(d, eptr);
2636 if (fi >= max || eptr >= md->end_subject || fc == d)
2637 RRETURN(MATCH_NOMATCH);
2638 }
2639 }
2640 else
2641 #endif
2642 /* Not UTF-8 mode */
2643 {
2644 for (fi = min;; fi++)
2645 {
2646 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2647 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2648 if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2649 RRETURN(MATCH_NOMATCH);
2650 }
2651 }
2652 /* Control never gets here */
2653 }
2654
2655 /* Maximize case */
2656
2657 else
2658 {
2659 pp = eptr;
2660
2661 #ifdef SUPPORT_UTF8
2662 /* UTF-8 mode */
2663 if (utf8)
2664 {
2665 register unsigned int d;
2666 for (i = min; i < max; i++)
2667 {
2668 int len = 1;
2669 if (eptr >= md->end_subject) break;
2670 GETCHARLEN(d, eptr, len);
2671 if (fc == d) break;
2672 eptr += len;
2673 }
2674 if (possessive) continue;
2675 for(;;)
2676 {
2677 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2678 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2679 if (eptr-- == pp) break; /* Stop if tried at original pos */
2680 BACKCHAR(eptr);
2681 }
2682 }
2683 else
2684 #endif
2685 /* Not UTF-8 mode */
2686 {
2687 for (i = min; i < max; i++)
2688 {
2689 if (eptr >= md->end_subject || fc == *eptr) break;
2690 eptr++;
2691 }
2692 if (possessive) continue;
2693 while (eptr >= pp)
2694 {
2695 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2696 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2697 eptr--;
2698 }
2699 }
2700
2701 RRETURN(MATCH_NOMATCH);
2702 }
2703 }
2704 /* Control never gets here */
2705
2706 /* Match a single character type repeatedly; several different opcodes
2707 share code. This is very similar to the code for single characters, but we
2708 repeat it in the interests of efficiency. */
2709
2710 case OP_TYPEEXACT:
2711 min = max = GET2(ecode, 1);
2712 minimize = TRUE;
2713 ecode += 3;
2714 goto REPEATTYPE;
2715
2716 case OP_TYPEUPTO:
2717 case OP_TYPEMINUPTO:
2718 min = 0;
2719 max = GET2(ecode, 1);
2720 minimize = *ecode == OP_TYPEMINUPTO;
2721 ecode += 3;
2722 goto REPEATTYPE;
2723
2724 case OP_TYPEPOSSTAR:
2725 possessive = TRUE;
2726 min = 0;
2727 max = INT_MAX;
2728 ecode++;
2729 goto REPEATTYPE;
2730
2731 case OP_TYPEPOSPLUS:
2732 possessive = TRUE;
2733 min = 1;
2734 max = INT_MAX;
2735 ecode++;
2736 goto REPEATTYPE;
2737
2738 case OP_TYPEPOSQUERY:
2739 possessive = TRUE;
2740 min = 0;
2741 max = 1;
2742 ecode++;
2743 goto REPEATTYPE;
2744
2745 case OP_TYPEPOSUPTO:
2746 possessive = TRUE;
2747 min = 0;
2748 max = GET2(ecode, 1);
2749 ecode += 3;
2750 goto REPEATTYPE;
2751
2752 case OP_TYPESTAR:
2753 case OP_TYPEMINSTAR:
2754 case OP_TYPEPLUS:
2755 case OP_TYPEMINPLUS:
2756 case OP_TYPEQUERY:
2757 case OP_TYPEMINQUERY:
2758 c = *ecode++ - OP_TYPESTAR;
2759 minimize = (c & 1) != 0;
2760 min = rep_min[c]; /* Pick up values from tables; */
2761 max = rep_max[c]; /* zero for max => infinity */
2762 if (max == 0) max = INT_MAX;
2763
2764 /* Common code for all repeated single character type matches. Note that
2765 in UTF-8 mode, only OP_DIGIT, OP_WHITESPACE, and OP_WORDCHAR are limited to
2766 one-byte characters; '.' and the negated/space/newline types may match more. */
2767
2768 REPEATTYPE:
2769 ctype = *ecode++; /* Code for the character type */
2770
2771 #ifdef SUPPORT_UCP
2772 if (ctype == OP_PROP || ctype == OP_NOTPROP)
2773 {
2774 prop_fail_result = ctype == OP_NOTPROP;
2775 prop_type = *ecode++;
2776 prop_value = *ecode++;
2777 }
2778 else prop_type = -1;
2779 #endif
2780
2781 /* First, ensure the minimum number of matches are present. Use inline
2782 code for maximizing the speed, and do the type test once at the start
2783 (i.e. keep it out of the loop). Also we can test that there are at least
2784 the minimum number of bytes before we start. This isn't as effective in
2785 UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2786 is tidier. Also separate the UCP code, which can be the same for both UTF-8
2787 and single-bytes. */
2788
2789 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2790 if (min > 0)
2791 {
2792 #ifdef SUPPORT_UCP
2793 if (prop_type >= 0)
2794 {
2795 switch(prop_type)
2796 {
2797 case PT_ANY:
2798 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2799 for (i = 1; i <= min; i++)
2800 {
2801 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2802 GETCHARINCTEST(c, eptr);
2803 }
2804 break;
2805
2806 case PT_LAMP:
2807 for (i = 1; i <= min; i++)
2808 {
2809 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2810 GETCHARINCTEST(c, eptr);
2811 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2812 if ((prop_chartype == ucp_Lu ||
2813 prop_chartype == ucp_Ll ||
2814 prop_chartype == ucp_Lt) == prop_fail_result)
2815 RRETURN(MATCH_NOMATCH);
2816 }
2817 break;
2818
2819 case PT_GC:
2820 for (i = 1; i <= min; i++)
2821 {
2822 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2823 GETCHARINCTEST(c, eptr);
2824 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2825 if ((prop_category == prop_value) == prop_fail_result)
2826 RRETURN(MATCH_NOMATCH);
2827 }
2828 break;
2829
2830 case PT_PC:
2831 for (i = 1; i <= min; i++)
2832 {
2833 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2834 GETCHARINCTEST(c, eptr);
2835 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2836 if ((prop_chartype == prop_value) == prop_fail_result)
2837 RRETURN(MATCH_NOMATCH);
2838 }
2839 break;
2840
2841 case PT_SC:
2842 for (i = 1; i <= min; i++)
2843 {
2844 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2845 GETCHARINCTEST(c, eptr);
2846 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2847 if ((prop_script == prop_value) == prop_fail_result)
2848 RRETURN(MATCH_NOMATCH);
2849 }
2850 break;
2851
2852 default:
2853 RRETURN(PCRE_ERROR_INTERNAL);
2854 }
2855 }
2856
2857 /* Match extended Unicode sequences. We will get here only if the
2858 support is in the binary; otherwise a compile-time error occurs. */
2859
2860 else if (ctype == OP_EXTUNI)
2861 {
2862 for (i = 1; i <= min; i++)
2863 {
2864 GETCHARINCTEST(c, eptr);
2865 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2866 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2867 while (eptr < md->end_subject)
2868 {
2869 int len = 1;
2870 if (!utf8) c = *eptr; else
2871 {
2872 GETCHARLEN(c, eptr, len);
2873 }
2874 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2875 if (prop_category != ucp_M) break;
2876 eptr += len;
2877 }
2878 }
2879 }
2880
2881 else
2882 #endif /* SUPPORT_UCP */
2883
2884 /* Handle all other cases when the coding is UTF-8 */
2885
2886 #ifdef SUPPORT_UTF8
2887 if (utf8) switch(ctype)
2888 {
2889 case OP_ANY:
2890 for (i = 1; i <= min; i++)
2891 {
2892 if (eptr >= md->end_subject ||
2893 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2894 RRETURN(MATCH_NOMATCH);
2895 eptr++;
2896 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2897 }
2898 break;
2899
2900 case OP_ANYBYTE:
2901 eptr += min;
2902 break;
2903
2904 case OP_ANYNL:
2905 for (i = 1; i <= min; i++)
2906 {
2907 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2908 GETCHARINC(c, eptr);
2909 switch(c)
2910 {
2911 default: RRETURN(MATCH_NOMATCH);
2912 case 0x000d:
2913 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2914 break;
2915 case 0x000a:
2916 case 0x000b:
2917 case 0x000c:
2918 case 0x0085:
2919 case 0x2028:
2920 case 0x2029:
2921 break;
2922 }
2923 }
2924 break;
2925
2926 case OP_NOT_HSPACE:
2927 for (i = 1; i <= min; i++)
2928 {
2929 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2930 GETCHARINC(c, eptr);
2931 switch(c)
2932 {
2933 default: break;
2934 case 0x09: /* HT */
2935 case 0x20: /* SPACE */
2936 case 0xa0: /* NBSP */
2937 case 0x1680: /* OGHAM SPACE MARK */
2938 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2939 case 0x2000: /* EN QUAD */
2940 case 0x2001: /* EM QUAD */
2941 case 0x2002: /* EN SPACE */
2942 case 0x2003: /* EM SPACE */
2943 case 0x2004: /* THREE-PER-EM SPACE */
2944 case 0x2005: /* FOUR-PER-EM SPACE */
2945 case 0x2006: /* SIX-PER-EM SPACE */
2946 case 0x2007: /* FIGURE SPACE */
2947 case 0x2008: /* PUNCTUATION SPACE */
2948 case 0x2009: /* THIN SPACE */
2949 case 0x200A: /* HAIR SPACE */
2950 case 0x202f: /* NARROW NO-BREAK SPACE */
2951 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2952 case 0x3000: /* IDEOGRAPHIC SPACE */
2953 RRETURN(MATCH_NOMATCH);
2954 }
2955 }
2956 break;
2957
2958 case OP_HSPACE:
2959 for (i = 1; i <= min; i++)
2960 {
2961 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2962 GETCHARINC(c, eptr);
2963 switch(c)
2964 {
2965 default: RRETURN(MATCH_NOMATCH);
2966 case 0x09: /* HT */
2967 case 0x20: /* SPACE */
2968 case 0xa0: /* NBSP */
2969 case 0x1680: /* OGHAM SPACE MARK */
2970 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2971 case 0x2000: /* EN QUAD */
2972 case 0x2001: /* EM QUAD */
2973 case 0x2002: /* EN SPACE */
2974 case 0x2003: /* EM SPACE */
2975 case 0x2004: /* THREE-PER-EM SPACE */
2976 case 0x2005: /* FOUR-PER-EM SPACE */
2977 case 0x2006: /* SIX-PER-EM SPACE */
2978 case 0x2007: /* FIGURE SPACE */
2979 case 0x2008: /* PUNCTUATION SPACE */
2980 case 0x2009: /* THIN SPACE */
2981 case 0x200A: /* HAIR SPACE */
2982 case 0x202f: /* NARROW NO-BREAK SPACE */
2983 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2984 case 0x3000: /* IDEOGRAPHIC SPACE */
2985 break;
2986 }
2987 }
2988 break;
2989
2990 case OP_NOT_VSPACE:
2991 for (i = 1; i <= min; i++)
2992 {
2993 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2994 GETCHARINC(c, eptr);
2995 switch(c)
2996 {
2997 default: break;
2998 case 0x0a: /* LF */
2999 case 0x0b: /* VT */
3000 case 0x0c: /* FF */
3001 case 0x0d: /* CR */
3002 case 0x85: /* NEL */
3003 case 0x2028: /* LINE SEPARATOR */
3004 case 0x2029: /* PARAGRAPH SEPARATOR */
3005 RRETURN(MATCH_NOMATCH);
3006 }
3007 }
3008 break;
3009
3010 case OP_VSPACE:
3011 for (i = 1; i <= min; i++)
3012 {
3013 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3014 GETCHARINC(c, eptr);
3015 switch(c)
3016 {
3017 default: RRETURN(MATCH_NOMATCH);
3018 case 0x0a: /* LF */
3019 case 0x0b: /* VT */
3020 case 0x0c: /* FF */
3021 case 0x0d: /* CR */
3022 case 0x85: /* NEL */
3023 case 0x2028: /* LINE SEPARATOR */
3024 case 0x2029: /* PARAGRAPH SEPARATOR */
3025 break;
3026 }
3027 }
3028 break;
3029
3030 case OP_NOT_DIGIT:
3031 for (i = 1; i <= min; i++)
3032 {
3033 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3034 GETCHARINC(c, eptr);
3035 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3036 RRETURN(MATCH_NOMATCH);
3037 }
3038 break;
3039
3040 case OP_DIGIT:
3041 for (i = 1; i <= min; i++)
3042 {
3043 if (eptr >= md->end_subject ||
3044 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3045 RRETURN(MATCH_NOMATCH);
3046 /* No need to skip more bytes - we know it's a 1-byte character */
3047 }
3048 break;
3049
3050 case OP_NOT_WHITESPACE:
3051 for (i = 1; i <= min; i++)
3052 {
3053 if (eptr >= md->end_subject ||
3054 (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
3055 RRETURN(MATCH_NOMATCH);
3056 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3057 }
3058 break;
3059
3060 case OP_WHITESPACE:
3061 for (i = 1; i <= min; i++)
3062 {
3063 if (eptr >= md->end_subject ||
3064 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3065 RRETURN(MATCH_NOMATCH);
3066 /* No need to skip more bytes - we know it's a 1-byte character */
3067 }
3068 break;
3069
3070 case OP_NOT_WORDCHAR:
3071 for (i = 1; i <= min; i++)
3072 {
3073 if (eptr >= md->end_subject ||
3074 (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
3075 RRETURN(MATCH_NOMATCH);
3076 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3077 }
3078 break;
3079
3080 case OP_WORDCHAR:
3081 for (i = 1; i <= min; i++)
3082 {
3083 if (eptr >= md->end_subject ||
3084 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3085 RRETURN(MATCH_NOMATCH);
3086 /* No need to skip more bytes - we know it's a 1-byte character */
3087 }
3088 break;
3089
3090 default:
3091 RRETURN(PCRE_ERROR_INTERNAL);
3092 } /* End switch(ctype) */
3093
3094 else
3095 #endif /* SUPPORT_UTF8 */
3096
3097 /* Code for the non-UTF-8 case for minimum matching of operators other
3098 than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3099 number of bytes present, as this was tested above. */
3100
3101 switch(ctype)
3102 {
3103 case OP_ANY:
3104 if ((ims & PCRE_DOTALL) == 0)
3105 {
3106 for (i = 1; i <= min; i++)
3107 {
3108 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3109 eptr++;
3110 }
3111 }
3112 else eptr += min;
3113 break;
3114
3115 case OP_ANYBYTE:
3116 eptr += min;
3117 break;
3118
3119 /* Because of the CRLF case, we can't assume the minimum number of
3120 bytes are present in this case. */
3121
3122 case OP_ANYNL:
3123 for (i = 1; i <= min; i++)
3124 {
3125 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3126 switch(*eptr++)
3127 {
3128 default: RRETURN(MATCH_NOMATCH);
3129 case 0x000d:
3130 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3131 break;
3132 case 0x000a:
3133 case 0x000b:
3134 case 0x000c:
3135 case 0x0085:
3136 break;
3137 }
3138 }
3139 break;
3140
3141 case OP_NOT_HSPACE:
3142 for (i = 1; i <= min; i++)
3143 {
3144 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3145 switch(*eptr++)
3146 {
3147 default: break;
3148 case 0x09: /* HT */
3149 case 0x20: /* SPACE */
3150 case 0xa0: /* NBSP */
3151 RRETURN(MATCH_NOMATCH);
3152 }
3153 }
3154 break;
3155
3156 case OP_HSPACE:
3157 for (i = 1; i <= min; i++)
3158 {
3159 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3160 switch(*eptr++)
3161 {
3162 default: RRETURN(MATCH_NOMATCH);
3163 case 0x09: /* HT */
3164 case 0x20: /* SPACE */
3165 case 0xa0: /* NBSP */
3166 break;
3167 }
3168 }
3169 break;
3170
3171 case OP_NOT_VSPACE:
3172 for (i = 1; i <= min; i++)
3173 {
3174 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3175 switch(*eptr++)
3176 {
3177 default: break;
3178 case 0x0a: /* LF */
3179 case 0x0b: /* VT */
3180 case 0x0c: /* FF */
3181 case 0x0d: /* CR */
3182 case 0x85: /* NEL */
3183 RRETURN(MATCH_NOMATCH);
3184 }
3185 }
3186 break;
3187
3188 case OP_VSPACE:
3189 for (i = 1; i <= min; i++)
3190 {
3191 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3192 switch(*eptr++)
3193 {
3194 default: RRETURN(MATCH_NOMATCH);
3195 case 0x0a: /* LF */
3196 case 0x0b: /* VT */
3197 case 0x0c: /* FF */
3198 case 0x0d: /* CR */
3199 case 0x85: /* NEL */
3200 break;
3201 }
3202 }
3203 break;
3204
3205 case OP_NOT_DIGIT:
3206 for (i = 1; i <= min; i++)
3207 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3208 break;
3209
3210 case OP_DIGIT:
3211 for (i = 1; i <= min; i++)
3212 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3213 break;
3214
3215 case OP_NOT_WHITESPACE:
3216 for (i = 1; i <= min; i++)
3217 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3218 break;
3219
3220 case OP_WHITESPACE:
3221 for (i = 1; i <= min; i++)
3222 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3223 break;
3224
3225 case OP_NOT_WORDCHAR:
3226 for (i = 1; i <= min; i++)
3227 if ((md->ctypes[*eptr++] & ctype_word) != 0)
3228 RRETURN(MATCH_NOMATCH);
3229 break;
3230
3231 case OP_WORDCHAR:
3232 for (i = 1; i <= min; i++)
3233 if ((md->ctypes[*eptr++] & ctype_word) == 0)
3234 RRETURN(MATCH_NOMATCH);
3235 break;
3236
3237 default:
3238 RRETURN(PCRE_ERROR_INTERNAL);
3239 }
3240 }
3241
3242 /* If min = max, continue at the same level without recursing */
3243
3244 if (min == max) continue;
3245
3246 /* If minimizing, we have to test the rest of the pattern before each
3247 subsequent match. Again, separate the UTF-8 case for speed, and also
3248 separate the UCP cases. */
3249
3250 if (minimize)
3251 {
3252 #ifdef SUPPORT_UCP
3253 if (prop_type >= 0)
3254 {
3255 switch(prop_type)
3256 {
3257 case PT_ANY:
3258 for (fi = min;; fi++)
3259 {
3260 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3261 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3262 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3263 GETCHARINC(c, eptr);
3264 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3265 }
3266 /* Control never gets here */
3267
3268 case PT_LAMP:
3269 for (fi = min;; fi++)
3270 {
3271 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3272 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3273 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3274 GETCHARINC(c, eptr);
3275 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3276 if ((prop_chartype == ucp_Lu ||
3277 prop_chartype == ucp_Ll ||
3278 prop_chartype == ucp_Lt) == prop_fail_result)
3279 RRETURN(MATCH_NOMATCH);
3280 }
3281 /* Control never gets here */
3282
3283 case PT_GC:
3284 for (fi = min;; fi++)
3285 {
3286 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3287 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3288 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3289 GETCHARINC(c, eptr);
3290 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3291 if ((prop_category == prop_value) == prop_fail_result)
3292 RRETURN(MATCH_NOMATCH);
3293 }
3294 /* Control never gets here */
3295
3296 case PT_PC:
3297 for (fi = min;; fi++)
3298 {
3299 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3300 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3302 GETCHARINC(c, eptr);
3303 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3304 if ((prop_chartype == prop_value) == prop_fail_result)
3305 RRETURN(MATCH_NOMATCH);
3306 }
3307 /* Control never gets here */
3308
3309 case PT_SC:
3310 for (fi = min;; fi++)
3311 {
3312 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3313 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3314 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3315 GETCHARINC(c, eptr);
3316 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3317 if ((prop_script == prop_value) == prop_fail_result)
3318 RRETURN(MATCH_NOMATCH);
3319 }
3320 /* Control never gets here */
3321
3322 default:
3323 RRETURN(PCRE_ERROR_INTERNAL);
3324 }
3325 }
3326
3327 /* Match extended Unicode sequences. We will get here only if the
3328 support is in the binary; otherwise a compile-time error occurs. */
3329
3330 else if (ctype == OP_EXTUNI)
3331 {
3332 for (fi = min;; fi++)
3333 {
3334 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3335 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3336 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3337 GETCHARINCTEST(c, eptr);
3338 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3339 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3340 while (eptr < md->end_subject)
3341 {
3342 int len = 1;
3343 if (!utf8) c = *eptr; else
3344 {
3345 GETCHARLEN(c, eptr, len);
3346 }
3347 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3348 if (prop_category != ucp_M) break;
3349 eptr += len;
3350 }
3351 }
3352 }
3353
3354 else
3355 #endif /* SUPPORT_UCP */
3356
3357 #ifdef SUPPORT_UTF8
3358 /* UTF-8 mode */
3359 if (utf8)
3360 {
3361 for (fi = min;; fi++)
3362 {
3363 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3364 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3365 if (fi >= max || eptr >= md->end_subject ||
3366 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3367 IS_NEWLINE(eptr)))
3368 RRETURN(MATCH_NOMATCH);
3369
3370 GETCHARINC(c, eptr);
3371 switch(ctype)
3372 {
3373 case OP_ANY: /* This is the DOTALL case */
3374 break;
3375
3376 case OP_ANYBYTE:
3377 break;
3378
3379 case OP_ANYNL:
3380 switch(c)
3381 {
3382 default: RRETURN(MATCH_NOMATCH);
3383 case 0x000d:
3384 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3385 break;
3386 case 0x000a:
3387 case 0x000b:
3388 case 0x000c:
3389 case 0x0085:
3390 case 0x2028:
3391 case 0x2029:
3392 break;
3393 }
3394 break;
3395
3396 case OP_NOT_HSPACE:
3397 switch(c)
3398 {
3399 default: break;
3400 case 0x09: /* HT */
3401 case 0x20: /* SPACE */
3402 case 0xa0: /* NBSP */
3403 case 0x1680: /* OGHAM SPACE MARK */
3404 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3405 case 0x2000: /* EN QUAD */
3406 case 0x2001: /* EM QUAD */
3407 case 0x2002: /* EN SPACE */
3408 case 0x2003: /* EM SPACE */
3409 case 0x2004: /* THREE-PER-EM SPACE */
3410 case 0x2005: /* FOUR-PER-EM SPACE */
3411 case 0x2006: /* SIX-PER-EM SPACE */
3412 case 0x2007: /* FIGURE SPACE */
3413 case 0x2008: /* PUNCTUATION SPACE */
3414 case 0x2009: /* THIN SPACE */
3415 case 0x200A: /* HAIR SPACE */
3416 case 0x202f: /* NARROW NO-BREAK SPACE */
3417 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3418 case 0x3000: /* IDEOGRAPHIC SPACE */
3419 RRETURN(MATCH_NOMATCH);
3420 }
3421 break;
3422
3423 case OP_HSPACE:
3424 switch(c)
3425 {
3426 default: RRETURN(MATCH_NOMATCH);
3427 case 0x09: /* HT */
3428 case 0x20: /* SPACE */
3429 case 0xa0: /* NBSP */
3430 case 0x1680: /* OGHAM SPACE MARK */
3431 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3432 case 0x2000: /* EN QUAD */
3433 case 0x2001: /* EM QUAD */
3434 case 0x2002: /* EN SPACE */
3435 case 0x2003: /* EM SPACE */
3436 case 0x2004: /* THREE-PER-EM SPACE */
3437 case 0x2005: /* FOUR-PER-EM SPACE */
3438 case 0x2006: /* SIX-PER-EM SPACE */
3439 case 0x2007: /* FIGURE SPACE */
3440 case 0x2008: /* PUNCTUATION SPACE */
3441 case 0x2009: /* THIN SPACE */
3442 case 0x200A: /* HAIR SPACE */
3443 case 0x202f: /* NARROW NO-BREAK SPACE */
3444 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3445 case 0x3000: /* IDEOGRAPHIC SPACE */
3446 break;
3447 }
3448 break;
3449
3450 case OP_NOT_VSPACE:
3451 switch(c)
3452 {
3453 default: break;
3454 case 0x0a: /* LF */
3455 case 0x0b: /* VT */
3456 case 0x0c: /* FF */
3457 case 0x0d: /* CR */
3458 case 0x85: /* NEL */
3459 case 0x2028: /* LINE SEPARATOR */
3460 case 0x2029: /* PARAGRAPH SEPARATOR */
3461 RRETURN(MATCH_NOMATCH);
3462 }
3463 break;
3464
3465 case OP_VSPACE:
3466 switch(c)
3467 {
3468 default: RRETURN(MATCH_NOMATCH);
3469 case 0x0a: /* LF */
3470 case 0x0b: /* VT */
3471 case 0x0c: /* FF */
3472 case 0x0d: /* CR */
3473 case 0x85: /* NEL */
3474 case 0x2028: /* LINE SEPARATOR */
3475 case 0x2029: /* PARAGRAPH SEPARATOR */
3476 break;
3477 }
3478 break;
3479
3480 case OP_NOT_DIGIT:
3481 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3482 RRETURN(MATCH_NOMATCH);
3483 break;
3484
3485 case OP_DIGIT:
3486 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3487 RRETURN(MATCH_NOMATCH);
3488 break;
3489
3490 case OP_NOT_WHITESPACE:
3491 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3492 RRETURN(MATCH_NOMATCH);
3493 break;
3494
3495 case OP_WHITESPACE:
3496 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3497 RRETURN(MATCH_NOMATCH);
3498 break;
3499
3500 case OP_NOT_WORDCHAR:
3501 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3502 RRETURN(MATCH_NOMATCH);
3503 break;
3504
3505 case OP_WORDCHAR:
3506 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3507 RRETURN(MATCH_NOMATCH);
3508 break;
3509
3510 default:
3511 RRETURN(PCRE_ERROR_INTERNAL);
3512 }
3513 }
3514 }
3515 else
3516 #endif
3517 /* Not UTF-8 mode */
3518 {
3519 for (fi = min;; fi++)
3520 {
3521 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3522 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3523 if (fi >= max || eptr >= md->end_subject ||
3524 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3525 RRETURN(MATCH_NOMATCH);
3526
3527 c = *eptr++;
3528 switch(ctype)
3529 {
3530 case OP_ANY: /* This is the DOTALL case */
3531 break;
3532
3533 case OP_ANYBYTE:
3534 break;
3535
3536 case OP_ANYNL:
3537 switch(c)
3538 {
3539 default: RRETURN(MATCH_NOMATCH);
3540 case 0x000d:
3541 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3542 break;
3543 case 0x000a:
3544 case 0x000b:
3545 case 0x000c:
3546 case 0x0085:
3547 break;
3548 }
3549 break;
3550
3551 case OP_NOT_HSPACE:
3552 switch(c)
3553 {
3554 default: break;
3555 case 0x09: /* HT */
3556 case 0x20: /* SPACE */
3557 case 0xa0: /* NBSP */
3558 RRETURN(MATCH_NOMATCH);
3559 }
3560 break;
3561
3562 case OP_HSPACE:
3563 switch(c)
3564 {
3565 default: RRETURN(MATCH_NOMATCH);
3566 case 0x09: /* HT */
3567 case 0x20: /* SPACE */
3568 case 0xa0: /* NBSP */
3569 break;
3570 }
3571 break;
3572
3573 case OP_NOT_VSPACE:
3574 switch(c)
3575 {
3576 default: break;
3577 case 0x0a: /* LF */
3578 case 0x0b: /* VT */
3579 case 0x0c: /* FF */
3580 case 0x0d: /* CR */
3581 case 0x85: /* NEL */
3582 RRETURN(MATCH_NOMATCH);
3583 }
3584 break;
3585
3586 case OP_VSPACE:
3587 switch(c)
3588 {
3589 default: RRETURN(MATCH_NOMATCH);
3590 case 0x0a: /* LF */
3591 case 0x0b: /* VT */
3592 case 0x0c: /* FF */
3593 case 0x0d: /* CR */
3594 case 0x85: /* NEL */
3595 break;
3596 }
3597 break;
3598
3599 case OP_NOT_DIGIT:
3600 if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3601 break;
3602
3603 case OP_DIGIT:
3604 if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3605 break;
3606
3607 case OP_NOT_WHITESPACE:
3608 if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3609 break;
3610
3611 case OP_WHITESPACE:
3612 if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3613 break;
3614
3615 case OP_NOT_WORDCHAR:
3616 if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3617 break;
3618
3619 case OP_WORDCHAR:
3620 if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3621 break;
3622
3623 default:
3624 RRETURN(PCRE_ERROR_INTERNAL);
3625 }
3626 }
3627 }
3628 /* Control never gets here */
3629 }
3630
3631 /* If maximizing, it is worth using inline code for speed, doing the type
3632 test once at the start (i.e. keep it out of the loop). Again, keep the
3633 UTF-8 and UCP stuff separate. */
3634
3635 else
3636 {
3637 pp = eptr; /* Remember where we started */
3638
3639 #ifdef SUPPORT_UCP
3640 if (prop_type >= 0)
3641 {
3642 switch(prop_type)
3643 {
3644 case PT_ANY:
3645 for (i = min; i < max; i++)
3646 {
3647 int len = 1;
3648 if (eptr >= md->end_subject) break;
3649 GETCHARLEN(c, eptr, len);
3650 if (prop_fail_result) break;
3651 eptr+= len;
3652 }
3653 break;
3654
3655 case PT_LAMP:
3656 for (i = min; i < max; i++)
3657 {
3658 int len = 1;
3659 if (eptr >= md->end_subject) break;
3660 GETCHARLEN(c, eptr, len);
3661 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3662 if ((prop_chartype == ucp_Lu ||
3663 prop_chartype == ucp_Ll ||
3664 prop_chartype == ucp_Lt) == prop_fail_result)
3665 break;
3666 eptr+= len;
3667 }
3668 break;
3669
3670 case PT_GC:
3671 for (i = min; i < max; i++)
3672 {
3673 int len = 1;
3674 if (eptr >= md->end_subject) break;
3675 GETCHARLEN(c, eptr, len);
3676 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3677 if ((prop_category == prop_value) == prop_fail_result)
3678 break;
3679 eptr+= len;
3680 }
3681 break;
3682
3683 case PT_PC:
3684 for (i = min; i < max; i++)
3685 {
3686 int len = 1;
3687 if (eptr >= md->end_subject) break;
3688 GETCHARLEN(c, eptr, len);
3689 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3690 if ((prop_chartype == prop_value) == prop_fail_result)
3691 break;
3692 eptr+= len;
3693 }
3694 break;
3695
3696 case PT_SC:
3697 for (i = min; i < max; i++)
3698 {
3699 int len = 1;
3700 if (eptr >= md->end_subject) break;
3701 GETCHARLEN(c, eptr, len);
3702 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3703 if ((prop_script == prop_value) == prop_fail_result)
3704 break;
3705 eptr+= len;
3706 }
3707 break;
3708 }
3709
3710 /* eptr is now past the end of the maximum run */
3711
3712 if (possessive) continue;
3713 for(;;)
3714 {
3715 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3716 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3717 if (eptr-- == pp) break; /* Stop if tried at original pos */
3718 BACKCHAR(eptr);
3719 }
3720 }
3721
3722 /* Match extended Unicode sequences. We will get here only if the
3723 support is in the binary; otherwise a compile-time error occurs. */
3724
3725 else if (ctype == OP_EXTUNI)
3726 {
3727 for (i = min; i < max; i++)
3728 {
3729 if (eptr >= md->end_subject) break;
3730 GETCHARINCTEST(c, eptr);
3731 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3732 if (prop_category == ucp_M) break;
3733 while (eptr < md->end_subject)
3734 {
3735 int len = 1;
3736 if (!utf8) c = *eptr; else
3737 {
3738 GETCHARLEN(c, eptr, len);
3739 }
3740 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3741 if (prop_category != ucp_M) break;
3742 eptr += len;
3743 }
3744 }
3745
3746 /* eptr is now past the end of the maximum run */
3747
3748 if (possessive) continue;
3749 for(;;)
3750 {
3751 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3752 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3753 if (eptr-- == pp) break; /* Stop if tried at original pos */
3754 for (;;) /* Move back over one extended */
3755 {
3756 int len = 1;
3757 BACKCHAR(eptr);
3758 if (!utf8) c = *eptr; else
3759 {
3760 GETCHARLEN(c, eptr, len);
3761 }
3762 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3763 if (prop_category != ucp_M) break;
3764 eptr--;
3765 }
3766 }
3767 }
3768
3769 else
3770 #endif /* SUPPORT_UCP */
3771
3772 #ifdef SUPPORT_UTF8
3773 /* UTF-8 mode */
3774
3775 if (utf8)
3776 {
3777 switch(ctype)
3778 {
3779 case OP_ANY:
3780 if (max < INT_MAX)
3781 {
3782 if ((ims & PCRE_DOTALL) == 0)
3783 {
3784 for (i = min; i < max; i++)
3785 {
3786 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3787 eptr++;
3788 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3789 }
3790 }
3791 else
3792 {
3793 for (i = min; i < max; i++)
3794 {
3795 if (eptr >= md->end_subject) break;
3796 eptr++;
3797 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3798 }
3799 }
3800 }
3801
3802 /* Handle unlimited UTF-8 repeat */
3803
3804 else
3805 {
3806 if ((ims & PCRE_DOTALL) == 0)
3807 {
3808 for (i = min; i < max; i++)
3809 {
3810 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3811 eptr++;
3812 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3813 }
3814 }
3815 else
3816 {
3817 eptr = md->end_subject;
3818 }
3819 }
3820 break;
3821
3822 /* The byte case is the same as non-UTF8 */
3823
3824 case OP_ANYBYTE:
3825 c = max - min;
3826 if (c > (unsigned int)(md->end_subject - eptr))
3827 c = md->end_subject - eptr;
3828 eptr += c;
3829 break;
3830
3831 case OP_ANYNL:
3832 for (i = min; i < max; i++)
3833 {
3834 int len = 1;
3835 if (eptr >= md->end_subject) break;
3836 GETCHARLEN(c, eptr, len);
3837 if (c == 0x000d)
3838 {
3839 if (++eptr >= md->end_subject) break;
3840 if (*eptr == 0x000a) eptr++;
3841 }
3842 else
3843 {
3844 if (c != 0x000a && c != 0x000b && c != 0x000c &&
3845 c != 0x0085 && c != 0x2028 && c != 0x2029)
3846 break;
3847 eptr += len;
3848 }
3849 }
3850 break;
3851
3852 case OP_NOT_HSPACE:
3853 case OP_HSPACE:
3854 for (i = min; i < max; i++)
3855 {
3856 BOOL gotspace;
3857 int len = 1;
3858 if (eptr >= md->end_subject) break;
3859 GETCHARLEN(c, eptr, len);
3860 switch(c)
3861 {
3862 default: gotspace = FALSE; break;
3863 case 0x09: /* HT */
3864 case 0x20: /* SPACE */
3865 case 0xa0: /* NBSP */
3866 case 0x1680: /* OGHAM SPACE MARK */
3867 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3868 case 0x2000: /* EN QUAD */
3869 case 0x2001: /* EM QUAD */
3870 case 0x2002: /* EN SPACE */
3871 case 0x2003: /* EM SPACE */
3872 case 0x2004: /* THREE-PER-EM SPACE */
3873 case 0x2005: /* FOUR-PER-EM SPACE */
3874 case 0x2006: /* SIX-PER-EM SPACE */
3875 case 0x2007: /* FIGURE SPACE */
3876 case 0x2008: /* PUNCTUATION SPACE */
3877 case 0x2009: /* THIN SPACE */
3878 case 0x200A: /* HAIR SPACE */
3879 case 0x202f: /* NARROW NO-BREAK SPACE */
3880 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3881 case 0x3000: /* IDEOGRAPHIC SPACE */
3882 gotspace = TRUE;
3883 break;
3884 }
3885 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3886 eptr += len;
3887 }
3888 break;
3889
3890 case OP_NOT_VSPACE:
3891 case OP_VSPACE:
3892 for (i = min; i < max; i++)
3893 {
3894 BOOL gotspace;
3895 int len = 1;
3896 if (eptr >= md->end_subject) break;
3897 GETCHARLEN(c, eptr, len);
3898 switch(c)
3899 {
3900 default: gotspace = FALSE; break;
3901 case 0x0a: /* LF */
3902 case 0x0b: /* VT */
3903 case 0x0c: /* FF */
3904 case 0x0d: /* CR */
3905 case 0x85: /* NEL */
3906 case 0x2028: /* LINE SEPARATOR */
3907 case 0x2029: /* PARAGRAPH SEPARATOR */
3908 gotspace = TRUE;
3909 break;
3910 }
3911 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3912 eptr += len;
3913 }
3914 break;
3915
3916 case OP_NOT_DIGIT:
3917 for (i = min; i < max; i++)
3918 {
3919 int len = 1;
3920 if (eptr >= md->end_subject) break;
3921 GETCHARLEN(c, eptr, len);
3922 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3923 eptr+= len;
3924 }
3925 break;
3926
3927 case OP_DIGIT:
3928 for (i = min; i < max; i++)
3929 {
3930 int len = 1;
3931 if (eptr >= md->end_subject) break;
3932 GETCHARLEN(c, eptr, len);
3933 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3934 eptr+= len;
3935 }
3936 break;
3937
3938 case OP_NOT_WHITESPACE:
3939 for (i = min; i < max; i++)
3940 {
3941 int len = 1;
3942 if (eptr >= md->end_subject) break;
3943 GETCHARLEN(c, eptr, len);
3944 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3945 eptr+= len;
3946 }
3947 break;
3948
3949 case OP_WHITESPACE:
3950 for (i = min; i < max; i++)
3951 {
3952 int len = 1;
3953 if (eptr >= md->end_subject) break;
3954 GETCHARLEN(c, eptr, len);
3955 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3956 eptr+= len;
3957 }
3958 break;
3959
3960 case OP_NOT_WORDCHAR:
3961 for (i = min; i < max; i++)
3962 {
3963 int len = 1;
3964 if (eptr >= md->end_subject) break;
3965 GETCHARLEN(c, eptr, len);
3966 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3967 eptr+= len;
3968 }
3969 break;
3970
3971 case OP_WORDCHAR:
3972 for (i = min; i < max; i++)
3973 {
3974 int len = 1;
3975 if (eptr >= md->end_subject) break;
3976 GETCHARLEN(c, eptr, len);
3977 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3978 eptr+= len;
3979 }
3980 break;
3981
3982 default:
3983 RRETURN(PCRE_ERROR_INTERNAL);
3984 }
3985
3986 /* eptr is now past the end of the maximum run */
3987
3988 if (possessive) continue;
3989 for(;;)
3990 {
3991 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3992 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3993 if (eptr-- == pp) break; /* Stop if tried at original pos */
3994 BACKCHAR(eptr);
3995 }
3996 }
3997 else
3998 #endif
3999
4000 /* Not UTF-8 mode */
4001 {
4002 switch(ctype)
4003 {
4004 case OP_ANY:
4005 if ((ims & PCRE_DOTALL) == 0)
4006 {
4007 for (i = min; i < max; i++)
4008 {
4009 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4010 eptr++;
4011 }
4012 break;
4013 }
4014 /* For DOTALL case, fall through and treat as \C */
4015
4016 case OP_ANYBYTE:
4017 c = max - min;
4018 if (c > (unsigned int)(md->end_subject - eptr))
4019 c = md->end_subject - eptr;
4020 eptr += c;
4021 break;
4022
4023 case OP_ANYNL:
4024 for (i = min; i < max; i++)
4025 {
4026 if (eptr >= md->end_subject) break;
4027 c = *eptr;
4028 if (c == 0x000d)
4029 {
4030 if (++eptr >= md->end_subject) break;
4031 if (*eptr == 0x000a) eptr++;
4032 }
4033 else
4034 {
4035 if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
4036 break;
4037 eptr++;
4038 }
4039 }
4040 break;
4041
4042 case OP_NOT_HSPACE:
4043 for (i = min; i < max; i++)
4044 {
4045 if (eptr >= md->end_subject) break;
4046 c = *eptr;
4047 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4048 eptr++;
4049 }
4050 break;
4051
4052 case OP_HSPACE:
4053 for (i = min; i < max; i++)
4054 {
4055 if (eptr >= md->end_subject) break;
4056 c = *eptr;
4057 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4058 eptr++;
4059 }
4060 break;
4061
4062 case OP_NOT_VSPACE:
4063 for (i = min; i < max; i++)
4064 {
4065 if (eptr >= md->end_subject) break;
4066 c = *eptr;
4067 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4068 break;
4069 eptr++;
4070 }
4071 break;
4072
4073 case OP_VSPACE:
4074 for (i = min; i < max; i++)
4075 {
4076 if (eptr >= md->end_subject) break;
4077 c = *eptr;
4078 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4079 break;
4080 eptr++;
4081 }
4082 break;
4083
4084 case OP_NOT_DIGIT:
4085 for (i = min; i < max; i++)
4086 {
4087 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4088 break;
4089 eptr++;
4090 }
4091 break;
4092
4093 case OP_DIGIT:
4094 for (i = min; i < max; i++)
4095 {
4096 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4097 break;
4098 eptr++;
4099 }
4100 break;
4101
4102 case OP_NOT_WHITESPACE:
4103 for (i = min; i < max; i++)
4104 {
4105 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4106 break;
4107 eptr++;
4108 }
4109 break;
4110
4111 case OP_WHITESPACE:
4112 for (i = min; i < max; i++)
4113 {
4114 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4115 break;
4116 eptr++;
4117 }
4118 break;
4119
4120 case OP_NOT_WORDCHAR:
4121 for (i = min; i < max; i++)
4122 {
4123 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4124 break;
4125 eptr++;
4126 }
4127 break;
4128
4129 case OP_WORDCHAR:
4130 for (i = min; i < max; i++)
4131 {
4132 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4133 break;
4134 eptr++;
4135 }
4136 break;
4137
4138 default:
4139 RRETURN(PCRE_ERROR_INTERNAL);
4140 }
4141
4142 /* eptr is now past the end of the maximum run */
4143
4144 if (possessive) continue;
4145 while (eptr >= pp)
4146 {
4147 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4148 eptr--;
4149 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4150 }
4151 }
4152
4153 /* Get here if we can't make it match with any permitted repetitions */
4154
4155 RRETURN(MATCH_NOMATCH);
4156 }
4157 /* Control never gets here */
4158
4159 /* There's been some horrible disaster. Arrival here can only mean there is
4160 something seriously wrong in the code above or the OP_xxx definitions. */
4161
4162 default:
4163 DPRINTF(("Unknown opcode %d\n", *ecode));
4164 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4165 }
4166
4167 /* Do not stick any code in here without much thought; it is assumed
4168 that "continue" in the code above comes out to here to repeat the main
4169 loop. */
4170
4171 } /* End of main loop */
4172 /* Control never reaches here */
4173
4174
4175 /* When compiling to use the heap rather than the stack for recursive calls to
4176 match(), the RRETURN() macro jumps here. The number that is saved in
4177 frame->Xwhere indicates which label we actually want to return to. */
4178
4179 #ifdef NO_RECURSE
4180 #define LBL(val) case val: goto L_RM##val;
4181 HEAP_RETURN:
4182 switch (frame->Xwhere)
4183 {
4184 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4185 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4186 LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4187 LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4188 LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4189 LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4190 default:
4191 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4192 return PCRE_ERROR_INTERNAL;
4193 }
4194 #undef LBL
4195 #endif /* NO_RECURSE */
4196 }
4197
4198
4199 /***************************************************************************
4200 ****************************************************************************
4201 RECURSION IN THE match() FUNCTION
4202
4203 Undefine all the macros that were defined above to handle this. */
4204
4205 #ifdef NO_RECURSE /* the names in this group are #undef'd only when NO_RECURSE is defined */
4206 #undef eptr
4207 #undef ecode
4208 #undef mstart
4209 #undef offset_top
4210 #undef ims
4211 #undef eptrb
4212 #undef flags
4213
4214 #undef callpat
4215 #undef charptr
4216 #undef data
4217 #undef next
4218 #undef pp
4219 #undef prev
4220 #undef saved_eptr
4221
4222 #undef new_recursive
4223
4224 #undef cur_is_word
4225 #undef condition
4226 #undef prev_is_word
4227
4228 #undef original_ims
4229
4230 #undef ctype
4231 #undef length
4232 #undef max
4233 #undef min
4234 #undef number
4235 #undef offset
4236 #undef op
4237 #undef save_capture_last
4238 #undef save_offset1
4239 #undef save_offset2
4240 #undef save_offset3
4241 #undef stacksave
4242
4243 #undef newptrb
4244
4245 #endif /* NO_RECURSE */
4246
4247 /* These two are defined as macros in both cases (presumably with and without
     NO_RECURSE), so they are undefined unconditionally, outside the #ifdef. */
4248
4249 #undef fc
4250 #undef fi
4251
4252 /***************************************************************************
4253 ***************************************************************************/
4254
4255
4256
4257 /*************************************************
4258 * Execute a Regular Expression *
4259 *************************************************/
4260
4261 /* This function applies a compiled re to a subject string and picks out
4262 portions of the string if it matches. Two elements in the vector are set for
4263 each substring: the offsets to the start and end of the substring.
4264
4265 Arguments:
4266 argument_re points to the compiled expression
4267 extra_data points to extra data or is NULL
4268 subject points to the subject string
4269 length length of subject string (may contain binary zeros)
4270 start_offset where to start in the subject string
4271 options option bits
4272 offsets points to a vector of ints to be filled in with offsets
4273 offsetcount the number of elements in the vector
4274
4275 Returns: > 0 => success; value is the number of elements filled in
4276 = 0 => success, but offsets is not big enough
4277 -1 => failed to match
4278 < -1 => some kind of unexpected problem
4279 */
4280
4281 PCRE_EXP_DEFN int
4282 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4283 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4284 int offsetcount)
4285 {
4286 int rc, resetcount, ocount;
4287 int first_byte = -1;
4288 int req_byte = -1;
4289 int req_byte2 = -1;
4290 int newline;
4291 unsigned long int ims;
4292 BOOL using_temporary_offsets = FALSE;
4293 BOOL anchored;
4294 BOOL startline;
4295 BOOL firstline;
4296 BOOL first_byte_caseless = FALSE;
4297 BOOL req_byte_caseless = FALSE;
4298 BOOL utf8;
4299 match_data match_block;
4300 match_data *md = &match_block;
4301 const uschar *tables;
4302 const uschar *start_bits = NULL;
4303 USPTR start_match = (USPTR)subject + start_offset;
4304 USPTR end_subject;
4305 USPTR req_byte_ptr = start_match - 1;
4306
4307 pcre_study_data internal_study;
4308 const pcre_study_data *study;
4309
4310 real_pcre internal_re;
4311 const real_pcre *external_re = (const real_pcre *)argument_re;
4312 const real_pcre *re = external_re;
4313
4314 /* Plausibility checks */
4315
4316 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4317 if (re == NULL || subject == NULL ||
4318 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4319 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
4320
4321 /* Fish out the optional data from the extra_data structure, first setting
4322 the default values. */
4323
4324 study = NULL;
4325 md->match_limit = MATCH_LIMIT;
4326 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4327 md->callout_data = NULL;
4328
4329 /* The table pointer is always in native byte order. */
4330
4331 tables = external_re->tables;
4332
4333 if (extra_data != NULL)
4334 {
4335 register unsigned int flags = extra_data->flags;
4336 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4337 study = (const pcre_study_data *)extra_data->study_data;
4338 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4339 md->match_limit = extra_data->match_limit;
4340 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4341 md->match_limit_recursion = extra_data->match_limit_recursion;
4342 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4343 md->callout_data = extra_data->callout_data;
4344 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4345 }
4346
4347 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
4348 is a feature that makes it possible to save compiled regexes and re-use them
4349 in other programs later. */
4350
4351 if (tables == NULL) tables = _pcre_default_tables;
4352
4353 /* Check that the first field in the block is the magic number. If it is not,
4354 test for a regex that was compiled on a host of opposite endianness. If this is
4355 the case, flipped values are put in internal_re and internal_study if there was
4356 study data too. */
4357
4358 if (re->magic_number != MAGIC_NUMBER)
4359 {
4360 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
4361 if (re == NULL) return PCRE_ERROR_BADMAGIC;
4362 if (study != NULL) study = &internal_study;
4363 }
4364
4365 /* Set up other data */
4366
4367 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4368 startline = (re->options & PCRE_STARTLINE) != 0;
4369 firstline = (re->options & PCRE_FIRSTLINE) != 0;
4370
4371 /* The code starts after the real_pcre block and the capture name table. */
4372
4373 md->start_code = (const uschar *)external_re + re->name_table_offset +
4374 re->name_count * re->name_entry_size;
4375
4376 md->start_subject = (USPTR)subject;
4377 md->start_offset = start_offset;
4378 md->end_subject = md->start_subject + length;
4379 end_subject = md->end_subject;
4380
4381 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4382 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4383
4384 md->notbol = (options & PCRE_NOTBOL) != 0;
4385 md->noteol = (options & PCRE_NOTEOL) != 0;
4386 md->notempty = (options & PCRE_NOTEMPTY) != 0;
4387 md->partial = (options & PCRE_PARTIAL) != 0;
4388 md->hitend = FALSE;
4389
4390 md->recursive = NULL; /* No recursion at top level */
4391
4392 md->lcc = tables + lcc_offset;
4393 md->ctypes = tables + ctypes_offset;
4394
4395 /* Handle different types of newline. The three bits give eight cases. If
4396 nothing is set at run time, whatever was used at compile time applies. */
4397
4398 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4399 PCRE_NEWLINE_BITS)
4400 {
4401 case 0: newline = NEWLINE; break; /* Compile-time default */
4402 case PCRE_NEWLINE_CR: newline = '\r'; break;
4403 case PCRE_NEWLINE_LF: newline = '\n'; break;
4404 case PCRE_NEWLINE_CR+
4405 PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4406 case PCRE_NEWLINE_ANY: newline = -1; break;
4407 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4408 default: return PCRE_ERROR_BADNEWLINE;
4409 }
4410
4411 if (newline == -2)
4412 {
4413 md->nltype = NLTYPE_ANYCRLF;
4414 }
4415 else if (newline < 0)
4416 {
4417 md->nltype = NLTYPE_ANY;
4418 }
4419 else
4420 {
4421 md->nltype = NLTYPE_FIXED;
4422 if (newline > 255)
4423 {
4424 md->nllen = 2;
4425 md->nl[0] = (newline >> 8) & 255;
4426 md->nl[1] = newline & 255;
4427 }
4428 else
4429 {
4430 md->nllen = 1;
4431 md->nl[0] = newline;
4432 }
4433 }
4434
4435 /* Partial matching is supported only for a restricted set of regexes at the
4436 moment. */
4437
4438 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
4439 return PCRE_ERROR_BADPARTIAL;
4440
4441 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4442 back the character offset. */
4443
4444 #ifdef SUPPORT_UTF8
4445 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4446 {
4447 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4448 return PCRE_ERROR_BADUTF8;
4449 if (start_offset > 0 && start_offset < length)
4450 {
4451 int tb = ((uschar *)subject)[start_offset];
4452 if (tb > 127)
4453 {
4454 tb &= 0xc0;
4455 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
4456 }
4457 }
4458 }
4459 #endif
4460
4461 /* The ims options can vary during the matching as a result of the presence
4462 of (?ims) items in the pattern. They are kept in a local variable so that
4463 restoring at the exit of a group is easy. */
4464
4465 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4466
4467 /* If the expression has got more back references than the offsets supplied can
4468 hold, we get a temporary chunk of working store to use during the matching.
4469 Otherwise, we can use the vector supplied, rounding down its size to a multiple
4470 of 3. */
4471
4472 ocount = offsetcount - (offsetcount % 3);
4473
4474 if (re->top_backref > 0 && re->top_backref >= ocount/3)
4475 {
4476 ocount = re->top_backref * 3 + 3;
4477 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4478 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4479 using_temporary_offsets = TRUE;
4480 DPRINTF(("Got memory to hold back references\n"));
4481 }
4482 else md->offset_vector = offsets;
4483
4484 md->offset_end = ocount;
4485 md->offset_max = (2*ocount)/3;
4486 md->offset_overflow = FALSE;
4487 md->capture_last = -1;
4488
4489 /* Compute the minimum number of offsets that we need to reset each time. Doing
4490 this makes a huge difference to execution time when there aren't many brackets
4491 in the pattern. */
4492
4493 resetcount = 2 + re->top_bracket * 2;
4494 if (resetcount > offsetcount) resetcount = ocount;
4495
4496 /* Reset the working variable associated with each extraction. These should
4497 never be used unless previously set, but they get saved and restored, and so we
4498 initialize them to avoid reading uninitialized locations. */
4499
4500 if (md->offset_vector != NULL)
4501 {
4502 register int *iptr = md->offset_vector + ocount;
4503 register int *iend = iptr - resetcount/2 + 1;
4504 while (--iptr >= iend) *iptr = -1;
4505 }
4506
4507 /* Set up the first character to match, if available. The first_byte value is
4508 never set for an anchored regular expression, but the anchoring may be forced
4509 at run time, so we have to test for anchoring. The first char may be unset for
4510 an unanchored pattern, of course. If there's no first char and the pattern was
4511 studied, there may be a bitmap of possible first characters. */
4512
4513 if (!anchored)
4514 {
4515 if ((re->options & PCRE_FIRSTSET) != 0)
4516 {
4517 first_byte = re->first_byte & 255;
4518 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4519 first_byte = md->lcc[first_byte];
4520 }
4521 else
4522 if (!startline && study != NULL &&
4523 (study->options & PCRE_STUDY_MAPPED) != 0)
4524 start_bits = study->start_bits;
4525 }
4526
4527 /* For anchored or unanchored matches, there may be a "last known required
4528 character" set. */
4529
4530 if ((re->options & PCRE_REQCHSET) != 0)
4531 {
4532 req_byte = re->req_byte & 255;
4533 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
4534 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
4535 }
4536
4537
4538 /* ==========================================================================*/
4539
4540 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4541 the loop runs just once. */
4542
4543 for(;;)
4544 {
4545 USPTR save_end_subject = end_subject;
4546
4547 /* Reset the maximum number of extractions we might see. */
4548
4549 if (md->offset_vector != NULL)
4550 {
4551 register int *iptr = md->offset_vector;
4552 register int *iend = iptr + resetcount;
4553 while (iptr < iend) *iptr++ = -1;
4554 }
4555
4556 /* Advance to a unique first char if possible. If firstline is TRUE, the
4557 start of the match is constrained to the first line of a multiline string.
4558 That is, the match must be before or at the first newline. Implement this by
4559 temporarily adjusting end_subject so that we stop scanning at a newline. If
4560 the match fails at the newline, later code breaks this loop. */
4561
4562 if (firstline)
4563 {
4564 USPTR t = start_match;
4565 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4566 end_subject = t;
4567 }
4568
4569 /* Now test for a unique first byte */
4570
4571 if (first_byte >= 0)
4572 {
4573 if (first_byte_caseless)
4574 while (start_match < end_subject &&
4575 md->lcc[*start_match] != first_byte)
4576 start_match++;
4577 else
4578 while (start_match < end_subject && *start_match != first_byte)
4579 start_match++;
4580 }
4581
4582 /* Or to just after a linebreak for a multiline match if possible */
4583
4584 else if (startline)
4585 {
4586 if (start_match > md->start_subject + start_offset)
4587 {
4588 while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4589 start_match++;
4590
4591 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4592 and we are now at a LF, advance the match position by one more character.
4593 */
4594
4595 if (start_match[-1] == '\r' &&
4596 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4597 start_match < end_subject &&
4598 *start_match == '\n')
4599 start_match++;
4600 }
4601 }
4602
4603 /* Or to a non-unique first char after study */
4604
4605 else if (start_bits != NULL)
4606 {
4607 while (start_match < end_subject)
4608 {
4609 register unsigned int c = *start_match;
4610 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4611 }
4612 }
4613
4614 /* Restore fudged end_subject */
4615
4616 end_subject = save_end_subject;
4617
4618 #ifdef DEBUG /* Sigh. Some compilers never learn. */
4619 printf(">>>> Match against: ");
4620 pchars(start_match, end_subject - start_match, TRUE, md);
4621 printf("\n");
4622 #endif
4623
4624 /* If req_byte is set, we know that that character must appear in the subject
4625 for the match to succeed. If the first character is set, req_byte must be
4626 later in the subject; otherwise the test starts at the match point. This
4627 optimization can save a huge amount of backtracking in patterns with nested
4628 unlimited repeats that aren't going to match. Writing separate code for
4629 cased/caseless versions makes it go faster, as does using an autoincrement
4630 and backing off on a match.
4631
4632 HOWEVER: when the subject string is very, very long, searching to its end can
4633 take a long time, and give bad performance on quite ordinary patterns. This
4634 showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4635 string... so we don't do this when the string is sufficiently long.
4636
4637 ALSO: this processing is disabled when partial matching is requested.
4638 */
4639
4640 if (req_byte >= 0 &&
4641 end_subject - start_match < REQ_BYTE_MAX &&
4642 !md->partial)
4643 {
4644 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4645
4646 /* We don't need to repeat the search if we haven't yet reached the
4647 place we found it at last time. */
4648
4649 if (p > req_byte_ptr)
4650 {
4651 if (req_byte_caseless)
4652 {
4653 while (p < end_subject)
4654 {
4655 register int pp = *p++;
4656 if (pp == req_byte || pp == req_byte2) { p--; break; }
4657 }
4658 }
4659 else
4660 {
4661 while (p < end_subject)
4662 {
4663 if (*p++ == req_byte) { p--; break; }
4664 }
4665 }
4666
4667 /* If we can't find the required character, break the matching loop,
4668 forcing a match failure. */
4669
4670 if (p >= end_subject)
4671 {
4672 rc = MATCH_NOMATCH;
4673 break;
4674 }
4675
4676 /* If we have found the required character, save the point where we
4677 found it, so that we don't search again next time round the loop if
4678 the start hasn't passed this character yet. */
4679
4680 req_byte_ptr = p;
4681 }
4682 }
4683
4684 /* OK, we can now run the match. */
4685
4686 md->start_match_ptr = start_match; /* Insurance */
4687 md->match_call_count = 0;
4688 rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4689
4690 /* Any return other than MATCH_NOMATCH breaks the loop. */
4691
4692 if (rc != MATCH_NOMATCH) break;
4693
4694 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4695 newline in the subject (though it may continue over the newline). Therefore,
4696 if we have just failed to match, starting at a newline, do not continue. */
4697
4698 if (firstline && IS_NEWLINE(start_match)) break;
4699
4700 /* Advance the match position by one character. */
4701
4702 start_match++;
4703 #ifdef SUPPORT_UTF8
4704 if (utf8)
4705 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4706 start_match++;
4707 #endif
4708
4709 /* Break the loop if the pattern is anchored or if we have passed the end of
4710 the subject. */
4711
4712 if (anchored || start_match > end_subject) break;
4713
4714 /* If we have just passed a CR and the newline option is CRLF or ANY or
4715 ANYCRLF, and we are now at a LF, advance the match position by one more
4716 character. */
4717
4718 if (start_match[-1] == '\r' &&
4719 (md->nltype == NLTYPE_ANY ||
4720 md->nltype == NLTYPE_ANYCRLF ||
4721 md->nllen == 2) &&
4722 start_match < end_subject &&
4723 *start_match == '\n')
4724 start_match++;
4725
4726 } /* End of for(;;) "bumpalong" loop */
4727
4728 /* ==========================================================================*/
4729
4730 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4731 conditions is true:
4732
4733 (1) The pattern is anchored;
4734
4735 (2) We are past the end of the subject;
4736
4737 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4738 this option requests that a match occur at or before the first newline in
4739 the subject.
4740
4741 When we have a match and the offset vector is big enough to deal with any
4742 backreferences, captured substring offsets will already be set up. In the case
4743 where we had to get some local store to hold offsets for backreference
4744 processing, copy those that we can. In this case there need not be overflow if
4745 certain parts of the pattern were not used, even though there are more
4746 capturing parentheses than vector slots. */
4747
4748 if (rc == MATCH_MATCH)
4749 {
4750 if (using_temporary_offsets)
4751 {
4752 if (offsetcount >= 4)
4753 {
4754 memcpy(offsets + 2, md->offset_vector + 2,
4755 (offsetcount - 2) * sizeof(int));
4756 DPRINTF(("Copied offsets from temporary memory\n"));
4757 }
4758 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4759 DPRINTF(("Freeing temporary memory\n"));
4760 (pcre_free)(md->offset_vector);
4761 }
4762
4763 /* Set the return code to the number of captured strings, or 0 if there are
4764 too many to fit into the vector. */
4765
4766 rc = md->offset_overflow? 0 : md->end_offset_top/2;
4767
4768 /* If there is space, set up the whole thing as substring 0. The value of
4769 md->start_match_ptr might be modified if \K was encountered on the successful
4770 matching path. */
4771
4772 if (offsetcount < 2) rc = 0; else
4773 {
4774 offsets[0] = md->start_match_ptr - md->start_subject;
4775 offsets[1] = md->end_match_ptr - md->start_subject;
4776 }
4777
4778 DPRINTF((">>>> returning %d\n", rc));
4779 return rc;
4780 }
4781
4782 /* Control gets here if there has been an error, or if the overall match
4783 attempt has failed at all permitted starting positions. */
4784
4785 if (using_temporary_offsets)
4786 {
4787 DPRINTF(("Freeing temporary memory\n"));
4788 (pcre_free)(md->offset_vector);
4789 }
4790
4791 if (rc != MATCH_NOMATCH)
4792 {
4793 DPRINTF((">>>> error: returning %d\n", rc));
4794 return rc;
4795 }
4796 else if (md->partial && md->hitend)
4797 {
4798 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4799 return PCRE_ERROR_PARTIAL;
4800 }
4801 else
4802 {
4803 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4804 return PCRE_ERROR_NOMATCH;
4805 }
4806 }
4807
4808 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12