/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 569 - (show annotations) (download)
Sun Nov 7 16:14:50 2010 UTC (3 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 186848 byte(s)
Add PCRE_ERROR_SHORTUTF8 to PCRE_PARTIAL_HARD processing.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2010 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49 #define NLBLOCK md /* Block containing newline information */
50 #define PSSTART start_subject /* Field containing processed string start */
51 #define PSEND end_subject /* Field containing processed string end */
52
53 #include "pcre_internal.h"
54
55 /* Undefine some potentially clashing cpp symbols */
56
57 #undef min
58 #undef max
59
60 /* Flag bits for the match() function */
61
62 #define match_condassert 0x01 /* Called to check a condition assertion */
63 #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64
65 /* Non-error returns from the match() function. Error returns are externally
66 defined PCRE_ERROR_xxx codes, which are all negative. */
67
68 #define MATCH_MATCH 1
69 #define MATCH_NOMATCH 0
70
71 /* Special internal returns from the match() function. Make them sufficiently
72 negative to avoid the external error codes. */
73
74 #define MATCH_ACCEPT (-999)
75 #define MATCH_COMMIT (-998)
76 #define MATCH_PRUNE (-997)
77 #define MATCH_SKIP (-996)
78 #define MATCH_SKIP_ARG (-995)
79 #define MATCH_THEN (-994)
80
81 /* This is a convenience macro for code that occurs many times. */
82
83 #define MRRETURN(ra) \
84 { \
85 md->mark = markptr; \
86 RRETURN(ra); \
87 }
88
89 /* Maximum number of ints of offset to save on the stack for recursive calls.
90 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91 because the offset vector is always a multiple of 3 long. */
92
93 #define REC_STACK_SAVE_MAX 30
94
95 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96
97 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; /* minimum count for each common repeat */
98 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; /* maximum count for each common repeat; 0 means no upper limit */
99
100
101
102 #ifdef PCRE_DEBUG
103 /*************************************************
104 * Debugging function to print chars *
105 *************************************************/
106
107 /* Print a sequence of chars in printable format, stopping at the end of the
108 subject if requested.
109
110 Arguments:
111 p points to characters
112 length number to print
113 is_subject TRUE if printing from within md->start_subject
114 md pointer to matching data block, if is_subject is TRUE
115
116 Returns: nothing
117 */
118
119 static void
120 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121 {
122 unsigned int c;
123 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
124 while (length-- > 0)
125 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
126 }
127 #endif
128
129
130
131 /*************************************************
132 * Match a back-reference *
133 *************************************************/
134
135 /* If a back reference hasn't been set, the length that is passed is greater
136 than the number of characters left in the string, so the match fails.
137
138 Arguments:
139 offset index into the offset vector
140 eptr points into the subject
141 length length to be matched
142 md points to match data block
143 ims the ims flags
144
145 Returns: TRUE if matched
146 */
147
148 static BOOL
149 match_ref(int offset, register USPTR eptr, int length, match_data *md,
150 unsigned long int ims)
151 {
152 USPTR p = md->start_subject + md->offset_vector[offset];
153
154 #ifdef PCRE_DEBUG
155 if (eptr >= md->end_subject)
156 printf("matching subject <null>");
157 else
158 {
159 printf("matching subject ");
160 pchars(eptr, length, TRUE, md);
161 }
162 printf(" against backref ");
163 pchars(p, length, FALSE, md);
164 printf("\n");
165 #endif
166
167 /* Always fail if not enough characters left */
168
169 if (length > md->end_subject - eptr) return FALSE;
170
171 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172 properly if Unicode properties are supported. Otherwise, we can check only
173 ASCII characters. */
174
175 if ((ims & PCRE_CASELESS) != 0)
176 {
177 #ifdef SUPPORT_UTF8
178 #ifdef SUPPORT_UCP
179 if (md->utf8)
180 {
181 USPTR endptr = eptr + length;
182 while (eptr < endptr)
183 {
184 int c, d;
185 GETCHARINC(c, eptr);
186 GETCHARINC(d, p);
187 if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188 }
189 }
190 else
191 #endif
192 #endif
193
194 /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195 is no UCP support. */
196
197 while (length-- > 0)
198 { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199 }
200
201 /* In the caseful case, we can just compare the bytes, whether or not we
202 are in UTF-8 mode. */
203
204 else
205 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206
207 return TRUE;
208 }
209
210
211
212 /***************************************************************************
213 ****************************************************************************
214 RECURSION IN THE match() FUNCTION
215
216 The match() function is highly recursive, though not every recursive call
217 increases the recursive depth. Nevertheless, some regular expressions can cause
218 it to recurse to a great depth. I was writing for Unix, so I just let it call
219 itself recursively. This uses the stack for saving everything that has to be
220 saved for a recursive call. On Unix, the stack can be large, and this works
221 fine.
222
223 It turns out that on some non-Unix-like systems there are problems with
224 programs that use a lot of stack. (This despite the fact that every last chip
225 has oodles of memory these days, and techniques for extending the stack have
226 been known for decades.) So....
227
228 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
229 calls by keeping local variables that need to be preserved in blocks of memory
230 obtained from malloc() instead of on the stack. Macros are used to
231 achieve this so that the actual code doesn't look very different to what it
232 always used to.
233
234 The original heap-recursive code used longjmp(). However, it seems that this
235 can be very slow on some operating systems. Following a suggestion from Stan
236 Switzer, the use of longjmp() has been abolished, at the cost of having to
237 provide a unique number for each call to RMATCH. There is no way of generating
238 a sequence of numbers at compile time in C. I have given them names, to make
239 them stand out more clearly.
240
241 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243 tests. Furthermore, not using longjmp() means that local dynamic variables
244 don't have indeterminate values; this has meant that the frame size can be
245 reduced because the result can be "passed back" by straight setting of the
246 variable instead of being passed in the frame.
247 ****************************************************************************
248 ***************************************************************************/
249
250 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251 below must be updated in sync. */
252
253 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
254 RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255 RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256 RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258 RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259 RM61, RM62 };
260
261 /* These versions of the macros use the stack, as normal. There are debugging
262 versions and production versions. Note that the "rw" argument of RMATCH isn't
263 actually used in this definition. */
264
265 #ifndef NO_RECURSE
266 #define REGISTER register
267
268 #ifdef PCRE_DEBUG
269 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
270 { \
271 printf("match() called in line %d\n", __LINE__); \
272 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273 printf("to line %d\n", __LINE__); \
274 }
275 #define RRETURN(ra) \
276 { \
277 printf("match() returned %d from line %d ", ra, __LINE__); \
278 return ra; \
279 }
280 #else
281 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283 #define RRETURN(ra) return ra
284 #endif
285
286 #else
287
288
289 /* These versions of the macros manage a private stack on the heap. Note that
290 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291 argument of match(), which never changes. */
292
293 #define REGISTER
294
295 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296 {\
297 heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
298 if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299 frame->Xwhere = rw; \
300 newframe->Xeptr = ra;\
301 newframe->Xecode = rb;\
302 newframe->Xmstart = mstart;\
303 newframe->Xmarkptr = markptr;\
304 newframe->Xoffset_top = rc;\
305 newframe->Xims = re;\
306 newframe->Xeptrb = rf;\
307 newframe->Xflags = rg;\
308 newframe->Xrdepth = frame->Xrdepth + 1;\
309 newframe->Xprevframe = frame;\
310 frame = newframe;\
311 DPRINTF(("restarting from line %d\n", __LINE__));\
312 goto HEAP_RECURSE;\
313 L_##rw:\
314 DPRINTF(("jumped back to line %d\n", __LINE__));\
315 }
316
317 #define RRETURN(ra)\
318 {\
319 heapframe *oldframe = frame;\
320 frame = oldframe->Xprevframe;\
321 (pcre_stack_free)(oldframe);\
322 if (frame != NULL)\
323 {\
324 rrc = ra;\
325 goto HEAP_RETURN;\
326 }\
327 return ra;\
328 }
329
330
331 /* Structure for remembering the arguments and local variables of one level of match() in a private heap frame (used only when NO_RECURSE is defined) */
332
333 typedef struct heapframe {
334 struct heapframe *Xprevframe; /* frame to resume on return; NULL marks the top level */
335
336 /* Function arguments that may change (these mirror the parameters of match()) */
337
338 USPTR Xeptr; /* current character in the subject */
339 const uschar *Xecode; /* current position in the compiled code */
340 USPTR Xmstart; /* current match start position */
341 USPTR Xmarkptr; /* most recent MARK name, or NULL */
342 int Xoffset_top; /* current top pointer */
343 long int Xims; /* current /i, /m, and /s options. NOTE(review): match() declares ims as unsigned long int, but this field is signed - confirm this is intentional */
344 eptrblock *Xeptrb; /* chain of blocks holding eptr at the start of brackets */
345 int Xflags; /* match_condassert / match_cbegroup flag bits */
346 unsigned int Xrdepth; /* recursion depth */
347
348 /* Function local variables that must survive an RMATCH() "call" */
349
350 USPTR Xcallpat;
351 #ifdef SUPPORT_UTF8
352 USPTR Xcharptr;
353 #endif
354 USPTR Xdata;
355 USPTR Xnext;
356 USPTR Xpp;
357 USPTR Xprev;
358 USPTR Xsaved_eptr;
359
360 recursion_info Xnew_recursive;
361
362 BOOL Xcur_is_word;
363 BOOL Xcondition; /* outcome of a conditional group's test */
364 BOOL Xprev_is_word;
365
366 unsigned long int Xoriginal_ims; /* ims as it was on entry to this level, saved for resetting on ')' */
367
368 #ifdef SUPPORT_UCP
369 int Xprop_type;
370 int Xprop_value;
371 int Xprop_fail_result;
372 int Xprop_category;
373 int Xprop_chartype;
374 int Xprop_script;
375 int Xoclength;
376 uschar Xocchars[8];
377 #endif
378
379 int Xcodelink; /* link value read from the head of a conditional group */
380 int Xctype;
381 unsigned int Xfc; /* frame-preserved counterpart of the local "c" */
382 int Xfi; /* frame-preserved counterpart of the local "i" */
383 int Xlength;
384 int Xmax;
385 int Xmin;
386 int Xnumber; /* capturing bracket number */
387 int Xoffset; /* for a capturing bracket, number << 1 (index into the offset vector) */
388 int Xop; /* opcode currently being processed */
389 int Xsave_capture_last; /* md->capture_last saved on entry to a capturing bracket */
390 int Xsave_offset1, Xsave_offset2, Xsave_offset3; /* offset vector entries saved on entry to a capturing bracket, restored if it fails */
391 int Xstacksave[REC_STACK_SAVE_MAX]; /* presumably holds saved offsets across a pattern recursion - confirm against the OP_RECURSE code */
392
393 eptrblock Xnewptrb; /* block linked onto the eptrb chain when match_cbegroup is set */
394
395 /* Where to jump back to: the RMn number given to the RMATCH() that created the next frame */
396
397 int Xwhere;
398
399 } heapframe;
400
401 #endif
402
403
404 /***************************************************************************
405 ***************************************************************************/
406
407
408
409 /*************************************************
410 * Match from current position *
411 *************************************************/
412
413 /* This function is called recursively in many circumstances. Whenever it
414 returns a negative (error) response, the outer incarnation must also return the
415 same response. */
416
417 /* These macros pack up tests that are used for partial matching, and which
418 appear several times in the code. We set the "hit end" flag if the pointer is
419 at the end of the subject and also past the start of the subject (i.e.
420 something has been matched). For hard partial matching, we then return
421 immediately. The second one is used when we already know we are past the end of
422 the subject. */
423
424 #define CHECK_PARTIAL()\
425 if (md->partial != 0 && eptr >= md->end_subject && \
426 eptr > md->start_used_ptr) \
427 { \
428 md->hitend = TRUE; \
429 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
430 }
431
432 #define SCHECK_PARTIAL()\
433 if (md->partial != 0 && eptr > md->start_used_ptr) \
434 { \
435 md->hitend = TRUE; \
436 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
437 }
438
439
440 /* Performance note: It might be tempting to extract commonly used fields from
441 the md structure (e.g. utf8, end_subject) into individual variables to improve
442 performance. Tests using gcc on a SPARC disproved this; in the first case, it
443 made performance worse.
444
445 Arguments:
446 eptr pointer to current character in subject
447 ecode pointer to current position in compiled code
448 mstart pointer to the current match start position (can be modified
449 by encountering \K)
450 markptr pointer to the most recent MARK name, or NULL
451 offset_top current top pointer
452 md pointer to "static" info for the match
453 ims current /i, /m, and /s options
454 eptrb pointer to chain of blocks containing eptr at start of
455 brackets - for testing for empty matches
456 flags can contain
457 match_condassert - this is an assertion condition
458 match_cbegroup - this is the start of an unlimited repeat
459 group that can match an empty string
460 rdepth the recursion depth
461
462 Returns: MATCH_MATCH if matched ) these values are >= 0
463 MATCH_NOMATCH if failed to match )
464 a negative MATCH_xxx value for PRUNE, SKIP, etc
465 a negative PCRE_ERROR_xxx value if aborted by an error condition
466 (e.g. stopped by repeated call or recursion limit)
467 */
468
469 static int
470 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
471 const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
472 eptrblock *eptrb, int flags, unsigned int rdepth)
473 {
474 /* These variables do not need to be preserved over recursion in this function,
475 so they can be ordinary variables in all cases. Mark some of them with
476 "register" because they are used a lot in loops. */
477
478 register int rrc; /* Returns from recursive calls */
479 register int i; /* Used for loops not involving calls to RMATCH() */
480 register unsigned int c; /* Character values not kept over RMATCH() calls */
481 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
482
483 BOOL minimize, possessive; /* Quantifier options */
484 int condcode;
485
486 /* When recursion is not being used, all "local" variables that have to be
487 preserved over calls to RMATCH() are part of a "frame" which is obtained from
488 heap storage. Set up the top-level frame here; others are obtained from the
489 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
490
491 #ifdef NO_RECURSE
492 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
493 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
494 frame->Xprevframe = NULL; /* Marks the top level */
495
496 /* Copy in the original argument variables */
497
498 frame->Xeptr = eptr;
499 frame->Xecode = ecode;
500 frame->Xmstart = mstart;
501 frame->Xmarkptr = markptr;
502 frame->Xoffset_top = offset_top;
503 frame->Xims = ims;
504 frame->Xeptrb = eptrb;
505 frame->Xflags = flags;
506 frame->Xrdepth = rdepth;
507
508 /* This is where control jumps back to in order to effect "recursion" */
509
510 HEAP_RECURSE:
511
512 /* Macros make the argument variables come from the current frame */
513
514 #define eptr frame->Xeptr
515 #define ecode frame->Xecode
516 #define mstart frame->Xmstart
517 #define markptr frame->Xmarkptr
518 #define offset_top frame->Xoffset_top
519 #define ims frame->Xims
520 #define eptrb frame->Xeptrb
521 #define flags frame->Xflags
522 #define rdepth frame->Xrdepth
523
524 /* Ditto for the local variables */
525
526 #ifdef SUPPORT_UTF8
527 #define charptr frame->Xcharptr
528 #endif
529 #define callpat frame->Xcallpat
530 #define codelink frame->Xcodelink
531 #define data frame->Xdata
532 #define next frame->Xnext
533 #define pp frame->Xpp
534 #define prev frame->Xprev
535 #define saved_eptr frame->Xsaved_eptr
536
537 #define new_recursive frame->Xnew_recursive
538
539 #define cur_is_word frame->Xcur_is_word
540 #define condition frame->Xcondition
541 #define prev_is_word frame->Xprev_is_word
542
543 #define original_ims frame->Xoriginal_ims
544
545 #ifdef SUPPORT_UCP
546 #define prop_type frame->Xprop_type
547 #define prop_value frame->Xprop_value
548 #define prop_fail_result frame->Xprop_fail_result
549 #define prop_category frame->Xprop_category
550 #define prop_chartype frame->Xprop_chartype
551 #define prop_script frame->Xprop_script
552 #define oclength frame->Xoclength
553 #define occhars frame->Xocchars
554 #endif
555
556 #define ctype frame->Xctype
557 #define fc frame->Xfc
558 #define fi frame->Xfi
559 #define length frame->Xlength
560 #define max frame->Xmax
561 #define min frame->Xmin
562 #define number frame->Xnumber
563 #define offset frame->Xoffset
564 #define op frame->Xop
565 #define save_capture_last frame->Xsave_capture_last
566 #define save_offset1 frame->Xsave_offset1
567 #define save_offset2 frame->Xsave_offset2
568 #define save_offset3 frame->Xsave_offset3
569 #define stacksave frame->Xstacksave
570
571 #define newptrb frame->Xnewptrb
572
573 /* When recursion is being used, local variables are allocated on the stack and
574 get preserved during recursion in the normal way. In this environment, fi and
575 i, and fc and c, can be the same variables. */
576
577 #else /* NO_RECURSE not defined */
578 #define fi i
579 #define fc c
580
581
582 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
583 const uschar *charptr; /* in small blocks of the code. My normal */
584 #endif /* style of coding would have declared */
585 const uschar *callpat; /* them within each of those blocks. */
586 const uschar *data; /* However, in order to accommodate the */
587 const uschar *next; /* version of this code that uses an */
588 USPTR pp; /* external "stack" implemented on the */
589 const uschar *prev; /* heap, it is easier to declare them all */
590 USPTR saved_eptr; /* here, so the declarations can be cut */
591 /* out in a block. The only declarations */
592 recursion_info new_recursive; /* within blocks below are for variables */
593 /* that do not have to be preserved over */
594 BOOL cur_is_word; /* a recursive call to RMATCH(). */
595 BOOL condition;
596 BOOL prev_is_word;
597
598 unsigned long int original_ims;
599
600 #ifdef SUPPORT_UCP
601 int prop_type;
602 int prop_value;
603 int prop_fail_result;
604 int prop_category;
605 int prop_chartype;
606 int prop_script;
607 int oclength;
608 uschar occhars[8];
609 #endif
610
611 int codelink;
612 int ctype;
613 int length;
614 int max;
615 int min;
616 int number;
617 int offset;
618 int op;
619 int save_capture_last;
620 int save_offset1, save_offset2, save_offset3;
621 int stacksave[REC_STACK_SAVE_MAX];
622
623 eptrblock newptrb;
624 #endif /* NO_RECURSE */
625
626 /* These statements are here to stop the compiler complaining about uninitialized
627 variables. */
628
629 #ifdef SUPPORT_UCP
630 prop_value = 0;
631 prop_fail_result = 0;
632 #endif
633
634
635 /* This label is used for tail recursion, which is used in a few cases even
636 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
637 used. Thanks to Ian Taylor for noticing this possibility and sending the
638 original patch. */
639
640 TAIL_RECURSE:
641
642 /* OK, now we can get on with the real code of the function. Recursive calls
643 are specified by the macro RMATCH and RRETURN is used to return. When
644 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
645 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
646 defined). However, RMATCH isn't like a function call because it's quite a
647 complicated macro. It has to be used in one particular way. This shouldn't,
648 however, impact performance when true recursion is being used. */
649
650 #ifdef SUPPORT_UTF8
651 utf8 = md->utf8; /* Local copy of the flag */
652 #else
653 utf8 = FALSE;
654 #endif
655
656 /* First check that we haven't called match() too many times, or that we
657 haven't exceeded the recursive call limit. */
658
659 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
660 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
661
662 original_ims = ims; /* Save for resetting on ')' */
663
664 /* At the start of a group with an unlimited repeat that may match an empty
665 string, the match_cbegroup flag is set. When this is the case, add the current
666 subject pointer to the chain of such remembered pointers, to be checked when we
667 hit the closing ket, in order to break infinite loops that match no characters.
668 When match() is called in other circumstances, don't add to the chain. The
669 match_cbegroup flag must NOT be used with tail recursion, because the memory
670 block that is used is on the stack, so a new one may be required for each
671 match(). */
672
673 if ((flags & match_cbegroup) != 0)
674 {
675 newptrb.epb_saved_eptr = eptr;
676 newptrb.epb_prev = eptrb;
677 eptrb = &newptrb;
678 }
679
680 /* Now start processing the opcodes. */
681
682 for (;;)
683 {
684 minimize = possessive = FALSE;
685 op = *ecode;
686
687 switch(op)
688 {
689 case OP_MARK:
690 markptr = ecode + 2;
691 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
692 ims, eptrb, flags, RM55);
693
694 /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
695 argument, and we must check whether that argument matches this MARK's
696 argument. It is passed back in md->start_match_ptr (an overloading of that
697 variable). If it does match, we reset that variable to the current subject
698 position and return MATCH_SKIP. Otherwise, pass back the return code
699 unaltered. */
700
701 if (rrc == MATCH_SKIP_ARG &&
702 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
703 {
704 md->start_match_ptr = eptr;
705 RRETURN(MATCH_SKIP);
706 }
707
708 if (md->mark == NULL) md->mark = markptr;
709 RRETURN(rrc);
710
711 case OP_FAIL:
712 MRRETURN(MATCH_NOMATCH);
713
714 /* COMMIT overrides PRUNE, SKIP, and THEN */
715
716 case OP_COMMIT:
717 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718 ims, eptrb, flags, RM52);
719 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
720 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
721 rrc != MATCH_THEN)
722 RRETURN(rrc);
723 MRRETURN(MATCH_COMMIT);
724
725 /* PRUNE overrides THEN */
726
727 case OP_PRUNE:
728 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729 ims, eptrb, flags, RM51);
730 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
731 MRRETURN(MATCH_PRUNE);
732
733 case OP_PRUNE_ARG:
734 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
735 ims, eptrb, flags, RM56);
736 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
737 md->mark = ecode + 2;
738 RRETURN(MATCH_PRUNE);
739
740 /* SKIP overrides PRUNE and THEN */
741
742 case OP_SKIP:
743 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
744 ims, eptrb, flags, RM53);
745 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
746 RRETURN(rrc);
747 md->start_match_ptr = eptr; /* Pass back current position */
748 MRRETURN(MATCH_SKIP);
749
750 case OP_SKIP_ARG:
751 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752 ims, eptrb, flags, RM57);
753 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
754 RRETURN(rrc);
755
756 /* Pass back the current skip name by overloading md->start_match_ptr and
757 returning the special MATCH_SKIP_ARG return code. This will either be
758 caught by a matching MARK, or get to the top, where it is treated the same
759 as PRUNE. */
760
761 md->start_match_ptr = ecode + 2;
762 RRETURN(MATCH_SKIP_ARG);
763
764 /* For THEN (and THEN_ARG) we pass back the address of the bracket or
765 the alt that is at the start of the current branch. This makes it possible
766 to skip back past alternatives that precede the THEN within the current
767 branch. */
768
769 case OP_THEN:
770 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
771 ims, eptrb, flags, RM54);
772 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
773 md->start_match_ptr = ecode - GET(ecode, 1);
774 MRRETURN(MATCH_THEN);
775
776 case OP_THEN_ARG:
777 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
778 offset_top, md, ims, eptrb, flags, RM58);
779 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
780 md->start_match_ptr = ecode - GET(ecode, 1);
781 md->mark = ecode + LINK_SIZE + 2;
782 RRETURN(MATCH_THEN);
783
784 /* Handle a capturing bracket. If there is space in the offset vector, save
785 the current subject position in the working slot at the top of the vector.
786 We mustn't change the current values of the data slot, because they may be
787 set from a previous iteration of this group, and be referred to by a
788 reference inside the group.
789
790 If the bracket fails to match, we need to restore this value and also the
791 values of the final offsets, in case they were set by a previous iteration
792 of the same bracket.
793
794 If there isn't enough space in the offset vector, treat this as if it were
795 a non-capturing bracket. Don't worry about setting the flag for the error
796 case here; that is handled in the code for KET. */
797
798 case OP_CBRA:
799 case OP_SCBRA:
800 number = GET2(ecode, 1+LINK_SIZE);
801 offset = number << 1;
802
803 #ifdef PCRE_DEBUG
804 printf("start bracket %d\n", number);
805 printf("subject=");
806 pchars(eptr, 16, TRUE, md);
807 printf("\n");
808 #endif
809
810 if (offset < md->offset_max)
811 {
812 save_offset1 = md->offset_vector[offset];
813 save_offset2 = md->offset_vector[offset+1];
814 save_offset3 = md->offset_vector[md->offset_end - number];
815 save_capture_last = md->capture_last;
816
817 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
818 md->offset_vector[md->offset_end - number] =
819 (int)(eptr - md->start_subject);
820
821 flags = (op == OP_SCBRA)? match_cbegroup : 0;
822 do
823 {
824 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
825 ims, eptrb, flags, RM1);
826 if (rrc != MATCH_NOMATCH &&
827 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
828 RRETURN(rrc);
829 md->capture_last = save_capture_last;
830 ecode += GET(ecode, 1);
831 }
832 while (*ecode == OP_ALT);
833
834 DPRINTF(("bracket %d failed\n", number));
835
836 md->offset_vector[offset] = save_offset1;
837 md->offset_vector[offset+1] = save_offset2;
838 md->offset_vector[md->offset_end - number] = save_offset3;
839
840 if (rrc != MATCH_THEN) md->mark = markptr;
841 RRETURN(MATCH_NOMATCH);
842 }
843
844 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
845 as a non-capturing bracket. */
846
847 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
848 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
849
850 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
851
852 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
853 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
854
855 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
856 final alternative within the brackets, we would return the result of a
857 recursive call to match() whatever happened. We can reduce stack usage by
858 turning this into a tail recursion, except in the case when match_cbegroup
859 is set.*/
860
861 case OP_BRA:
862 case OP_SBRA:
863 DPRINTF(("start non-capturing bracket\n"));
864 flags = (op >= OP_SBRA)? match_cbegroup : 0;
865 for (;;)
866 {
867 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
868 {
869 if (flags == 0) /* Not a possibly empty group */
870 {
871 ecode += _pcre_OP_lengths[*ecode];
872 DPRINTF(("bracket 0 tail recursion\n"));
873 goto TAIL_RECURSE;
874 }
875
876 /* Possibly empty group; can't use tail recursion. */
877
878 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
879 eptrb, flags, RM48);
880 if (rrc == MATCH_NOMATCH) md->mark = markptr;
881 RRETURN(rrc);
882 }
883
884 /* For non-final alternatives, continue the loop for a NOMATCH result;
885 otherwise return. */
886
887 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
888 eptrb, flags, RM2);
889 if (rrc != MATCH_NOMATCH &&
890 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
891 RRETURN(rrc);
892 ecode += GET(ecode, 1);
893 }
894 /* Control never reaches here. */
895
896 /* Conditional group: compilation checked that there are no more than
897 two branches. If the condition is false, skipping the first branch takes us
898 past the end if there is only one branch, but that's OK because that is
899 exactly what going to the ket would do. As there is only one branch to be
900 obeyed, we can use tail recursion to avoid using another stack frame. */
901
902 case OP_COND:
903 case OP_SCOND:
904 codelink= GET(ecode, 1);
905
906 /* Because of the way auto-callout works during compile, a callout item is
907 inserted between OP_COND and an assertion condition. */
908
909 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
910 {
911 if (pcre_callout != NULL)
912 {
913 pcre_callout_block cb;
914 cb.version = 1; /* Version 1 of the callout block */
915 cb.callout_number = ecode[LINK_SIZE+2];
916 cb.offset_vector = md->offset_vector;
917 cb.subject = (PCRE_SPTR)md->start_subject;
918 cb.subject_length = (int)(md->end_subject - md->start_subject);
919 cb.start_match = (int)(mstart - md->start_subject);
920 cb.current_position = (int)(eptr - md->start_subject);
921 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
922 cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
923 cb.capture_top = offset_top/2;
924 cb.capture_last = md->capture_last;
925 cb.callout_data = md->callout_data;
926 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
927 if (rrc < 0) RRETURN(rrc);
928 }
929 ecode += _pcre_OP_lengths[OP_CALLOUT];
930 }
931
932 condcode = ecode[LINK_SIZE+1];
933
934 /* Now see what the actual condition is */
935
936 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
937 {
938 if (md->recursive == NULL) /* Not recursing => FALSE */
939 {
940 condition = FALSE;
941 ecode += GET(ecode, 1);
942 }
943 else
944 {
945 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
946 condition = (recno == RREF_ANY || recno == md->recursive->group_num);
947
948 /* If the test is for recursion into a specific subpattern, and it is
949 false, but the test was set up by name, scan the table to see if the
950 name refers to any other numbers, and test them. The condition is true
951 if any one is set. */
952
953 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
954 {
955 uschar *slotA = md->name_table;
956 for (i = 0; i < md->name_count; i++)
957 {
958 if (GET2(slotA, 0) == recno) break;
959 slotA += md->name_entry_size;
960 }
961
962 /* Found a name for the number - there can be only one; duplicate
963 names for different numbers are allowed, but not vice versa. First
964 scan down for duplicates. */
965
966 if (i < md->name_count)
967 {
968 uschar *slotB = slotA;
969 while (slotB > md->name_table)
970 {
971 slotB -= md->name_entry_size;
972 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
973 {
974 condition = GET2(slotB, 0) == md->recursive->group_num;
975 if (condition) break;
976 }
977 else break;
978 }
979
980 /* Scan up for duplicates */
981
982 if (!condition)
983 {
984 slotB = slotA;
985 for (i++; i < md->name_count; i++)
986 {
987 slotB += md->name_entry_size;
988 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
989 {
990 condition = GET2(slotB, 0) == md->recursive->group_num;
991 if (condition) break;
992 }
993 else break;
994 }
995 }
996 }
997 }
998
999 /* Choose branch according to the condition */
1000
1001 ecode += condition? 3 : GET(ecode, 1);
1002 }
1003 }
1004
1005 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1006 {
1007 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1008 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1009
1010 /* If the numbered capture is unset, but the reference was by name,
1011 scan the table to see if the name refers to any other numbers, and test
1012 them. The condition is true if any one is set. This is tediously similar
1013 to the code above, but not close enough to try to amalgamate. */
1014
1015 if (!condition && condcode == OP_NCREF)
1016 {
1017 int refno = offset >> 1;
1018 uschar *slotA = md->name_table;
1019
1020 for (i = 0; i < md->name_count; i++)
1021 {
1022 if (GET2(slotA, 0) == refno) break;
1023 slotA += md->name_entry_size;
1024 }
1025
1026 /* Found a name for the number - there can be only one; duplicate names
1027 for different numbers are allowed, but not vice versa. First scan down
1028 for duplicates. */
1029
1030 if (i < md->name_count)
1031 {
1032 uschar *slotB = slotA;
1033 while (slotB > md->name_table)
1034 {
1035 slotB -= md->name_entry_size;
1036 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1037 {
1038 offset = GET2(slotB, 0) << 1;
1039 condition = offset < offset_top &&
1040 md->offset_vector[offset] >= 0;
1041 if (condition) break;
1042 }
1043 else break;
1044 }
1045
1046 /* Scan up for duplicates */
1047
1048 if (!condition)
1049 {
1050 slotB = slotA;
1051 for (i++; i < md->name_count; i++)
1052 {
1053 slotB += md->name_entry_size;
1054 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1055 {
1056 offset = GET2(slotB, 0) << 1;
1057 condition = offset < offset_top &&
1058 md->offset_vector[offset] >= 0;
1059 if (condition) break;
1060 }
1061 else break;
1062 }
1063 }
1064 }
1065 }
1066
1067 /* Choose branch according to the condition */
1068
1069 ecode += condition? 3 : GET(ecode, 1);
1070 }
1071
1072 else if (condcode == OP_DEF) /* DEFINE - always false */
1073 {
1074 condition = FALSE;
1075 ecode += GET(ecode, 1);
1076 }
1077
1078 /* The condition is an assertion. Call match() to evaluate it - passing
1079 match_condassert as the flags argument causes it to stop at the end of an
1080 assertion. */
1081
1082 else
1083 {
1084 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1085 match_condassert, RM3);
1086 if (rrc == MATCH_MATCH)
1087 {
1088 condition = TRUE;
1089 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1090 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1091 }
1092 else if (rrc != MATCH_NOMATCH &&
1093 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1094 {
1095 RRETURN(rrc); /* Need braces because of following else */
1096 }
1097 else
1098 {
1099 condition = FALSE;
1100 ecode += codelink;
1101 }
1102 }
1103
1104 /* We are now at the branch that is to be obeyed. As there is only one,
1105 we can use tail recursion to avoid using another stack frame, except when
1106 match_cbegroup is required for an unlimited repeat of a possibly empty
1107 group. If the second alternative doesn't exist, we can just plough on. */
1108
1109 if (condition || *ecode == OP_ALT)
1110 {
1111 ecode += 1 + LINK_SIZE;
1112 if (op == OP_SCOND) /* Possibly empty group */
1113 {
1114 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1115 RRETURN(rrc);
1116 }
1117 else /* Group must match something */
1118 {
1119 flags = 0;
1120 goto TAIL_RECURSE;
1121 }
1122 }
1123 else /* Condition false & no alternative */
1124 {
1125 ecode += 1 + LINK_SIZE;
1126 }
1127 break;
1128
1129
1130 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1131 to close any currently open capturing brackets. */
1132
1133 case OP_CLOSE:
1134 number = GET2(ecode, 1);
1135 offset = number << 1;
1136
1137 #ifdef PCRE_DEBUG
1138 printf("end bracket %d at *ACCEPT", number);
1139 printf("\n");
1140 #endif
1141
1142 md->capture_last = number;
1143 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1144 {
1145 md->offset_vector[offset] =
1146 md->offset_vector[md->offset_end - number];
1147 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1148 if (offset_top <= offset) offset_top = offset + 2;
1149 }
1150 ecode += 3;
1151 break;
1152
1153
1154 /* End of the pattern, either real or forced. If we are in a top-level
1155 recursion, we should restore the offsets appropriately and continue from
1156 after the call. */
1157
1158 case OP_ACCEPT:
1159 case OP_END:
1160 if (md->recursive != NULL && md->recursive->group_num == 0)
1161 {
1162 recursion_info *rec = md->recursive;
1163 DPRINTF(("End of pattern in a (?0) recursion\n"));
1164 md->recursive = rec->prevrec;
1165 memmove(md->offset_vector, rec->offset_save,
1166 rec->saved_max * sizeof(int));
1167 offset_top = rec->save_offset_top;
1168 ims = original_ims;
1169 ecode = rec->after_call;
1170 break;
1171 }
1172
1173 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1174 set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1175 the subject. In both cases, backtracking will then try other alternatives,
1176 if any. */
1177
1178 if (eptr == mstart &&
1179 (md->notempty ||
1180 (md->notempty_atstart &&
1181 mstart == md->start_subject + md->start_offset)))
1182 MRRETURN(MATCH_NOMATCH);
1183
1184 /* Otherwise, we have a match. */
1185
1186 md->end_match_ptr = eptr; /* Record where we ended */
1187 md->end_offset_top = offset_top; /* and how many extracts were taken */
1188 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1189
1190 /* For some reason, the macros don't work properly if an expression is
1191 given as the argument to MRRETURN when the heap is in use. */
1192
1193 rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1194 MRRETURN(rrc);
1195
1196 /* Change option settings */
1197
1198 case OP_OPT:
1199 ims = ecode[1];
1200 ecode += 2;
1201 DPRINTF(("ims set to %02lx\n", ims));
1202 break;
1203
1204 /* Assertion brackets. Check the alternative branches in turn - the
1205 matching won't pass the KET for an assertion. If any one branch matches,
1206 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1207 start of each branch to move the current point backwards, so the code at
1208 this level is identical to the lookahead case. */
1209
1210 case OP_ASSERT:
1211 case OP_ASSERTBACK:
1212 do
1213 {
1214 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1215 RM4);
1216 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1217 {
1218 mstart = md->start_match_ptr; /* In case \K reset it */
1219 break;
1220 }
1221 if (rrc != MATCH_NOMATCH &&
1222 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1223 RRETURN(rrc);
1224 ecode += GET(ecode, 1);
1225 }
1226 while (*ecode == OP_ALT);
1227 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1228
1229 /* If checking an assertion for a condition, return MATCH_MATCH. */
1230
1231 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1232
1233 /* Continue from after the assertion, updating the offsets high water
1234 mark, since extracts may have been taken during the assertion. */
1235
1236 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1237 ecode += 1 + LINK_SIZE;
1238 offset_top = md->end_offset_top;
1239 continue;
1240
1241 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1242 PRUNE, or COMMIT means we must assume failure without checking subsequent
1243 branches. */
1244
1245 case OP_ASSERT_NOT:
1246 case OP_ASSERTBACK_NOT:
1247 do
1248 {
1249 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1250 RM5);
1251 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1252 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1253 {
1254 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1255 break;
1256 }
1257 if (rrc != MATCH_NOMATCH &&
1258 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1259 RRETURN(rrc);
1260 ecode += GET(ecode,1);
1261 }
1262 while (*ecode == OP_ALT);
1263
1264 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1265
1266 ecode += 1 + LINK_SIZE;
1267 continue;
1268
1269 /* Move the subject pointer back. This occurs only at the start of
1270 each branch of a lookbehind assertion. If we are too close to the start to
1271 move back, this match function fails. When working with UTF-8 we move
1272 back a number of characters, not bytes. */
1273
1274 case OP_REVERSE:
1275 #ifdef SUPPORT_UTF8
1276 if (utf8)
1277 {
1278 i = GET(ecode, 1);
1279 while (i-- > 0)
1280 {
1281 eptr--;
1282 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1283 BACKCHAR(eptr);
1284 }
1285 }
1286 else
1287 #endif
1288
1289 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1290
1291 {
1292 eptr -= GET(ecode, 1);
1293 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1294 }
1295
1296 /* Save the earliest consulted character, then skip to next op code */
1297
1298 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1299 ecode += 1 + LINK_SIZE;
1300 break;
1301
1302 /* The callout item calls an external function, if one is provided, passing
1303 details of the match so far. This is mainly for debugging, though the
1304 function is able to force a failure. */
1305
1306 case OP_CALLOUT:
1307 if (pcre_callout != NULL)
1308 {
1309 pcre_callout_block cb;
1310 cb.version = 1; /* Version 1 of the callout block */
1311 cb.callout_number = ecode[1];
1312 cb.offset_vector = md->offset_vector;
1313 cb.subject = (PCRE_SPTR)md->start_subject;
1314 cb.subject_length = (int)(md->end_subject - md->start_subject);
1315 cb.start_match = (int)(mstart - md->start_subject);
1316 cb.current_position = (int)(eptr - md->start_subject);
1317 cb.pattern_position = GET(ecode, 2);
1318 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1319 cb.capture_top = offset_top/2;
1320 cb.capture_last = md->capture_last;
1321 cb.callout_data = md->callout_data;
1322 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1323 if (rrc < 0) RRETURN(rrc);
1324 }
1325 ecode += 2 + 2*LINK_SIZE;
1326 break;
1327
1328 /* Recursion either matches the current regex, or some subexpression. The
1329 offset data is the offset to the starting bracket from the start of the
1330 whole pattern. (This is so that it works from duplicated subpatterns.)
1331
1332 If there are any capturing brackets started but not finished, we have to
1333 save their starting points and reinstate them after the recursion. However,
1334 we don't know how many such there are (offset_top records the completed
1335 total) so we just have to save all the potential data. There may be up to
1336 65535 such values, which is too large to put on the stack, but using malloc
1337 for small numbers seems expensive. As a compromise, the stack is used when
1338 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1339 is used. If the malloc fails, the offsets cannot be saved, so the recursion
1340 is not attempted and this call of match() gives up by returning
1341 PCRE_ERROR_NOMEMORY.
1342
1343 There are also other values that have to be saved. We use a chained
1344 sequence of blocks that actually live on the stack. Thanks to Robin Houston
1345 for the original version of this logic. */
1346
1347 case OP_RECURSE:
1348 {
1349 callpat = md->start_code + GET(ecode, 1);
1350 new_recursive.group_num = (callpat == md->start_code)? 0 :
1351 GET2(callpat, 1 + LINK_SIZE);
1352
1353 /* Add to "recursing stack" */
1354
1355 new_recursive.prevrec = md->recursive;
1356 md->recursive = &new_recursive;
1357
1358 /* Find where to continue from afterwards */
1359
1360 ecode += 1 + LINK_SIZE;
1361 new_recursive.after_call = ecode;
1362
1363 /* Now save the offset data. */
1364
1365 new_recursive.saved_max = md->offset_end;
1366 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1367 new_recursive.offset_save = stacksave;
1368 else
1369 {
1370 new_recursive.offset_save =
1371 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1372 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1373 }
1374
1375 memcpy(new_recursive.offset_save, md->offset_vector,
1376 new_recursive.saved_max * sizeof(int));
1377 new_recursive.save_offset_top = offset_top;
1378
1379 /* OK, now we can do the recursion. For each top-level alternative we
1380 restore the offset and recursion data. */
1381
1382 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1383 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1384 do
1385 {
1386 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1387 md, ims, eptrb, flags, RM6);
1388 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1389 {
1390 DPRINTF(("Recursion matched\n"));
1391 md->recursive = new_recursive.prevrec;
1392 if (new_recursive.offset_save != stacksave)
1393 (pcre_free)(new_recursive.offset_save);
1394 MRRETURN(MATCH_MATCH);
1395 }
1396 else if (rrc != MATCH_NOMATCH &&
1397 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1398 {
1399 DPRINTF(("Recursion gave error %d\n", rrc));
1400 if (new_recursive.offset_save != stacksave)
1401 (pcre_free)(new_recursive.offset_save);
1402 RRETURN(rrc);
1403 }
1404
1405 md->recursive = &new_recursive;
1406 memcpy(md->offset_vector, new_recursive.offset_save,
1407 new_recursive.saved_max * sizeof(int));
1408 callpat += GET(callpat, 1);
1409 }
1410 while (*callpat == OP_ALT);
1411
1412 DPRINTF(("Recursion didn't match\n"));
1413 md->recursive = new_recursive.prevrec;
1414 if (new_recursive.offset_save != stacksave)
1415 (pcre_free)(new_recursive.offset_save);
1416 MRRETURN(MATCH_NOMATCH);
1417 }
1418 /* Control never reaches here */
1419
1420 /* "Once" brackets are like assertion brackets except that after a match,
1421 the point in the subject string is not moved back. Thus there can never be
1422 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1423 Check the alternative branches in turn - the matching won't pass the KET
1424 for this kind of subpattern. If any one branch matches, we carry on as at
1425 the end of a normal bracket, leaving the subject pointer, but resetting
1426 the start-of-match value in case it was changed by \K. */
1427
1428 case OP_ONCE:
1429 prev = ecode;
1430 saved_eptr = eptr;
1431
1432 do
1433 {
1434 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1435 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1436 {
1437 mstart = md->start_match_ptr;
1438 break;
1439 }
1440 if (rrc != MATCH_NOMATCH &&
1441 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1442 RRETURN(rrc);
1443 ecode += GET(ecode,1);
1444 }
1445 while (*ecode == OP_ALT);
1446
1447 /* If we hit the end of the group (which could be repeated), fail */
1448
1449 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1450
1451 /* Continue from after the atomic group, updating the offsets high water
1452 mark, since extracts may have been taken. */
1453
1454 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1455
1456 offset_top = md->end_offset_top;
1457 eptr = md->end_match_ptr;
1458
1459 /* For a non-repeating ket, just continue at this level. This also
1460 happens for a repeating ket if no characters were matched in the group.
1461 This is the forcible breaking of infinite loops as implemented in Perl
1462 5.005. If there is an options reset, it will get obeyed in the normal
1463 course of events. */
1464
1465 if (*ecode == OP_KET || eptr == saved_eptr)
1466 {
1467 ecode += 1+LINK_SIZE;
1468 break;
1469 }
1470
1471 /* The repeating kets try the rest of the pattern or restart from the
1472 preceding bracket, in the appropriate order. The second "call" of match()
1473 uses tail recursion, to avoid using another stack frame. We need to reset
1474 any options that changed within the bracket before re-running it, so
1475 check the next opcode. */
1476
1477 if (ecode[1+LINK_SIZE] == OP_OPT)
1478 {
1479 ims = (ims & ~PCRE_IMS) | ecode[4];
1480 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1481 }
1482
1483 if (*ecode == OP_KETRMIN)
1484 {
1485 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1486 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1487 ecode = prev;
1488 flags = 0;
1489 goto TAIL_RECURSE;
1490 }
1491 else /* OP_KETRMAX */
1492 {
1493 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1494 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1495 ecode += 1 + LINK_SIZE;
1496 flags = 0;
1497 goto TAIL_RECURSE;
1498 }
1499 /* Control never gets here */
1500
1501 /* An alternation is the end of a branch; scan along to find the end of the
1502 bracketed group and go to there. */
1503
1504 case OP_ALT:
1505 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1506 break;
1507
1508 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1509 indicating that it may occur zero times. It may repeat infinitely, or not
1510 at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1511 with fixed upper repeat limits are compiled as a number of copies, with the
1512 optional ones preceded by BRAZERO or BRAMINZERO. */
1513
1514 case OP_BRAZERO:
1515 {
1516 next = ecode+1;
1517 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1518 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1519 do next += GET(next,1); while (*next == OP_ALT);
1520 ecode = next + 1 + LINK_SIZE;
1521 }
1522 break;
1523
1524 case OP_BRAMINZERO:
1525 {
1526 next = ecode+1;
1527 do next += GET(next, 1); while (*next == OP_ALT);
1528 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1529 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1530 ecode++;
1531 }
1532 break;
1533
1534 case OP_SKIPZERO:
1535 {
1536 next = ecode+1;
1537 do next += GET(next,1); while (*next == OP_ALT);
1538 ecode = next + 1 + LINK_SIZE;
1539 }
1540 break;
1541
1542 /* End of a group, repeated or non-repeating. */
1543
1544 case OP_KET:
1545 case OP_KETRMIN:
1546 case OP_KETRMAX:
1547 prev = ecode - GET(ecode, 1);
1548
1549 /* If this was a group that remembered the subject start, in order to break
1550 infinite repeats of empty string matches, retrieve the subject start from
1551 the chain. Otherwise, set it NULL. */
1552
1553 if (*prev >= OP_SBRA)
1554 {
1555 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1556 eptrb = eptrb->epb_prev; /* Backup to previous group */
1557 }
1558 else saved_eptr = NULL;
1559
1560 /* If we are at the end of an assertion group or an atomic group, stop
1561 matching and return MATCH_MATCH, but record the current high water mark for
1562 use by positive assertions. We also need to record the match start in case
1563 it was changed by \K. */
1564
1565 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1566 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1567 *prev == OP_ONCE)
1568 {
1569 md->end_match_ptr = eptr; /* For ONCE */
1570 md->end_offset_top = offset_top;
1571 md->start_match_ptr = mstart;
1572 MRRETURN(MATCH_MATCH);
1573 }
1574
1575 /* For capturing groups we have to check the group number back at the start
1576 and if necessary complete handling an extraction by setting the offsets and
1577 bumping the high water mark. Note that whole-pattern recursion is coded as
1578 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1579 when the OP_END is reached. Other recursion is handled here. */
1580
1581 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1582 {
1583 number = GET2(prev, 1+LINK_SIZE);
1584 offset = number << 1;
1585
1586 #ifdef PCRE_DEBUG
1587 printf("end bracket %d", number);
1588 printf("\n");
1589 #endif
1590
1591 md->capture_last = number;
1592 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1593 {
1594 md->offset_vector[offset] =
1595 md->offset_vector[md->offset_end - number];
1596 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1597 if (offset_top <= offset) offset_top = offset + 2;
1598 }
1599
1600 /* Handle a recursively called group. Restore the offsets
1601 appropriately and continue from after the call. */
1602
1603 if (md->recursive != NULL && md->recursive->group_num == number)
1604 {
1605 recursion_info *rec = md->recursive;
1606 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1607 md->recursive = rec->prevrec;
1608 memcpy(md->offset_vector, rec->offset_save,
1609 rec->saved_max * sizeof(int));
1610 offset_top = rec->save_offset_top;
1611 ecode = rec->after_call;
1612 ims = original_ims;
1613 break;
1614 }
1615 }
1616
1617 /* For both capturing and non-capturing groups, reset the value of the ims
1618 flags, in case they got changed during the group. */
1619
1620 ims = original_ims;
1621 DPRINTF(("ims reset to %02lx\n", ims));
1622
1623 /* For a non-repeating ket, just continue at this level. This also
1624 happens for a repeating ket if no characters were matched in the group.
1625 This is the forcible breaking of infinite loops as implemented in Perl
1626 5.005. If there is an options reset, it will get obeyed in the normal
1627 course of events. */
1628
1629 if (*ecode == OP_KET || eptr == saved_eptr)
1630 {
1631 ecode += 1 + LINK_SIZE;
1632 break;
1633 }
1634
1635 /* The repeating kets try the rest of the pattern or restart from the
1636 preceding bracket, in the appropriate order. In the second case, we can use
1637 tail recursion to avoid using another stack frame, unless we have an
1638 unlimited repeat of a group that can match an empty string. */
1639
1640 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1641
1642 if (*ecode == OP_KETRMIN)
1643 {
1644 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1645 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1646 if (flags != 0) /* Could match an empty string */
1647 {
1648 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1649 RRETURN(rrc);
1650 }
1651 ecode = prev;
1652 goto TAIL_RECURSE;
1653 }
1654 else /* OP_KETRMAX */
1655 {
1656 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1657 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1658 ecode += 1 + LINK_SIZE;
1659 flags = 0;
1660 goto TAIL_RECURSE;
1661 }
1662 /* Control never gets here */
1663
1664 /* Start of subject unless notbol, or after internal newline if multiline */
1665
1666 case OP_CIRC:
1667 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1668 if ((ims & PCRE_MULTILINE) != 0)
1669 {
1670 if (eptr != md->start_subject &&
1671 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1672 MRRETURN(MATCH_NOMATCH);
1673 ecode++;
1674 break;
1675 }
1676 /* ... else fall through */
1677
1678 /* Start of subject assertion */
1679
1680 case OP_SOD:
1681 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1682 ecode++;
1683 break;
1684
1685 /* Start of match assertion */
1686
1687 case OP_SOM:
1688 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1689 ecode++;
1690 break;
1691
1692 /* Reset the start of match point */
1693
1694 case OP_SET_SOM:
1695 mstart = eptr;
1696 ecode++;
1697 break;
1698
1699 /* Assert before internal newline if multiline, or before a terminating
1700 newline unless endonly is set, else end of subject unless noteol is set. */
1701
1702 case OP_DOLL:
1703 if ((ims & PCRE_MULTILINE) != 0)
1704 {
1705 if (eptr < md->end_subject)
1706 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1707 else
1708 {
1709 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1710 SCHECK_PARTIAL();
1711 }
1712 ecode++;
1713 break;
1714 }
1715 else /* Not multiline */
1716 {
1717 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1718 if (!md->endonly) goto ASSERT_NL_OR_EOS;
1719 }
1720
1721 /* ... else fall through for endonly */
1722
1723 /* End of subject assertion (\z) */
1724
1725 case OP_EOD:
1726 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1727 SCHECK_PARTIAL();
1728 ecode++;
1729 break;
1730
1731 /* End of subject or ending \n assertion (\Z) */
1732
1733 case OP_EODN:
1734 ASSERT_NL_OR_EOS:
1735 if (eptr < md->end_subject &&
1736 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1737 MRRETURN(MATCH_NOMATCH);
1738
1739 /* Either at end of string or \n before end. */
1740
1741 SCHECK_PARTIAL();
1742 ecode++;
1743 break;
1744
1745 /* Word boundary assertions */
1746
1747 case OP_NOT_WORD_BOUNDARY:
1748 case OP_WORD_BOUNDARY:
1749 {
1750
1751 /* Find out if the previous and current characters are "word" characters.
1752 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1753 be "non-word" characters. Remember the earliest consulted character for
1754 partial matching. */
1755
1756 #ifdef SUPPORT_UTF8
1757 if (utf8)
1758 {
1759 /* Get status of previous character */
1760
1761 if (eptr == md->start_subject) prev_is_word = FALSE; else
1762 {
1763 USPTR lastptr = eptr - 1;
1764 while((*lastptr & 0xc0) == 0x80) lastptr--;
1765 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1766 GETCHAR(c, lastptr);
1767 #ifdef SUPPORT_UCP
1768 if (md->use_ucp)
1769 {
1770 if (c == '_') prev_is_word = TRUE; else
1771 {
1772 int cat = UCD_CATEGORY(c);
1773 prev_is_word = (cat == ucp_L || cat == ucp_N);
1774 }
1775 }
1776 else
1777 #endif
1778 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1779 }
1780
1781 /* Get status of next character */
1782
1783 if (eptr >= md->end_subject)
1784 {
1785 SCHECK_PARTIAL();
1786 cur_is_word = FALSE;
1787 }
1788 else
1789 {
1790 GETCHAR(c, eptr);
1791 #ifdef SUPPORT_UCP
1792 if (md->use_ucp)
1793 {
1794 if (c == '_') cur_is_word = TRUE; else
1795 {
1796 int cat = UCD_CATEGORY(c);
1797 cur_is_word = (cat == ucp_L || cat == ucp_N);
1798 }
1799 }
1800 else
1801 #endif
1802 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1803 }
1804 }
1805 else
1806 #endif
1807
1808 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1809 consistency with the behaviour of \w we do use it in this case. */
1810
1811 {
1812 /* Get status of previous character */
1813
1814 if (eptr == md->start_subject) prev_is_word = FALSE; else
1815 {
1816 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1817 #ifdef SUPPORT_UCP
1818 if (md->use_ucp)
1819 {
1820 c = eptr[-1];
1821 if (c == '_') prev_is_word = TRUE; else
1822 {
1823 int cat = UCD_CATEGORY(c);
1824 prev_is_word = (cat == ucp_L || cat == ucp_N);
1825 }
1826 }
1827 else
1828 #endif
1829 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1830 }
1831
1832 /* Get status of next character */
1833
1834 if (eptr >= md->end_subject)
1835 {
1836 SCHECK_PARTIAL();
1837 cur_is_word = FALSE;
1838 }
1839 else
1840 #ifdef SUPPORT_UCP
1841 if (md->use_ucp)
1842 {
1843 c = *eptr;
1844 if (c == '_') cur_is_word = TRUE; else
1845 {
1846 int cat = UCD_CATEGORY(c);
1847 cur_is_word = (cat == ucp_L || cat == ucp_N);
1848 }
1849 }
1850 else
1851 #endif
1852 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1853 }
1854
1855 /* Now see if the situation is what we want */
1856
1857 if ((*ecode++ == OP_WORD_BOUNDARY)?
1858 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1859 MRRETURN(MATCH_NOMATCH);
1860 }
1861 break;
1862
1863 /* Match a single character type; inline for speed */
1864
1865 case OP_ANY:
1866 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1867 /* Fall through */
1868
1869 case OP_ALLANY:
1870 if (eptr++ >= md->end_subject)
1871 {
1872 SCHECK_PARTIAL();
1873 MRRETURN(MATCH_NOMATCH);
1874 }
1875 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1876 ecode++;
1877 break;
1878
1879 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1880 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1881
1882 case OP_ANYBYTE:
1883 if (eptr++ >= md->end_subject)
1884 {
1885 SCHECK_PARTIAL();
1886 MRRETURN(MATCH_NOMATCH);
1887 }
1888 ecode++;
1889 break;
1890
1891 case OP_NOT_DIGIT:
1892 if (eptr >= md->end_subject)
1893 {
1894 SCHECK_PARTIAL();
1895 MRRETURN(MATCH_NOMATCH);
1896 }
1897 GETCHARINCTEST(c, eptr);
1898 if (
1899 #ifdef SUPPORT_UTF8
1900 c < 256 &&
1901 #endif
1902 (md->ctypes[c] & ctype_digit) != 0
1903 )
1904 MRRETURN(MATCH_NOMATCH);
1905 ecode++;
1906 break;
1907
1908 case OP_DIGIT:
1909 if (eptr >= md->end_subject)
1910 {
1911 SCHECK_PARTIAL();
1912 MRRETURN(MATCH_NOMATCH);
1913 }
1914 GETCHARINCTEST(c, eptr);
1915 if (
1916 #ifdef SUPPORT_UTF8
1917 c >= 256 ||
1918 #endif
1919 (md->ctypes[c] & ctype_digit) == 0
1920 )
1921 MRRETURN(MATCH_NOMATCH);
1922 ecode++;
1923 break;
1924
1925 case OP_NOT_WHITESPACE:
1926 if (eptr >= md->end_subject)
1927 {
1928 SCHECK_PARTIAL();
1929 MRRETURN(MATCH_NOMATCH);
1930 }
1931 GETCHARINCTEST(c, eptr);
1932 if (
1933 #ifdef SUPPORT_UTF8
1934 c < 256 &&
1935 #endif
1936 (md->ctypes[c] & ctype_space) != 0
1937 )
1938 MRRETURN(MATCH_NOMATCH);
1939 ecode++;
1940 break;
1941
1942 case OP_WHITESPACE:
1943 if (eptr >= md->end_subject)
1944 {
1945 SCHECK_PARTIAL();
1946 MRRETURN(MATCH_NOMATCH);
1947 }
1948 GETCHARINCTEST(c, eptr);
1949 if (
1950 #ifdef SUPPORT_UTF8
1951 c >= 256 ||
1952 #endif
1953 (md->ctypes[c] & ctype_space) == 0
1954 )
1955 MRRETURN(MATCH_NOMATCH);
1956 ecode++;
1957 break;
1958
1959 case OP_NOT_WORDCHAR:
1960 if (eptr >= md->end_subject)
1961 {
1962 SCHECK_PARTIAL();
1963 MRRETURN(MATCH_NOMATCH);
1964 }
1965 GETCHARINCTEST(c, eptr);
1966 if (
1967 #ifdef SUPPORT_UTF8
1968 c < 256 &&
1969 #endif
1970 (md->ctypes[c] & ctype_word) != 0
1971 )
1972 MRRETURN(MATCH_NOMATCH);
1973 ecode++;
1974 break;
1975
1976 case OP_WORDCHAR:
1977 if (eptr >= md->end_subject)
1978 {
1979 SCHECK_PARTIAL();
1980 MRRETURN(MATCH_NOMATCH);
1981 }
1982 GETCHARINCTEST(c, eptr);
1983 if (
1984 #ifdef SUPPORT_UTF8
1985 c >= 256 ||
1986 #endif
1987 (md->ctypes[c] & ctype_word) == 0
1988 )
1989 MRRETURN(MATCH_NOMATCH);
1990 ecode++;
1991 break;
1992
1993 case OP_ANYNL:
1994 if (eptr >= md->end_subject)
1995 {
1996 SCHECK_PARTIAL();
1997 MRRETURN(MATCH_NOMATCH);
1998 }
1999 GETCHARINCTEST(c, eptr);
2000 switch(c)
2001 {
2002 default: MRRETURN(MATCH_NOMATCH);
2003 case 0x000d:
2004 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2005 break;
2006
2007 case 0x000a:
2008 break;
2009
2010 case 0x000b:
2011 case 0x000c:
2012 case 0x0085:
2013 case 0x2028:
2014 case 0x2029:
2015 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2016 break;
2017 }
2018 ecode++;
2019 break;
2020
2021 case OP_NOT_HSPACE:
2022 if (eptr >= md->end_subject)
2023 {
2024 SCHECK_PARTIAL();
2025 MRRETURN(MATCH_NOMATCH);
2026 }
2027 GETCHARINCTEST(c, eptr);
2028 switch(c)
2029 {
2030 default: break;
2031 case 0x09: /* HT */
2032 case 0x20: /* SPACE */
2033 case 0xa0: /* NBSP */
2034 case 0x1680: /* OGHAM SPACE MARK */
2035 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2036 case 0x2000: /* EN QUAD */
2037 case 0x2001: /* EM QUAD */
2038 case 0x2002: /* EN SPACE */
2039 case 0x2003: /* EM SPACE */
2040 case 0x2004: /* THREE-PER-EM SPACE */
2041 case 0x2005: /* FOUR-PER-EM SPACE */
2042 case 0x2006: /* SIX-PER-EM SPACE */
2043 case 0x2007: /* FIGURE SPACE */
2044 case 0x2008: /* PUNCTUATION SPACE */
2045 case 0x2009: /* THIN SPACE */
2046 case 0x200A: /* HAIR SPACE */
2047 case 0x202f: /* NARROW NO-BREAK SPACE */
2048 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2049 case 0x3000: /* IDEOGRAPHIC SPACE */
2050 MRRETURN(MATCH_NOMATCH);
2051 }
2052 ecode++;
2053 break;
2054
2055 case OP_HSPACE:
2056 if (eptr >= md->end_subject)
2057 {
2058 SCHECK_PARTIAL();
2059 MRRETURN(MATCH_NOMATCH);
2060 }
2061 GETCHARINCTEST(c, eptr);
2062 switch(c)
2063 {
2064 default: MRRETURN(MATCH_NOMATCH);
2065 case 0x09: /* HT */
2066 case 0x20: /* SPACE */
2067 case 0xa0: /* NBSP */
2068 case 0x1680: /* OGHAM SPACE MARK */
2069 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2070 case 0x2000: /* EN QUAD */
2071 case 0x2001: /* EM QUAD */
2072 case 0x2002: /* EN SPACE */
2073 case 0x2003: /* EM SPACE */
2074 case 0x2004: /* THREE-PER-EM SPACE */
2075 case 0x2005: /* FOUR-PER-EM SPACE */
2076 case 0x2006: /* SIX-PER-EM SPACE */
2077 case 0x2007: /* FIGURE SPACE */
2078 case 0x2008: /* PUNCTUATION SPACE */
2079 case 0x2009: /* THIN SPACE */
2080 case 0x200A: /* HAIR SPACE */
2081 case 0x202f: /* NARROW NO-BREAK SPACE */
2082 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2083 case 0x3000: /* IDEOGRAPHIC SPACE */
2084 break;
2085 }
2086 ecode++;
2087 break;
2088
2089 case OP_NOT_VSPACE:
2090 if (eptr >= md->end_subject)
2091 {
2092 SCHECK_PARTIAL();
2093 MRRETURN(MATCH_NOMATCH);
2094 }
2095 GETCHARINCTEST(c, eptr);
2096 switch(c)
2097 {
2098 default: break;
2099 case 0x0a: /* LF */
2100 case 0x0b: /* VT */
2101 case 0x0c: /* FF */
2102 case 0x0d: /* CR */
2103 case 0x85: /* NEL */
2104 case 0x2028: /* LINE SEPARATOR */
2105 case 0x2029: /* PARAGRAPH SEPARATOR */
2106 MRRETURN(MATCH_NOMATCH);
2107 }
2108 ecode++;
2109 break;
2110
2111 case OP_VSPACE:
2112 if (eptr >= md->end_subject)
2113 {
2114 SCHECK_PARTIAL();
2115 MRRETURN(MATCH_NOMATCH);
2116 }
2117 GETCHARINCTEST(c, eptr);
2118 switch(c)
2119 {
2120 default: MRRETURN(MATCH_NOMATCH);
2121 case 0x0a: /* LF */
2122 case 0x0b: /* VT */
2123 case 0x0c: /* FF */
2124 case 0x0d: /* CR */
2125 case 0x85: /* NEL */
2126 case 0x2028: /* LINE SEPARATOR */
2127 case 0x2029: /* PARAGRAPH SEPARATOR */
2128 break;
2129 }
2130 ecode++;
2131 break;
2132
2133 #ifdef SUPPORT_UCP
2134 /* Check the next character by Unicode property. We will get here only
2135 if the support is in the binary; otherwise a compile-time error occurs. */
2136
2137 case OP_PROP:
2138 case OP_NOTPROP:
2139 if (eptr >= md->end_subject)
2140 {
2141 SCHECK_PARTIAL();
2142 MRRETURN(MATCH_NOMATCH);
2143 }
2144 GETCHARINCTEST(c, eptr);
2145 {
2146 const ucd_record *prop = GET_UCD(c);
2147
2148 switch(ecode[1])
2149 {
2150 case PT_ANY:
2151 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2152 break;
2153
2154 case PT_LAMP:
2155 if ((prop->chartype == ucp_Lu ||
2156 prop->chartype == ucp_Ll ||
2157 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2158 MRRETURN(MATCH_NOMATCH);
2159 break;
2160
2161 case PT_GC:
2162 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2163 MRRETURN(MATCH_NOMATCH);
2164 break;
2165
2166 case PT_PC:
2167 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2168 MRRETURN(MATCH_NOMATCH);
2169 break;
2170
2171 case PT_SC:
2172 if ((ecode[2] != prop->script) == (op == OP_PROP))
2173 MRRETURN(MATCH_NOMATCH);
2174 break;
2175
2176 /* These are specials */
2177
2178 case PT_ALNUM:
2179 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2180 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2181 MRRETURN(MATCH_NOMATCH);
2182 break;
2183
2184 case PT_SPACE: /* Perl space */
2185 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2186 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2187 == (op == OP_NOTPROP))
2188 MRRETURN(MATCH_NOMATCH);
2189 break;
2190
2191 case PT_PXSPACE: /* POSIX space */
2192 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2193 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2194 c == CHAR_FF || c == CHAR_CR)
2195 == (op == OP_NOTPROP))
2196 MRRETURN(MATCH_NOMATCH);
2197 break;
2198
2199 case PT_WORD:
2200 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2201 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2202 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2203 MRRETURN(MATCH_NOMATCH);
2204 break;
2205
2206 /* This should never occur */
2207
2208 default:
2209 RRETURN(PCRE_ERROR_INTERNAL);
2210 }
2211
2212 ecode += 3;
2213 }
2214 break;
2215
2216 /* Match an extended Unicode sequence. We will get here only if the support
2217 is in the binary; otherwise a compile-time error occurs. */
2218
2219 case OP_EXTUNI:
2220 if (eptr >= md->end_subject)
2221 {
2222 SCHECK_PARTIAL();
2223 MRRETURN(MATCH_NOMATCH);
2224 }
2225 GETCHARINCTEST(c, eptr);
2226 {
2227 int category = UCD_CATEGORY(c);
2228 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2229 while (eptr < md->end_subject)
2230 {
2231 int len = 1;
2232 if (!utf8) c = *eptr; else
2233 {
2234 GETCHARLEN(c, eptr, len);
2235 }
2236 category = UCD_CATEGORY(c);
2237 if (category != ucp_M) break;
2238 eptr += len;
2239 }
2240 }
2241 ecode++;
2242 break;
2243 #endif
2244
2245
2246 /* Match a back reference, possibly repeatedly. Look past the end of the
2247 item to see if there is repeat information following. The code is similar
2248 to that for character classes, but repeated for efficiency. Then obey
2249 similar code to character type repeats - written out again for speed.
2250 However, if the referenced string is the empty string, always treat
2251 it as matched, any number of times (otherwise there could be infinite
2252 loops). */
2253
2254 case OP_REF:
2255 {
2256 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2257 ecode += 3;
2258
2259 /* If the reference is unset, there are two possibilities:
2260
2261 (a) In the default, Perl-compatible state, set the length to be longer
2262 than the amount of subject left; this ensures that every attempt at a
2263 match fails. We can't just fail here, because of the possibility of
2264 quantifiers with zero minima.
2265
2266 (b) If the JavaScript compatibility flag is set, set the length to zero
2267 so that the back reference matches an empty string.
2268
2269 Otherwise, set the length to the length of what was matched by the
2270 referenced subpattern. */
2271
2272 if (offset >= offset_top || md->offset_vector[offset] < 0)
2273 length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2274 else
2275 length = md->offset_vector[offset+1] - md->offset_vector[offset];
2276
2277 /* Set up for repetition, or handle the non-repeated case */
2278
2279 switch (*ecode)
2280 {
2281 case OP_CRSTAR:
2282 case OP_CRMINSTAR:
2283 case OP_CRPLUS:
2284 case OP_CRMINPLUS:
2285 case OP_CRQUERY:
2286 case OP_CRMINQUERY:
2287 c = *ecode++ - OP_CRSTAR;
2288 minimize = (c & 1) != 0;
2289 min = rep_min[c]; /* Pick up values from tables; */
2290 max = rep_max[c]; /* zero for max => infinity */
2291 if (max == 0) max = INT_MAX;
2292 break;
2293
2294 case OP_CRRANGE:
2295 case OP_CRMINRANGE:
2296 minimize = (*ecode == OP_CRMINRANGE);
2297 min = GET2(ecode, 1);
2298 max = GET2(ecode, 3);
2299 if (max == 0) max = INT_MAX;
2300 ecode += 5;
2301 break;
2302
2303 default: /* No repeat follows */
2304 if (!match_ref(offset, eptr, length, md, ims))
2305 {
2306 CHECK_PARTIAL();
2307 MRRETURN(MATCH_NOMATCH);
2308 }
2309 eptr += length;
2310 continue; /* With the main loop */
2311 }
2312
2313 /* If the length of the reference is zero, just continue with the
2314 main loop. */
2315
2316 if (length == 0) continue;
2317
2318 /* First, ensure the minimum number of matches are present. We get back
2319 the length of the reference string explicitly rather than passing the
2320 address of eptr, so that eptr can be a register variable. */
2321
2322 for (i = 1; i <= min; i++)
2323 {
2324 if (!match_ref(offset, eptr, length, md, ims))
2325 {
2326 CHECK_PARTIAL();
2327 MRRETURN(MATCH_NOMATCH);
2328 }
2329 eptr += length;
2330 }
2331
2332 /* If min = max, continue at the same level without recursion.
2333 They are not both allowed to be zero. */
2334
2335 if (min == max) continue;
2336
2337 /* If minimizing, keep trying and advancing the pointer */
2338
2339 if (minimize)
2340 {
2341 for (fi = min;; fi++)
2342 {
2343 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2344 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2345 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2346 if (!match_ref(offset, eptr, length, md, ims))
2347 {
2348 CHECK_PARTIAL();
2349 MRRETURN(MATCH_NOMATCH);
2350 }
2351 eptr += length;
2352 }
2353 /* Control never gets here */
2354 }
2355
2356 /* If maximizing, find the longest string and work backwards */
2357
2358 else
2359 {
2360 pp = eptr;
2361 for (i = min; i < max; i++)
2362 {
2363 if (!match_ref(offset, eptr, length, md, ims))
2364 {
2365 CHECK_PARTIAL();
2366 break;
2367 }
2368 eptr += length;
2369 }
2370 while (eptr >= pp)
2371 {
2372 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2373 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2374 eptr -= length;
2375 }
2376 MRRETURN(MATCH_NOMATCH);
2377 }
2378 }
2379 /* Control never gets here */
2380
2381 /* Match a bit-mapped character class, possibly repeatedly. This op code is
2382 used when all the characters in the class have values in the range 0-255,
2383 and either the matching is caseful, or the characters are in the range
2384 0-127 when UTF-8 processing is enabled. The only difference between
2385 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2386 encountered.
2387
2388 First, look past the end of the item to see if there is repeat information
2389 following. Then obey similar code to character type repeats - written out
2390 again for speed. */
2391
2392 case OP_NCLASS:
2393 case OP_CLASS:
2394 {
2395 data = ecode + 1; /* Save for matching */
2396 ecode += 33; /* Advance past the item */
2397
2398 switch (*ecode)
2399 {
2400 case OP_CRSTAR:
2401 case OP_CRMINSTAR:
2402 case OP_CRPLUS:
2403 case OP_CRMINPLUS:
2404 case OP_CRQUERY:
2405 case OP_CRMINQUERY:
2406 c = *ecode++ - OP_CRSTAR;
2407 minimize = (c & 1) != 0;
2408 min = rep_min[c]; /* Pick up values from tables; */
2409 max = rep_max[c]; /* zero for max => infinity */
2410 if (max == 0) max = INT_MAX;
2411 break;
2412
2413 case OP_CRRANGE:
2414 case OP_CRMINRANGE:
2415 minimize = (*ecode == OP_CRMINRANGE);
2416 min = GET2(ecode, 1);
2417 max = GET2(ecode, 3);
2418 if (max == 0) max = INT_MAX;
2419 ecode += 5;
2420 break;
2421
2422 default: /* No repeat follows */
2423 min = max = 1;
2424 break;
2425 }
2426
2427 /* First, ensure the minimum number of matches are present. */
2428
2429 #ifdef SUPPORT_UTF8
2430 /* UTF-8 mode */
2431 if (utf8)
2432 {
2433 for (i = 1; i <= min; i++)
2434 {
2435 if (eptr >= md->end_subject)
2436 {
2437 SCHECK_PARTIAL();
2438 MRRETURN(MATCH_NOMATCH);
2439 }
2440 GETCHARINC(c, eptr);
2441 if (c > 255)
2442 {
2443 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2444 }
2445 else
2446 {
2447 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2448 }
2449 }
2450 }
2451 else
2452 #endif
2453 /* Not UTF-8 mode */
2454 {
2455 for (i = 1; i <= min; i++)
2456 {
2457 if (eptr >= md->end_subject)
2458 {
2459 SCHECK_PARTIAL();
2460 MRRETURN(MATCH_NOMATCH);
2461 }
2462 c = *eptr++;
2463 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2464 }
2465 }
2466
2467 /* If max == min we can continue with the main loop without the
2468 need to recurse. */
2469
2470 if (min == max) continue;
2471
2472 /* If minimizing, keep testing the rest of the expression and advancing
2473 the pointer while it matches the class. */
2474
2475 if (minimize)
2476 {
2477 #ifdef SUPPORT_UTF8
2478 /* UTF-8 mode */
2479 if (utf8)
2480 {
2481 for (fi = min;; fi++)
2482 {
2483 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2484 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2485 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2486 if (eptr >= md->end_subject)
2487 {
2488 SCHECK_PARTIAL();
2489 MRRETURN(MATCH_NOMATCH);
2490 }
2491 GETCHARINC(c, eptr);
2492 if (c > 255)
2493 {
2494 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2495 }
2496 else
2497 {
2498 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2499 }
2500 }
2501 }
2502 else
2503 #endif
2504 /* Not UTF-8 mode */
2505 {
2506 for (fi = min;; fi++)
2507 {
2508 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2509 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2510 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2511 if (eptr >= md->end_subject)
2512 {
2513 SCHECK_PARTIAL();
2514 MRRETURN(MATCH_NOMATCH);
2515 }
2516 c = *eptr++;
2517 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2518 }
2519 }
2520 /* Control never gets here */
2521 }
2522
2523 /* If maximizing, find the longest possible run, then work backwards. */
2524
2525 else
2526 {
2527 pp = eptr;
2528
2529 #ifdef SUPPORT_UTF8
2530 /* UTF-8 mode */
2531 if (utf8)
2532 {
2533 for (i = min; i < max; i++)
2534 {
2535 int len = 1;
2536 if (eptr >= md->end_subject)
2537 {
2538 SCHECK_PARTIAL();
2539 break;
2540 }
2541 GETCHARLEN(c, eptr, len);
2542 if (c > 255)
2543 {
2544 if (op == OP_CLASS) break;
2545 }
2546 else
2547 {
2548 if ((data[c/8] & (1 << (c&7))) == 0) break;
2549 }
2550 eptr += len;
2551 }
2552 for (;;)
2553 {
2554 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2555 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2556 if (eptr-- == pp) break; /* Stop if tried at original pos */
2557 BACKCHAR(eptr);
2558 }
2559 }
2560 else
2561 #endif
2562 /* Not UTF-8 mode */
2563 {
2564 for (i = min; i < max; i++)
2565 {
2566 if (eptr >= md->end_subject)
2567 {
2568 SCHECK_PARTIAL();
2569 break;
2570 }
2571 c = *eptr;
2572 if ((data[c/8] & (1 << (c&7))) == 0) break;
2573 eptr++;
2574 }
2575 while (eptr >= pp)
2576 {
2577 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2578 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2579 eptr--;
2580 }
2581 }
2582
2583 MRRETURN(MATCH_NOMATCH);
2584 }
2585 }
2586 /* Control never gets here */
2587
2588
2589 /* Match an extended character class. This opcode is encountered only
2590 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2591 mode, because Unicode properties are supported in non-UTF-8 mode. */
2592
2593 #ifdef SUPPORT_UTF8
2594 case OP_XCLASS:
2595 {
2596 data = ecode + 1 + LINK_SIZE; /* Save for matching */
2597 ecode += GET(ecode, 1); /* Advance past the item */
2598
2599 switch (*ecode)
2600 {
2601 case OP_CRSTAR:
2602 case OP_CRMINSTAR:
2603 case OP_CRPLUS:
2604 case OP_CRMINPLUS:
2605 case OP_CRQUERY:
2606 case OP_CRMINQUERY:
2607 c = *ecode++ - OP_CRSTAR;
2608 minimize = (c & 1) != 0;
2609 min = rep_min[c]; /* Pick up values from tables; */
2610 max = rep_max[c]; /* zero for max => infinity */
2611 if (max == 0) max = INT_MAX;
2612 break;
2613
2614 case OP_CRRANGE:
2615 case OP_CRMINRANGE:
2616 minimize = (*ecode == OP_CRMINRANGE);
2617 min = GET2(ecode, 1);
2618 max = GET2(ecode, 3);
2619 if (max == 0) max = INT_MAX;
2620 ecode += 5;
2621 break;
2622
2623 default: /* No repeat follows */
2624 min = max = 1;
2625 break;
2626 }
2627
2628 /* First, ensure the minimum number of matches are present. */
2629
2630 for (i = 1; i <= min; i++)
2631 {
2632 if (eptr >= md->end_subject)
2633 {
2634 SCHECK_PARTIAL();
2635 MRRETURN(MATCH_NOMATCH);
2636 }
2637 GETCHARINCTEST(c, eptr);
2638 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2639 }
2640
2641 /* If max == min we can continue with the main loop without the
2642 need to recurse. */
2643
2644 if (min == max) continue;
2645
2646 /* If minimizing, keep testing the rest of the expression and advancing
2647 the pointer while it matches the class. */
2648
2649 if (minimize)
2650 {
2651 for (fi = min;; fi++)
2652 {
2653 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2654 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2655 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2656 if (eptr >= md->end_subject)
2657 {
2658 SCHECK_PARTIAL();
2659 MRRETURN(MATCH_NOMATCH);
2660 }
2661 GETCHARINCTEST(c, eptr);
2662 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2663 }
2664 /* Control never gets here */
2665 }
2666
2667 /* If maximizing, find the longest possible run, then work backwards. */
2668
2669 else
2670 {
2671 pp = eptr;
2672 for (i = min; i < max; i++)
2673 {
2674 int len = 1;
2675 if (eptr >= md->end_subject)
2676 {
2677 SCHECK_PARTIAL();
2678 break;
2679 }
2680 GETCHARLENTEST(c, eptr, len);
2681 if (!_pcre_xclass(c, data)) break;
2682 eptr += len;
2683 }
2684 for(;;)
2685 {
2686 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2687 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2688 if (eptr-- == pp) break; /* Stop if tried at original pos */
2689 if (utf8) BACKCHAR(eptr);
2690 }
2691 MRRETURN(MATCH_NOMATCH);
2692 }
2693
2694 /* Control never gets here */
2695 }
2696 #endif /* End of XCLASS */
2697
2698 /* Match a single character, casefully */
2699
2700 case OP_CHAR:
2701 #ifdef SUPPORT_UTF8
2702 if (utf8)
2703 {
2704 length = 1;
2705 ecode++;
2706 GETCHARLEN(fc, ecode, length);
2707 if (length > md->end_subject - eptr)
2708 {
2709 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2710 MRRETURN(MATCH_NOMATCH);
2711 }
2712 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2713 }
2714 else
2715 #endif
2716
2717 /* Non-UTF-8 mode */
2718 {
2719 if (md->end_subject - eptr < 1)
2720 {
2721 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2722 MRRETURN(MATCH_NOMATCH);
2723 }
2724 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2725 ecode += 2;
2726 }
2727 break;
2728
2729 /* Match a single character, caselessly */
2730
2731 case OP_CHARNC:
2732 #ifdef SUPPORT_UTF8
2733 if (utf8)
2734 {
2735 length = 1;
2736 ecode++;
2737 GETCHARLEN(fc, ecode, length);
2738
2739 if (length > md->end_subject - eptr)
2740 {
2741 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2742 MRRETURN(MATCH_NOMATCH);
2743 }
2744
2745 /* If the pattern character's value is < 128, we have only one byte, and
2746 can use the fast lookup table. */
2747
2748 if (fc < 128)
2749 {
2750 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2751 }
2752
2753 /* Otherwise we must pick up the subject character */
2754
2755 else
2756 {
2757 unsigned int dc;
2758 GETCHARINC(dc, eptr);
2759 ecode += length;
2760
2761 /* If we have Unicode property support, we can use it to test the other
2762 case of the character, if there is one. */
2763
2764 if (fc != dc)
2765 {
2766 #ifdef SUPPORT_UCP
2767 if (dc != UCD_OTHERCASE(fc))
2768 #endif
2769 MRRETURN(MATCH_NOMATCH);
2770 }
2771 }
2772 }
2773 else
2774 #endif /* SUPPORT_UTF8 */
2775
2776 /* Non-UTF-8 mode */
2777 {
2778 if (md->end_subject - eptr < 1)
2779 {
2780 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2781 MRRETURN(MATCH_NOMATCH);
2782 }
2783 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2784 ecode += 2;
2785 }
2786 break;
2787
2788 /* Match a single character repeatedly. */
2789
2790 case OP_EXACT:
2791 min = max = GET2(ecode, 1);
2792 ecode += 3;
2793 goto REPEATCHAR;
2794
2795 case OP_POSUPTO:
2796 possessive = TRUE;
2797 /* Fall through */
2798
2799 case OP_UPTO:
2800 case OP_MINUPTO:
2801 min = 0;
2802 max = GET2(ecode, 1);
2803 minimize = *ecode == OP_MINUPTO;
2804 ecode += 3;
2805 goto REPEATCHAR;
2806
2807 case OP_POSSTAR:
2808 possessive = TRUE;
2809 min = 0;
2810 max = INT_MAX;
2811 ecode++;
2812 goto REPEATCHAR;
2813
2814 case OP_POSPLUS:
2815 possessive = TRUE;
2816 min = 1;
2817 max = INT_MAX;
2818 ecode++;
2819 goto REPEATCHAR;
2820
2821 case OP_POSQUERY:
2822 possessive = TRUE;
2823 min = 0;
2824 max = 1;
2825 ecode++;
2826 goto REPEATCHAR;
2827
2828 case OP_STAR:
2829 case OP_MINSTAR:
2830 case OP_PLUS:
2831 case OP_MINPLUS:
2832 case OP_QUERY:
2833 case OP_MINQUERY:
2834 c = *ecode++ - OP_STAR;
2835 minimize = (c & 1) != 0;
2836
2837 min = rep_min[c]; /* Pick up values from tables; */
2838 max = rep_max[c]; /* zero for max => infinity */
2839 if (max == 0) max = INT_MAX;
2840
2841 /* Common code for all repeated single-character matches. */
2842
2843 REPEATCHAR:
2844 #ifdef SUPPORT_UTF8
2845 if (utf8)
2846 {
2847 length = 1;
2848 charptr = ecode;
2849 GETCHARLEN(fc, ecode, length);
2850 ecode += length;
2851
2852 /* Handle multibyte character matching specially here. There is
2853 support for caseless matching if UCP support is present. */
2854
2855 if (length > 1)
2856 {
2857 #ifdef SUPPORT_UCP
2858 unsigned int othercase;
2859 if ((ims & PCRE_CASELESS) != 0 &&
2860 (othercase = UCD_OTHERCASE(fc)) != fc)
2861 oclength = _pcre_ord2utf8(othercase, occhars);
2862 else oclength = 0;
2863 #endif /* SUPPORT_UCP */
2864
2865 for (i = 1; i <= min; i++)
2866 {
2867 if (eptr <= md->end_subject - length &&
2868 memcmp(eptr, charptr, length) == 0) eptr += length;
2869 #ifdef SUPPORT_UCP
2870 else if (oclength > 0 &&
2871 eptr <= md->end_subject - oclength &&
2872 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2873 #endif /* SUPPORT_UCP */
2874 else
2875 {
2876 CHECK_PARTIAL();
2877 MRRETURN(MATCH_NOMATCH);
2878 }
2879 }
2880
2881 if (min == max) continue;
2882
2883 if (minimize)
2884 {
2885 for (fi = min;; fi++)
2886 {
2887 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2888 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2889 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2890 if (eptr <= md->end_subject - length &&
2891 memcmp(eptr, charptr, length) == 0) eptr += length;
2892 #ifdef SUPPORT_UCP
2893 else if (oclength > 0 &&
2894 eptr <= md->end_subject - oclength &&
2895 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2896 #endif /* SUPPORT_UCP */
2897 else
2898 {
2899 CHECK_PARTIAL();
2900 MRRETURN(MATCH_NOMATCH);
2901 }
2902 }
2903 /* Control never gets here */
2904 }
2905
2906 else /* Maximize */
2907 {
2908 pp = eptr;
2909 for (i = min; i < max; i++)
2910 {
2911 if (eptr <= md->end_subject - length &&
2912 memcmp(eptr, charptr, length) == 0) eptr += length;
2913 #ifdef SUPPORT_UCP
2914 else if (oclength > 0 &&
2915 eptr <= md->end_subject - oclength &&
2916 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2917 #endif /* SUPPORT_UCP */
2918 else
2919 {
2920 CHECK_PARTIAL();
2921 break;
2922 }
2923 }
2924
2925 if (possessive) continue;
2926
2927 for(;;)
2928 {
2929 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2930 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2931 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2932 #ifdef SUPPORT_UCP
2933 eptr--;
2934 BACKCHAR(eptr);
2935 #else /* without SUPPORT_UCP */
2936 eptr -= length;
2937 #endif /* SUPPORT_UCP */
2938 }
2939 }
2940 /* Control never gets here */
2941 }
2942
2943 /* If the length of a UTF-8 character is 1, we fall through here, and
2944 obey the code as for non-UTF-8 characters below, though in this case the
2945 value of fc will always be < 128. */
2946 }
2947 else
2948 #endif /* SUPPORT_UTF8 */
2949
2950 /* When not in UTF-8 mode, load a single-byte character. */
2951
2952 fc = *ecode++;
2953
2954 /* The value of fc at this point is always less than 256, though we may or
2955 may not be in UTF-8 mode. The code is duplicated for the caseless and
2956 caseful cases, for speed, since matching characters is likely to be quite
2957 common. First, ensure the minimum number of matches are present. If min =
2958 max, continue at the same level without recursing. Otherwise, if
2959 minimizing, keep trying the rest of the expression and advancing one
2960 matching character if failing, up to the maximum. Alternatively, if
2961 maximizing, find the maximum number of characters and work backwards. */
2962
2963 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2964 max, eptr));
2965
2966 if ((ims & PCRE_CASELESS) != 0)
2967 {
2968 fc = md->lcc[fc];
2969 for (i = 1; i <= min; i++)
2970 {
2971 if (eptr >= md->end_subject)
2972 {
2973 SCHECK_PARTIAL();
2974 MRRETURN(MATCH_NOMATCH);
2975 }
2976 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2977 }
2978 if (min == max) continue;
2979 if (minimize)
2980 {
2981 for (fi = min;; fi++)
2982 {
2983 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2984 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2985 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2986 if (eptr >= md->end_subject)
2987 {
2988 SCHECK_PARTIAL();
2989 MRRETURN(MATCH_NOMATCH);
2990 }
2991 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2992 }
2993 /* Control never gets here */
2994 }
2995 else /* Maximize */
2996 {
2997 pp = eptr;
2998 for (i = min; i < max; i++)
2999 {
3000 if (eptr >= md->end_subject)
3001 {
3002 SCHECK_PARTIAL();
3003 break;
3004 }
3005 if (fc != md->lcc[*eptr]) break;
3006 eptr++;
3007 }
3008
3009 if (possessive) continue;
3010
3011 while (eptr >= pp)
3012 {
3013 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3014 eptr--;
3015 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3016 }
3017 MRRETURN(MATCH_NOMATCH);
3018 }
3019 /* Control never gets here */
3020 }
3021
3022 /* Caseful comparisons (includes all multi-byte characters) */
3023
3024 else
3025 {
3026 for (i = 1; i <= min; i++)
3027 {
3028 if (eptr >= md->end_subject)
3029 {
3030 SCHECK_PARTIAL();
3031 MRRETURN(MATCH_NOMATCH);
3032 }
3033 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3034 }
3035
3036 if (min == max) continue;
3037
3038 if (minimize)
3039 {
3040 for (fi = min;; fi++)
3041 {
3042 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3043 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3044 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3045 if (eptr >= md->end_subject)
3046 {
3047 SCHECK_PARTIAL();
3048 MRRETURN(MATCH_NOMATCH);
3049 }
3050 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3051 }
3052 /* Control never gets here */
3053 }
3054 else /* Maximize */
3055 {
3056 pp = eptr;
3057 for (i = min; i < max; i++)
3058 {
3059 if (eptr >= md->end_subject)
3060 {
3061 SCHECK_PARTIAL();
3062 break;
3063 }
3064 if (fc != *eptr) break;
3065 eptr++;
3066 }
3067 if (possessive) continue;
3068
3069 while (eptr >= pp)
3070 {
3071 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3072 eptr--;
3073 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3074 }
3075 MRRETURN(MATCH_NOMATCH);
3076 }
3077 }
3078 /* Control never gets here */
3079
3080 /* Match a negated single one-byte character. The subject character we are
3081 checking can be multibyte. */
3082
3083 case OP_NOT:
3084 if (eptr >= md->end_subject)
3085 {
3086 SCHECK_PARTIAL();
3087 MRRETURN(MATCH_NOMATCH);
3088 }
3089 ecode++;
3090 GETCHARINCTEST(c, eptr);
3091 if ((ims & PCRE_CASELESS) != 0)
3092 {
3093 #ifdef SUPPORT_UTF8
3094 if (c < 256)
3095 #endif
3096 c = md->lcc[c];
3097 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3098 }
3099 else
3100 {
3101 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3102 }
3103 break;
3104
3105 /* Match a negated single one-byte character repeatedly. This is almost a
3106 repeat of the code for a repeated single character, but I haven't found a
3107 nice way of commoning these up that doesn't require a test of the
3108 positive/negative option for each character match. Maybe that wouldn't add
3109 very much to the time taken, but character matching *is* what this is all
3110 about... */
3111
3112 case OP_NOTEXACT:
3113 min = max = GET2(ecode, 1);
3114 ecode += 3;
3115 goto REPEATNOTCHAR;
3116
3117 case OP_NOTUPTO:
3118 case OP_NOTMINUPTO:
3119 min = 0;
3120 max = GET2(ecode, 1);
3121 minimize = *ecode == OP_NOTMINUPTO;
3122 ecode += 3;
3123 goto REPEATNOTCHAR;
3124
3125 case OP_NOTPOSSTAR:
3126 possessive = TRUE;
3127 min = 0;
3128 max = INT_MAX;
3129 ecode++;
3130 goto REPEATNOTCHAR;
3131
3132 case OP_NOTPOSPLUS:
3133 possessive = TRUE;
3134 min = 1;
3135 max = INT_MAX;
3136 ecode++;
3137 goto REPEATNOTCHAR;
3138
3139 case OP_NOTPOSQUERY:
3140 possessive = TRUE;
3141 min = 0;
3142 max = 1;
3143 ecode++;
3144 goto REPEATNOTCHAR;
3145
3146 case OP_NOTPOSUPTO:
3147 possessive = TRUE;
3148 min = 0;
3149 max = GET2(ecode, 1);
3150 ecode += 3;
3151 goto REPEATNOTCHAR;
3152
3153 case OP_NOTSTAR:
3154 case OP_NOTMINSTAR:
3155 case OP_NOTPLUS:
3156 case OP_NOTMINPLUS:
3157 case OP_NOTQUERY:
3158 case OP_NOTMINQUERY:
3159 c = *ecode++ - OP_NOTSTAR;
3160 minimize = (c & 1) != 0;
3161 min = rep_min[c]; /* Pick up values from tables; */
3162 max = rep_max[c]; /* zero for max => infinity */
3163 if (max == 0) max = INT_MAX;
3164
3165 /* Common code for all repeated single-byte matches. */
3166
3167 REPEATNOTCHAR:
3168 fc = *ecode++;
3169
3170 /* The code is duplicated for the caseless and caseful cases, for speed,
3171 since matching characters is likely to be quite common. First, ensure the
3172 minimum number of matches are present. If min = max, continue at the same
3173 level without recursing. Otherwise, if minimizing, keep trying the rest of
3174 the expression and advancing one matching character if failing, up to the
3175 maximum. Alternatively, if maximizing, find the maximum number of
3176 characters and work backwards. */
3177
3178 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3179 max, eptr));
3180
3181 if ((ims & PCRE_CASELESS) != 0)
3182 {
3183 fc = md->lcc[fc];
3184
3185 #ifdef SUPPORT_UTF8
3186 /* UTF-8 mode */
3187 if (utf8)
3188 {
3189 register unsigned int d;
3190 for (i = 1; i <= min; i++)
3191 {
3192 if (eptr >= md->end_subject)
3193 {
3194 SCHECK_PARTIAL();
3195 MRRETURN(MATCH_NOMATCH);
3196 }
3197 GETCHARINC(d, eptr);
3198 if (d < 256) d = md->lcc[d];
3199 if (fc == d) MRRETURN(MATCH_NOMATCH);
3200 }
3201 }
3202 else
3203 #endif
3204
3205 /* Not UTF-8 mode */
3206 {
3207 for (i = 1; i <= min; i++)
3208 {
3209 if (eptr >= md->end_subject)
3210 {
3211 SCHECK_PARTIAL();
3212 MRRETURN(MATCH_NOMATCH);
3213 }
3214 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3215 }
3216 }
3217
3218 if (min == max) continue;
3219
3220 if (minimize)
3221 {
3222 #ifdef SUPPORT_UTF8
3223 /* UTF-8 mode */
3224 if (utf8)
3225 {
3226 register unsigned int d;
3227 for (fi = min;; fi++)
3228 {
3229 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3230 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3231 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3232 if (eptr >= md->end_subject)
3233 {
3234 SCHECK_PARTIAL();
3235 MRRETURN(MATCH_NOMATCH);
3236 }
3237 GETCHARINC(d, eptr);
3238 if (d < 256) d = md->lcc[d];
3239 if (fc == d) MRRETURN(MATCH_NOMATCH);
3240 }
3241 }
3242 else
3243 #endif
3244 /* Not UTF-8 mode */
3245 {
3246 for (fi = min;; fi++)
3247 {
3248 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3249 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3250 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3251 if (eptr >= md->end_subject)
3252 {
3253 SCHECK_PARTIAL();
3254 MRRETURN(MATCH_NOMATCH);
3255 }
3256 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3257 }
3258 }
3259 /* Control never gets here */
3260 }
3261
3262 /* Maximize case */
3263
3264 else
3265 {
3266 pp = eptr;
3267
3268 #ifdef SUPPORT_UTF8
3269 /* UTF-8 mode */
3270 if (utf8)
3271 {
3272 register unsigned int d;
3273 for (i = min; i < max; i++)
3274 {
3275 int len = 1;
3276 if (eptr >= md->end_subject)
3277 {
3278 SCHECK_PARTIAL();
3279 break;
3280 }
3281 GETCHARLEN(d, eptr, len);
3282 if (d < 256) d = md->lcc[d];
3283 if (fc == d) break;
3284 eptr += len;
3285 }
3286 if (possessive) continue;
3287 for(;;)
3288 {
3289 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3290 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3291 if (eptr-- == pp) break; /* Stop if tried at original pos */
3292 BACKCHAR(eptr);
3293 }
3294 }
3295 else
3296 #endif
3297 /* Not UTF-8 mode */
3298 {
3299 for (i = min; i < max; i++)
3300 {
3301 if (eptr >= md->end_subject)
3302 {
3303 SCHECK_PARTIAL();
3304 break;
3305 }
3306 if (fc == md->lcc[*eptr]) break;
3307 eptr++;
3308 }
3309 if (possessive) continue;
3310 while (eptr >= pp)
3311 {
3312 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3313 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3314 eptr--;
3315 }
3316 }
3317
3318 MRRETURN(MATCH_NOMATCH);
3319 }
3320 /* Control never gets here */
3321 }
3322
3323 /* Caseful comparisons */
3324
3325 else
3326 {
3327 #ifdef SUPPORT_UTF8
3328 /* UTF-8 mode */
3329 if (utf8)
3330 {
3331 register unsigned int d;
3332 for (i = 1; i <= min; i++)
3333 {
3334 if (eptr >= md->end_subject)
3335 {
3336 SCHECK_PARTIAL();
3337 MRRETURN(MATCH_NOMATCH);
3338 }
3339 GETCHARINC(d, eptr);
3340 if (fc == d) MRRETURN(MATCH_NOMATCH);
3341 }
3342 }
3343 else
3344 #endif
3345 /* Not UTF-8 mode */
3346 {
3347 for (i = 1; i <= min; i++)
3348 {
3349 if (eptr >= md->end_subject)
3350 {
3351 SCHECK_PARTIAL();
3352 MRRETURN(MATCH_NOMATCH);
3353 }
3354 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3355 }
3356 }
3357
3358 if (min == max) continue;
3359
3360 if (minimize)
3361 {
3362 #ifdef SUPPORT_UTF8
3363 /* UTF-8 mode */
3364 if (utf8)
3365 {
3366 register unsigned int d;
3367 for (fi = min;; fi++)
3368 {
3369 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3370 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3371 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3372 if (eptr >= md->end_subject)
3373 {
3374 SCHECK_PARTIAL();
3375 MRRETURN(MATCH_NOMATCH);
3376 }
3377 GETCHARINC(d, eptr);
3378 if (fc == d) MRRETURN(MATCH_NOMATCH);
3379 }
3380 }
3381 else
3382 #endif
3383 /* Not UTF-8 mode */
3384 {
3385 for (fi = min;; fi++)
3386 {
3387 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3388 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3390 if (eptr >= md->end_subject)
3391 {
3392 SCHECK_PARTIAL();
3393 MRRETURN(MATCH_NOMATCH);
3394 }
3395 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3396 }
3397 }
3398 /* Control never gets here */
3399 }
3400
3401 /* Maximize case */
3402
3403 else
3404 {
3405 pp = eptr;
3406
3407 #ifdef SUPPORT_UTF8
3408 /* UTF-8 mode */
3409 if (utf8)
3410 {
3411 register unsigned int d;
3412 for (i = min; i < max; i++)
3413 {
3414 int len = 1;
3415 if (eptr >= md->end_subject)
3416 {
3417 SCHECK_PARTIAL();
3418 break;
3419 }
3420 GETCHARLEN(d, eptr, len);
3421 if (fc == d) break;
3422 eptr += len;
3423 }
3424 if (possessive) continue;
3425 for(;;)
3426 {
3427 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3428 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3429 if (eptr-- == pp) break; /* Stop if tried at original pos */
3430 BACKCHAR(eptr);
3431 }
3432 }
3433 else
3434 #endif
3435 /* Not UTF-8 mode */
3436 {
3437 for (i = min; i < max; i++)
3438 {
3439 if (eptr >= md->end_subject)
3440 {
3441 SCHECK_PARTIAL();
3442 break;
3443 }
3444 if (fc == *eptr) break;
3445 eptr++;
3446 }
3447 if (possessive) continue;
3448 while (eptr >= pp)
3449 {
3450 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3451 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3452 eptr--;
3453 }
3454 }
3455
3456 MRRETURN(MATCH_NOMATCH);
3457 }
3458 }
3459 /* Control never gets here */
3460
3461 /* Match a single character type repeatedly; several different opcodes
3462 share code. This is very similar to the code for single characters, but we
3463 repeat it in the interests of efficiency. */
3464
3465 case OP_TYPEEXACT:
3466 min = max = GET2(ecode, 1);
3467 minimize = TRUE;
3468 ecode += 3;
3469 goto REPEATTYPE;
3470
3471 case OP_TYPEUPTO:
3472 case OP_TYPEMINUPTO:
3473 min = 0;
3474 max = GET2(ecode, 1);
3475 minimize = *ecode == OP_TYPEMINUPTO;
3476 ecode += 3;
3477 goto REPEATTYPE;
3478
3479 case OP_TYPEPOSSTAR:
3480 possessive = TRUE;
3481 min = 0;
3482 max = INT_MAX;
3483 ecode++;
3484 goto REPEATTYPE;
3485
3486 case OP_TYPEPOSPLUS:
3487 possessive = TRUE;
3488 min = 1;
3489 max = INT_MAX;
3490 ecode++;
3491 goto REPEATTYPE;
3492
3493 case OP_TYPEPOSQUERY:
3494 possessive = TRUE;
3495 min = 0;
3496 max = 1;
3497 ecode++;
3498 goto REPEATTYPE;
3499
3500 case OP_TYPEPOSUPTO:
3501 possessive = TRUE;
3502 min = 0;
3503 max = GET2(ecode, 1);
3504 ecode += 3;
3505 goto REPEATTYPE;
3506
3507 case OP_TYPESTAR:
3508 case OP_TYPEMINSTAR:
3509 case OP_TYPEPLUS:
3510 case OP_TYPEMINPLUS:
3511 case OP_TYPEQUERY:
3512 case OP_TYPEMINQUERY:
3513 c = *ecode++ - OP_TYPESTAR;
3514 minimize = (c & 1) != 0;
3515 min = rep_min[c]; /* Pick up values from tables; */
3516 max = rep_max[c]; /* zero for max => infinity */
3517 if (max == 0) max = INT_MAX;
3518
3519 /* Common code for all repeated single character type matches. Note that
3520 in UTF-8 mode, '.' matches a character of any length, but for the other
3521 character types, the valid characters are all one-byte long. */
3522
3523 REPEATTYPE:
3524 ctype = *ecode++; /* Code for the character type */
3525
3526 #ifdef SUPPORT_UCP
3527 if (ctype == OP_PROP || ctype == OP_NOTPROP)
3528 {
3529 prop_fail_result = ctype == OP_NOTPROP;
3530 prop_type = *ecode++;
3531 prop_value = *ecode++;
3532 }
3533 else prop_type = -1;
3534 #endif
3535
3536 /* First, ensure the minimum number of matches are present. Use inline
3537 code for maximizing the speed, and do the type test once at the start
3538 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3539 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3540 and single-bytes. */
3541
3542 if (min > 0)
3543 {
3544 #ifdef SUPPORT_UCP
3545 if (prop_type >= 0)
3546 {
3547 switch(prop_type)
3548 {
3549 case PT_ANY:
3550 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3551 for (i = 1; i <= min; i++)
3552 {
3553 if (eptr >= md->end_subject)
3554 {
3555 SCHECK_PARTIAL();
3556 MRRETURN(MATCH_NOMATCH);
3557 }
3558 GETCHARINCTEST(c, eptr);
3559 }
3560 break;
3561
3562 case PT_LAMP:
3563 for (i = 1; i <= min; i++)
3564 {
3565 if (eptr >= md->end_subject)
3566 {
3567 SCHECK_PARTIAL();
3568 MRRETURN(MATCH_NOMATCH);
3569 }
3570 GETCHARINCTEST(c, eptr);
3571 prop_chartype = UCD_CHARTYPE(c);
3572 if ((prop_chartype == ucp_Lu ||
3573 prop_chartype == ucp_Ll ||
3574 prop_chartype == ucp_Lt) == prop_fail_result)
3575 MRRETURN(MATCH_NOMATCH);
3576 }
3577 break;
3578
3579 case PT_GC:
3580 for (i = 1; i <= min; i++)
3581 {
3582 if (eptr >= md->end_subject)
3583 {
3584 SCHECK_PARTIAL();
3585 MRRETURN(MATCH_NOMATCH);
3586 }
3587 GETCHARINCTEST(c, eptr);
3588 prop_category = UCD_CATEGORY(c);
3589 if ((prop_category == prop_value) == prop_fail_result)
3590 MRRETURN(MATCH_NOMATCH);
3591 }
3592 break;
3593
3594 case PT_PC:
3595 for (i = 1; i <= min; i++)
3596 {
3597 if (eptr >= md->end_subject)
3598 {
3599 SCHECK_PARTIAL();
3600 MRRETURN(MATCH_NOMATCH);
3601 }
3602 GETCHARINCTEST(c, eptr);
3603 prop_chartype = UCD_CHARTYPE(c);
3604 if ((prop_chartype == prop_value) == prop_fail_result)
3605 MRRETURN(MATCH_NOMATCH);
3606 }
3607 break;
3608
3609 case PT_SC:
3610 for (i = 1; i <= min; i++)
3611 {
3612 if (eptr >= md->end_subject)
3613 {
3614 SCHECK_PARTIAL();
3615 MRRETURN(MATCH_NOMATCH);
3616 }
3617 GETCHARINCTEST(c, eptr);
3618 prop_script = UCD_SCRIPT(c);
3619 if ((prop_script == prop_value) == prop_fail_result)
3620 MRRETURN(MATCH_NOMATCH);
3621 }
3622 break;
3623
3624 case PT_ALNUM:
3625 for (i = 1; i <= min; i++)
3626 {
3627 if (eptr >= md->end_subject)
3628 {
3629 SCHECK_PARTIAL();
3630 MRRETURN(MATCH_NOMATCH);
3631 }
3632 GETCHARINCTEST(c, eptr);
3633 prop_category = UCD_CATEGORY(c);
3634 if ((prop_category == ucp_L || prop_category == ucp_N)
3635 == prop_fail_result)
3636 MRRETURN(MATCH_NOMATCH);
3637 }
3638 break;
3639
3640 case PT_SPACE: /* Perl space */
3641 for (i = 1; i <= min; i++)
3642 {
3643 if (eptr >= md->end_subject)
3644 {
3645 SCHECK_PARTIAL();
3646 MRRETURN(MATCH_NOMATCH);
3647 }
3648 GETCHARINCTEST(c, eptr);
3649 prop_category = UCD_CATEGORY(c);
3650 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3651 c == CHAR_FF || c == CHAR_CR)
3652 == prop_fail_result)
3653 MRRETURN(MATCH_NOMATCH);
3654 }
3655 break;
3656
3657 case PT_PXSPACE: /* POSIX space */
3658 for (i = 1; i <= min; i++)
3659 {
3660 if (eptr >= md->end_subject)
3661 {
3662 SCHECK_PARTIAL();
3663 MRRETURN(MATCH_NOMATCH);
3664 }
3665 GETCHARINCTEST(c, eptr);
3666 prop_category = UCD_CATEGORY(c);
3667 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3668 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3669 == prop_fail_result)
3670 MRRETURN(MATCH_NOMATCH);
3671 }
3672 break;
3673
3674 case PT_WORD:
3675 for (i = 1; i <= min; i++)
3676 {
3677 if (eptr >= md->end_subject)
3678 {
3679 SCHECK_PARTIAL();
3680 MRRETURN(MATCH_NOMATCH);
3681 }
3682 GETCHARINCTEST(c, eptr);
3683 prop_category = UCD_CATEGORY(c);
3684 if ((prop_category == ucp_L || prop_category == ucp_N ||
3685 c == CHAR_UNDERSCORE)
3686 == prop_fail_result)
3687 MRRETURN(MATCH_NOMATCH);
3688 }
3689 break;
3690
3691 /* This should not occur */
3692
3693 default:
3694 RRETURN(PCRE_ERROR_INTERNAL);
3695 }
3696 }
3697
3698 /* Match extended Unicode sequences. We will get here only if the
3699 support is in the binary; otherwise a compile-time error occurs. */
3700
3701 else if (ctype == OP_EXTUNI)
3702 {
3703 for (i = 1; i <= min; i++)
3704 {
3705 if (eptr >= md->end_subject)
3706 {
3707 SCHECK_PARTIAL();
3708 MRRETURN(MATCH_NOMATCH);
3709 }
3710 GETCHARINCTEST(c, eptr);
3711 prop_category = UCD_CATEGORY(c);
3712 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3713 while (eptr < md->end_subject)
3714 {
3715 int len = 1;
3716 if (!utf8) c = *eptr;
3717 else { GETCHARLEN(c, eptr, len); }
3718 prop_category = UCD_CATEGORY(c);
3719 if (prop_category != ucp_M) break;
3720 eptr += len;
3721 }
3722 }
3723 }
3724
3725 else
3726 #endif /* SUPPORT_UCP */
3727
3728 /* Handle all other cases when the coding is UTF-8 */
3729
3730 #ifdef SUPPORT_UTF8
3731 if (utf8) switch(ctype)
3732 {
3733 case OP_ANY:
3734 for (i = 1; i <= min; i++)
3735 {
3736 if (eptr >= md->end_subject)
3737 {
3738 SCHECK_PARTIAL();
3739 MRRETURN(MATCH_NOMATCH);
3740 }
3741 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3742 eptr++;
3743 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3744 }
3745 break;
3746
3747 case OP_ALLANY:
3748 for (i = 1; i <= min; i++)
3749 {
3750 if (eptr >= md->end_subject)
3751 {
3752 SCHECK_PARTIAL();
3753 MRRETURN(MATCH_NOMATCH);
3754 }
3755 eptr++;
3756 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3757 }
3758 break;
3759
3760 case OP_ANYBYTE:
3761 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3762 eptr += min;
3763 break;
3764
3765 case OP_ANYNL:
3766 for (i = 1; i <= min; i++)
3767 {
3768 if (eptr >= md->end_subject)
3769 {
3770 SCHECK_PARTIAL();
3771 MRRETURN(MATCH_NOMATCH);
3772 }
3773 GETCHARINC(c, eptr);
3774 switch(c)
3775 {
3776 default: MRRETURN(MATCH_NOMATCH);
3777 case 0x000d:
3778 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3779 break;
3780
3781 case 0x000a:
3782 break;
3783
3784 case 0x000b:
3785 case 0x000c:
3786 case 0x0085:
3787 case 0x2028:
3788 case 0x2029:
3789 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3790 break;
3791 }
3792 }
3793 break;
3794
3795 case OP_NOT_HSPACE:
3796 for (i = 1; i <= min; i++)
3797 {
3798 if (eptr >= md->end_subject)
3799 {
3800 SCHECK_PARTIAL();
3801 MRRETURN(MATCH_NOMATCH);
3802 }
3803 GETCHARINC(c, eptr);
3804 switch(c)
3805 {
3806 default: break;
3807 case 0x09: /* HT */
3808 case 0x20: /* SPACE */
3809 case 0xa0: /* NBSP */
3810 case 0x1680: /* OGHAM SPACE MARK */
3811 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3812 case 0x2000: /* EN QUAD */
3813 case 0x2001: /* EM QUAD */
3814 case 0x2002: /* EN SPACE */
3815 case 0x2003: /* EM SPACE */
3816 case 0x2004: /* THREE-PER-EM SPACE */
3817 case 0x2005: /* FOUR-PER-EM SPACE */
3818 case 0x2006: /* SIX-PER-EM SPACE */
3819 case 0x2007: /* FIGURE SPACE */
3820 case 0x2008: /* PUNCTUATION SPACE */
3821 case 0x2009: /* THIN SPACE */
3822 case 0x200A: /* HAIR SPACE */
3823 case 0x202f: /* NARROW NO-BREAK SPACE */
3824 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3825 case 0x3000: /* IDEOGRAPHIC SPACE */
3826 MRRETURN(MATCH_NOMATCH);
3827 }
3828 }
3829 break;
3830
3831 case OP_HSPACE:
3832 for (i = 1; i <= min; i++)
3833 {
3834 if (eptr >= md->end_subject)
3835 {
3836 SCHECK_PARTIAL();
3837 MRRETURN(MATCH_NOMATCH);
3838 }
3839 GETCHARINC(c, eptr);
3840 switch(c)
3841 {
3842 default: MRRETURN(MATCH_NOMATCH);
3843 case 0x09: /* HT */
3844 case 0x20: /* SPACE */
3845 case 0xa0: /* NBSP */
3846 case 0x1680: /* OGHAM SPACE MARK */
3847 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3848 case 0x2000: /* EN QUAD */
3849 case 0x2001: /* EM QUAD */
3850 case 0x2002: /* EN SPACE */
3851 case 0x2003: /* EM SPACE */
3852 case 0x2004: /* THREE-PER-EM SPACE */
3853 case 0x2005: /* FOUR-PER-EM SPACE */
3854 case 0x2006: /* SIX-PER-EM SPACE */
3855 case 0x2007: /* FIGURE SPACE */
3856 case 0x2008: /* PUNCTUATION SPACE */
3857 case 0x2009: /* THIN SPACE */
3858 case 0x200A: /* HAIR SPACE */
3859 case 0x202f: /* NARROW NO-BREAK SPACE */
3860 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3861 case 0x3000: /* IDEOGRAPHIC SPACE */
3862 break;
3863 }
3864 }
3865 break;
3866
3867 case OP_NOT_VSPACE:
3868 for (i = 1; i <= min; i++)
3869 {
3870 if (eptr >= md->end_subject)
3871 {
3872 SCHECK_PARTIAL();
3873 MRRETURN(MATCH_NOMATCH);
3874 }
3875 GETCHARINC(c, eptr);
3876 switch(c)
3877 {
3878 default: break;
3879 case 0x0a: /* LF */
3880 case 0x0b: /* VT */
3881 case 0x0c: /* FF */
3882 case 0x0d: /* CR */
3883 case 0x85: /* NEL */
3884 case 0x2028: /* LINE SEPARATOR */
3885 case 0x2029: /* PARAGRAPH SEPARATOR */
3886 MRRETURN(MATCH_NOMATCH);
3887 }
3888 }
3889 break;
3890
3891 case OP_VSPACE:
3892 for (i = 1; i <= min; i++)
3893 {
3894 if (eptr >= md->end_subject)
3895 {
3896 SCHECK_PARTIAL();
3897 MRRETURN(MATCH_NOMATCH);
3898 }
3899 GETCHARINC(c, eptr);
3900 switch(c)
3901 {
3902 default: MRRETURN(MATCH_NOMATCH);
3903 case 0x0a: /* LF */
3904 case 0x0b: /* VT */
3905 case 0x0c: /* FF */
3906 case 0x0d: /* CR */
3907 case 0x85: /* NEL */
3908 case 0x2028: /* LINE SEPARATOR */
3909 case 0x2029: /* PARAGRAPH SEPARATOR */
3910 break;
3911 }
3912 }
3913 break;
3914
3915 case OP_NOT_DIGIT:
3916 for (i = 1; i <= min; i++)
3917 {
3918 if (eptr >= md->end_subject)
3919 {
3920 SCHECK_PARTIAL();
3921 MRRETURN(MATCH_NOMATCH);
3922 }
3923 GETCHARINC(c, eptr);
3924 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3925 MRRETURN(MATCH_NOMATCH);
3926 }
3927 break;
3928
3929 case OP_DIGIT:
3930 for (i = 1; i <= min; i++)
3931 {
3932 if (eptr >= md->end_subject)
3933 {
3934 SCHECK_PARTIAL();
3935 MRRETURN(MATCH_NOMATCH);
3936 }
3937 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3938 MRRETURN(MATCH_NOMATCH);
3939 /* No need to skip more bytes - we know it's a 1-byte character */
3940 }
3941 break;
3942
3943 case OP_NOT_WHITESPACE:
3944 for (i = 1; i <= min; i++)
3945 {
3946 if (eptr >= md->end_subject)
3947 {
3948 SCHECK_PARTIAL();
3949 MRRETURN(MATCH_NOMATCH);
3950 }
3951 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3952 MRRETURN(MATCH_NOMATCH);
3953 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3954 }
3955 break;
3956
3957 case OP_WHITESPACE:
3958 for (i = 1; i <= min; i++)
3959 {
3960 if (eptr >= md->end_subject)
3961 {
3962 SCHECK_PARTIAL();
3963 MRRETURN(MATCH_NOMATCH);
3964 }
3965 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3966 MRRETURN(MATCH_NOMATCH);
3967 /* No need to skip more bytes - we know it's a 1-byte character */
3968 }
3969 break;
3970
3971 case OP_NOT_WORDCHAR:
3972 for (i = 1; i <= min; i++)
3973 {
3974 if (eptr >= md->end_subject)
3975 {
3976 SCHECK_PARTIAL();
3977 MRRETURN(MATCH_NOMATCH);
3978 }
3979 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3980 MRRETURN(MATCH_NOMATCH);
3981 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3982 }
3983 break;
3984
3985 case OP_WORDCHAR:
3986 for (i = 1; i <= min; i++)
3987 {
3988 if (eptr >= md->end_subject)
3989 {
3990 SCHECK_PARTIAL();
3991 MRRETURN(MATCH_NOMATCH);
3992 }
3993 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3994 MRRETURN(MATCH_NOMATCH);
3995 /* No need to skip more bytes - we know it's a 1-byte character */
3996 }
3997 break;
3998
3999 default:
4000 RRETURN(PCRE_ERROR_INTERNAL);
4001 } /* End switch(ctype) */
4002
4003 else
4004 #endif /* SUPPORT_UTF8 */
4005
4006 /* Code for the non-UTF-8 case for minimum matching of operators other
4007 than OP_PROP and OP_NOTPROP. */
4008
4009 switch(ctype)
4010 {
4011 case OP_ANY:
4012 for (i = 1; i <= min; i++)
4013 {
4014 if (eptr >= md->end_subject)
4015 {
4016 SCHECK_PARTIAL();
4017 MRRETURN(MATCH_NOMATCH);
4018 }
4019 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4020 eptr++;
4021 }
4022 break;
4023
4024 case OP_ALLANY:
4025 if (eptr > md->end_subject - min)
4026 {
4027 SCHECK_PARTIAL();
4028 MRRETURN(MATCH_NOMATCH);
4029 }
4030 eptr += min;
4031 break;
4032
4033 case OP_ANYBYTE:
4034 if (eptr > md->end_subject - min)
4035 {
4036 SCHECK_PARTIAL();
4037 MRRETURN(MATCH_NOMATCH);
4038 }
4039 eptr += min;
4040 break;
4041
4042 case OP_ANYNL:
4043 for (i = 1; i <= min; i++)
4044 {
4045 if (eptr >= md->end_subject)
4046 {
4047 SCHECK_PARTIAL();
4048 MRRETURN(MATCH_NOMATCH);
4049 }
4050 switch(*eptr++)
4051 {
4052 default: MRRETURN(MATCH_NOMATCH);
4053 case 0x000d:
4054 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4055 break;
4056 case 0x000a:
4057 break;
4058
4059 case 0x000b:
4060 case 0x000c:
4061 case 0x0085:
4062 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4063 break;
4064 }
4065 }
4066 break;
4067
4068 case OP_NOT_HSPACE:
4069 for (i = 1; i <= min; i++)
4070 {
4071 if (eptr >= md->end_subject)
4072 {
4073 SCHECK_PARTIAL();
4074 MRRETURN(MATCH_NOMATCH);
4075 }
4076 switch(*eptr++)
4077 {
4078 default: break;
4079 case 0x09: /* HT */
4080 case 0x20: /* SPACE */
4081 case 0xa0: /* NBSP */
4082 MRRETURN(MATCH_NOMATCH);
4083 }
4084 }
4085 break;
4086
4087 case OP_HSPACE:
4088 for (i = 1; i <= min; i++)
4089 {
4090 if (eptr >= md->end_subject)
4091 {
4092 SCHECK_PARTIAL();
4093 MRRETURN(MATCH_NOMATCH);
4094 }
4095 switch(*eptr++)
4096 {
4097 default: MRRETURN(MATCH_NOMATCH);
4098 case 0x09: /* HT */
4099 case 0x20: /* SPACE */
4100 case 0xa0: /* NBSP */
4101 break;
4102 }
4103 }
4104 break;
4105
4106 case OP_NOT_VSPACE:
4107 for (i = 1; i <= min; i++)
4108 {
4109 if (eptr >= md->end_subject)
4110 {
4111 SCHECK_PARTIAL();
4112 MRRETURN(MATCH_NOMATCH);
4113 }
4114 switch(*eptr++)
4115 {
4116 default: break;
4117 case 0x0a: /* LF */
4118 case 0x0b: /* VT */
4119 case 0x0c: /* FF */
4120 case 0x0d: /* CR */
4121 case 0x85: /* NEL */
4122 MRRETURN(MATCH_NOMATCH);
4123 }
4124 }
4125 break;
4126
4127 case OP_VSPACE:
4128 for (i = 1; i <= min; i++)
4129 {
4130 if (eptr >= md->end_subject)
4131 {
4132 SCHECK_PARTIAL();
4133 MRRETURN(MATCH_NOMATCH);
4134 }
4135 switch(*eptr++)
4136 {
4137 default: MRRETURN(MATCH_NOMATCH);
4138 case 0x0a: /* LF */
4139 case 0x0b: /* VT */
4140 case 0x0c: /* FF */
4141 case 0x0d: /* CR */
4142 case 0x85: /* NEL */
4143 break;
4144 }
4145 }
4146 break;
4147
4148 case OP_NOT_DIGIT:
4149 for (i = 1; i <= min; i++)
4150 {
4151 if (eptr >= md->end_subject)
4152 {
4153 SCHECK_PARTIAL();
4154 MRRETURN(MATCH_NOMATCH);
4155 }
4156 if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4157 }
4158 break;
4159
4160 case OP_DIGIT:
4161 for (i = 1; i <= min; i++)
4162 {
4163 if (eptr >= md->end_subject)
4164 {
4165 SCHECK_PARTIAL();
4166 MRRETURN(MATCH_NOMATCH);
4167 }
4168 if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4169 }
4170 break;
4171
4172 case OP_NOT_WHITESPACE:
4173 for (i = 1; i <= min; i++)
4174 {
4175 if (eptr >= md->end_subject)
4176 {
4177 SCHECK_PARTIAL();
4178 MRRETURN(MATCH_NOMATCH);
4179 }
4180 if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4181 }
4182 break;
4183
4184 case OP_WHITESPACE:
4185 for (i = 1; i <= min; i++)
4186 {
4187 if (eptr >= md->end_subject)
4188 {
4189 SCHECK_PARTIAL();
4190 MRRETURN(MATCH_NOMATCH);
4191 }
4192 if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4193 }
4194 break;
4195
4196 case OP_NOT_WORDCHAR:
4197 for (i = 1; i <= min; i++)
4198 {
4199 if (eptr >= md->end_subject)
4200 {
4201 SCHECK_PARTIAL();
4202 MRRETURN(MATCH_NOMATCH);
4203 }
4204 if ((md->ctypes[*eptr++] & ctype_word) != 0)
4205 MRRETURN(MATCH_NOMATCH);
4206 }
4207 break;
4208
4209 case OP_WORDCHAR:
4210 for (i = 1; i <= min; i++)
4211 {
4212 if (eptr >= md->end_subject)
4213 {
4214 SCHECK_PARTIAL();
4215 MRRETURN(MATCH_NOMATCH);
4216 }
4217 if ((md->ctypes[*eptr++] & ctype_word) == 0)
4218 MRRETURN(MATCH_NOMATCH);
4219 }
4220 break;
4221
4222 default:
4223 RRETURN(PCRE_ERROR_INTERNAL);
4224 }
4225 }
4226
4227 /* If min = max, continue at the same level without recursing */
4228
4229 if (min == max) continue;
4230
4231 /* If minimizing, we have to test the rest of the pattern before each
4232 subsequent match. Again, separate the UTF-8 case for speed, and also
4233 separate the UCP cases. */
4234
4235 if (minimize)
4236 {
4237 #ifdef SUPPORT_UCP
4238 if (prop_type >= 0)
4239 {
4240 switch(prop_type)
4241 {
4242 case PT_ANY:
4243 for (fi = min;; fi++)
4244 {
4245 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4246 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4247 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4248 if (eptr >= md->end_subject)
4249 {
4250 SCHECK_PARTIAL();
4251 MRRETURN(MATCH_NOMATCH);
4252 }
4253 GETCHARINCTEST(c, eptr);
4254 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4255 }
4256 /* Control never gets here */
4257
4258 case PT_LAMP:
4259 for (fi = min;; fi++)
4260 {
4261 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4262 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4263 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4264 if (eptr >= md->end_subject)
4265 {
4266 SCHECK_PARTIAL();
4267 MRRETURN(MATCH_NOMATCH);
4268 }
4269 GETCHARINCTEST(c, eptr);
4270 prop_chartype = UCD_CHARTYPE(c);
4271 if ((prop_chartype == ucp_Lu ||
4272 prop_chartype == ucp_Ll ||
4273 prop_chartype == ucp_Lt) == prop_fail_result)
4274 MRRETURN(MATCH_NOMATCH);
4275 }
4276 /* Control never gets here */
4277
4278 case PT_GC:
4279 for (fi = min;; fi++)
4280 {
4281 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4282 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4283 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4284 if (eptr >= md->end_subject)
4285 {
4286 SCHECK_PARTIAL();
4287 MRRETURN(MATCH_NOMATCH);
4288 }
4289 GETCHARINCTEST(c, eptr);
4290 prop_category = UCD_CATEGORY(c);
4291 if ((prop_category == prop_value) == prop_fail_result)
4292 MRRETURN(MATCH_NOMATCH);
4293 }
4294 /* Control never gets here */
4295
4296 case PT_PC:
4297 for (fi = min;; fi++)
4298 {
4299 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4300 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4301 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4302 if (eptr >= md->end_subject)
4303 {
4304 SCHECK_PARTIAL();
4305 MRRETURN(MATCH_NOMATCH);
4306 }
4307 GETCHARINCTEST(c, eptr);
4308 prop_chartype = UCD_CHARTYPE(c);
4309 if ((prop_chartype == prop_value) == prop_fail_result)
4310 MRRETURN(MATCH_NOMATCH);
4311 }
4312 /* Control never gets here */
4313
4314 case PT_SC:
4315 for (fi = min;; fi++)
4316 {
4317 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4318 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4319 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4320 if (eptr >= md->end_subject)
4321 {
4322 SCHECK_PARTIAL();
4323 MRRETURN(MATCH_NOMATCH);
4324 }
4325 GETCHARINCTEST(c, eptr);
4326 prop_script = UCD_SCRIPT(c);
4327 if ((prop_script == prop_value) == prop_fail_result)
4328 MRRETURN(MATCH_NOMATCH);
4329 }
4330 /* Control never gets here */
4331
4332 case PT_ALNUM:
4333 for (fi = min;; fi++)
4334 {
4335 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4336 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4337 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4338 if (eptr >= md->end_subject)
4339 {
4340 SCHECK_PARTIAL();
4341 MRRETURN(MATCH_NOMATCH);
4342 }
4343 GETCHARINCTEST(c, eptr);
4344 prop_category = UCD_CATEGORY(c);
4345 if ((prop_category == ucp_L || prop_category == ucp_N)
4346 == prop_fail_result)
4347 MRRETURN(MATCH_NOMATCH);
4348 }
4349 /* Control never gets here */
4350
4351 case PT_SPACE: /* Perl space */
4352 for (fi = min;; fi++)
4353 {
4354 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4355 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4356 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4357 if (eptr >= md->end_subject)
4358 {
4359 SCHECK_PARTIAL();
4360 MRRETURN(MATCH_NOMATCH);
4361 }
4362 GETCHARINCTEST(c, eptr);
4363 prop_category = UCD_CATEGORY(c);
4364 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4365 c == CHAR_FF || c == CHAR_CR)
4366 == prop_fail_result)
4367 MRRETURN(MATCH_NOMATCH);
4368 }
4369 /* Control never gets here */
4370
4371 case PT_PXSPACE: /* POSIX space */
4372 for (fi = min;; fi++)
4373 {
4374 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4375 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4376 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4377 if (eptr >= md->end_subject)
4378 {
4379 SCHECK_PARTIAL();
4380 MRRETURN(MATCH_NOMATCH);
4381 }
4382 GETCHARINCTEST(c, eptr);
4383 prop_category = UCD_CATEGORY(c);
4384 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4385 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4386 == prop_fail_result)
4387 MRRETURN(MATCH_NOMATCH);
4388 }
4389 /* Control never gets here */
4390
4391 case PT_WORD:
4392 for (fi = min;; fi++)
4393 {
4394 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4395 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4396 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4397 if (eptr >= md->end_subject)
4398 {
4399 SCHECK_PARTIAL();
4400 MRRETURN(MATCH_NOMATCH);
4401 }
4402 GETCHARINCTEST(c, eptr);
4403 prop_category = UCD_CATEGORY(c);
4404 if ((prop_category == ucp_L ||
4405 prop_category == ucp_N ||
4406 c == CHAR_UNDERSCORE)
4407 == prop_fail_result)
4408 MRRETURN(MATCH_NOMATCH);
4409 }
4410 /* Control never gets here */
4411
4412 /* This should never occur */
4413
4414 default:
4415 RRETURN(PCRE_ERROR_INTERNAL);
4416 }
4417 }
4418
4419 /* Match extended Unicode sequences. We will get here only if the
4420 support is in the binary; otherwise a compile-time error occurs. */
4421
4422 else if (ctype == OP_EXTUNI)
4423 {
4424 for (fi = min;; fi++)
4425 {
4426 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4427 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4428 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4429 if (eptr >= md->end_subject)
4430 {
4431 SCHECK_PARTIAL();
4432 MRRETURN(MATCH_NOMATCH);
4433 }
4434 GETCHARINCTEST(c, eptr);
4435 prop_category = UCD_CATEGORY(c);
4436 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
4437 while (eptr < md->end_subject)
4438 {
4439 int len = 1;
4440 if (!utf8) c = *eptr;
4441 else { GETCHARLEN(c, eptr, len); }
4442 prop_category = UCD_CATEGORY(c);
4443 if (prop_category != ucp_M) break;
4444 eptr += len;
4445 }
4446 }
4447 }
4448
4449 else
4450 #endif /* SUPPORT_UCP */
4451
4452 #ifdef SUPPORT_UTF8
4453 /* UTF-8 mode */
4454 if (utf8)
4455 {
4456 for (fi = min;; fi++)
4457 {
4458 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4459 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4460 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4461 if (eptr >= md->end_subject)
4462 {
4463 SCHECK_PARTIAL();
4464 MRRETURN(MATCH_NOMATCH);
4465 }
4466 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4467 MRRETURN(MATCH_NOMATCH);
4468 GETCHARINC(c, eptr);
4469 switch(ctype)
4470 {
4471 case OP_ANY: /* This is the non-NL case */
4472 case OP_ALLANY:
4473 case OP_ANYBYTE:
4474 break;
4475
4476 case OP_ANYNL:
4477 switch(c)
4478 {
4479 default: MRRETURN(MATCH_NOMATCH);
4480 case 0x000d:
4481 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4482 break;
4483 case 0x000a:
4484 break;
4485
4486 case 0x000b:
4487 case 0x000c:
4488 case 0x0085:
4489 case 0x2028:
4490 case 0x2029:
4491 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4492 break;
4493 }
4494 break;
4495
4496 case OP_NOT_HSPACE:
4497 switch(c)
4498 {
4499 default: break;
4500 case 0x09: /* HT */
4501 case 0x20: /* SPACE */
4502 case 0xa0: /* NBSP */
4503 case 0x1680: /* OGHAM SPACE MARK */
4504 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4505 case 0x2000: /* EN QUAD */
4506 case 0x2001: /* EM QUAD */
4507 case 0x2002: /* EN SPACE */
4508 case 0x2003: /* EM SPACE */
4509 case 0x2004: /* THREE-PER-EM SPACE */
4510 case 0x2005: /* FOUR-PER-EM SPACE */
4511 case 0x2006: /* SIX-PER-EM SPACE */
4512 case 0x2007: /* FIGURE SPACE */
4513 case 0x2008: /* PUNCTUATION SPACE */
4514 case 0x2009: /* THIN SPACE */
4515 case 0x200A: /* HAIR SPACE */
4516 case 0x202f: /* NARROW NO-BREAK SPACE */
4517 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4518 case 0x3000: /* IDEOGRAPHIC SPACE */
4519 MRRETURN(MATCH_NOMATCH);
4520 }
4521 break;
4522
4523 case OP_HSPACE:
4524 switch(c)
4525 {
4526 default: MRRETURN(MATCH_NOMATCH);
4527 case 0x09: /* HT */
4528 case 0x20: /* SPACE */
4529 case 0xa0: /* NBSP */
4530 case 0x1680: /* OGHAM SPACE MARK */
4531 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4532 case 0x2000: /* EN QUAD */
4533 case 0x2001: /* EM QUAD */
4534 case 0x2002: /* EN SPACE */
4535 case 0x2003: /* EM SPACE */
4536 case 0x2004: /* THREE-PER-EM SPACE */
4537 case 0x2005: /* FOUR-PER-EM SPACE */
4538 case 0x2006: /* SIX-PER-EM SPACE */
4539 case 0x2007: /* FIGURE SPACE */
4540 case 0x2008: /* PUNCTUATION SPACE */
4541 case 0x2009: /* THIN SPACE */
4542 case 0x200A: /* HAIR SPACE */
4543 case 0x202f: /* NARROW NO-BREAK SPACE */
4544 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4545 case 0x3000: /* IDEOGRAPHIC SPACE */
4546 break;
4547 }
4548 break;
4549
4550 case OP_NOT_VSPACE:
4551 switch(c)
4552 {
4553 default: break;
4554 case 0x0a: /* LF */
4555 case 0x0b: /* VT */
4556 case 0x0c: /* FF */
4557 case 0x0d: /* CR */
4558 case 0x85: /* NEL */
4559 case 0x2028: /* LINE SEPARATOR */
4560 case 0x2029: /* PARAGRAPH SEPARATOR */
4561 MRRETURN(MATCH_NOMATCH);
4562 }
4563 break;
4564
4565 case OP_VSPACE:
4566 switch(c)
4567 {
4568 default: MRRETURN(MATCH_NOMATCH);
4569 case 0x0a: /* LF */
4570 case 0x0b: /* VT */
4571 case 0x0c: /* FF */
4572 case 0x0d: /* CR */
4573 case 0x85: /* NEL */
4574 case 0x2028: /* LINE SEPARATOR */
4575 case 0x2029: /* PARAGRAPH SEPARATOR */
4576 break;
4577 }
4578 break;
4579
4580 case OP_NOT_DIGIT:
4581 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4582 MRRETURN(MATCH_NOMATCH);
4583 break;
4584
4585 case OP_DIGIT:
4586 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4587 MRRETURN(MATCH_NOMATCH);
4588 break;
4589
4590 case OP_NOT_WHITESPACE:
4591 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4592 MRRETURN(MATCH_NOMATCH);
4593 break;
4594
4595 case OP_WHITESPACE:
4596 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4597 MRRETURN(MATCH_NOMATCH);
4598 break;
4599
4600 case OP_NOT_WORDCHAR:
4601 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4602 MRRETURN(MATCH_NOMATCH);
4603 break;
4604
4605 case OP_WORDCHAR:
4606 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4607 MRRETURN(MATCH_NOMATCH);
4608 break;
4609
4610 default:
4611 RRETURN(PCRE_ERROR_INTERNAL);
4612 }
4613 }
4614 }
4615 else
4616 #endif
4617 /* Not UTF-8 mode */
4618 {
4619 for (fi = min;; fi++)
4620 {
4621 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4622 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4623 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4624 if (eptr >= md->end_subject)
4625 {
4626 SCHECK_PARTIAL();
4627 MRRETURN(MATCH_NOMATCH);
4628 }
4629 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4630 MRRETURN(MATCH_NOMATCH);
4631 c = *eptr++;
4632 switch(ctype)
4633 {
4634 case OP_ANY: /* This is the non-NL case */
4635 case OP_ALLANY:
4636 case OP_ANYBYTE:
4637 break;
4638
4639 case OP_ANYNL:
4640 switch(c)
4641 {
4642 default: MRRETURN(MATCH_NOMATCH);
4643 case 0x000d:
4644 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4645 break;
4646
4647 case 0x000a:
4648 break;
4649
4650 case 0x000b:
4651 case 0x000c:
4652 case 0x0085:
4653 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4654 break;
4655 }
4656 break;
4657
4658 case OP_NOT_HSPACE:
4659 switch(c)
4660 {
4661 default: break;
4662 case 0x09: /* HT */
4663 case 0x20: /* SPACE */
4664 case 0xa0: /* NBSP */
4665 MRRETURN(MATCH_NOMATCH);
4666 }
4667 break;
4668
4669 case OP_HSPACE:
4670 switch(c)
4671 {
4672 default: MRRETURN(MATCH_NOMATCH);
4673 case 0x09: /* HT */
4674 case 0x20: /* SPACE */
4675 case 0xa0: /* NBSP */
4676 break;
4677 }
4678 break;
4679
4680 case OP_NOT_VSPACE:
4681 switch(c)
4682 {
4683 default: break;
4684 case 0x0a: /* LF */
4685 case 0x0b: /* VT */
4686 case 0x0c: /* FF */
4687 case 0x0d: /* CR */
4688 case 0x85: /* NEL */
4689 MRRETURN(MATCH_NOMATCH);
4690 }
4691 break;
4692
4693 case OP_VSPACE:
4694 switch(c)
4695 {
4696 default: MRRETURN(MATCH_NOMATCH);
4697 case 0x0a: /* LF */
4698 case 0x0b: /* VT */
4699 case 0x0c: /* FF */
4700 case 0x0d: /* CR */
4701 case 0x85: /* NEL */
4702 break;
4703 }
4704 break;
4705
4706 case OP_NOT_DIGIT:
4707 if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4708 break;
4709
4710 case OP_DIGIT:
4711 if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4712 break;
4713
4714 case OP_NOT_WHITESPACE:
4715 if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4716 break;
4717
4718 case OP_WHITESPACE:
4719 if ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4720 break;
4721
4722 case OP_NOT_WORDCHAR:
4723 if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
4724 break;
4725
4726 case OP_WORDCHAR:
4727 if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
4728 break;
4729
4730 default:
4731 RRETURN(PCRE_ERROR_INTERNAL);
4732 }
4733 }
4734 }
4735 /* Control never gets here */
4736 }
4737
4738 /* If maximizing, it is worth using inline code for speed, doing the type
4739 test once at the start (i.e. keep it out of the loop). Again, keep the
4740 UTF-8 and UCP stuff separate. */
4741
4742 else
4743 {
4744 pp = eptr; /* Remember where we started */
4745
4746 #ifdef SUPPORT_UCP
4747 if (prop_type >= 0)
4748 {
4749 switch(prop_type)
4750 {
4751 case PT_ANY:
4752 for (i = min; i < max; i++)
4753 {
4754 int len = 1;
4755 if (eptr >= md->end_subject)
4756 {
4757 SCHECK_PARTIAL();
4758 break;
4759 }
4760 GETCHARLENTEST(c, eptr, len);
4761 if (prop_fail_result) break;
4762 eptr+= len;
4763 }
4764 break;
4765
4766 case PT_LAMP:
4767 for (i = min; i < max; i++)
4768 {
4769 int len = 1;
4770 if (eptr >= md->end_subject)
4771 {
4772 SCHECK_PARTIAL();
4773 break;
4774 }
4775 GETCHARLENTEST(c, eptr, len);
4776 prop_chartype = UCD_CHARTYPE(c);
4777 if ((prop_chartype == ucp_Lu ||
4778 prop_chartype == ucp_Ll ||
4779 prop_chartype == ucp_Lt) == prop_fail_result)
4780 break;
4781 eptr+= len;
4782 }
4783 break;
4784
4785 case PT_GC:
4786 for (i = min; i < max; i++)
4787 {
4788 int len = 1;
4789 if (eptr >= md->end_subject)
4790 {
4791 SCHECK_PARTIAL();
4792 break;
4793 }
4794 GETCHARLENTEST(c, eptr, len);
4795 prop_category = UCD_CATEGORY(c);
4796 if ((prop_category == prop_value) == prop_fail_result)
4797 break;
4798 eptr+= len;
4799 }
4800 break;
4801
4802 case PT_PC:
4803 for (i = min; i < max; i++)
4804 {
4805 int len = 1;
4806 if (eptr >= md->end_subject)
4807 {
4808 SCHECK_PARTIAL();
4809 break;
4810 }
4811 GETCHARLENTEST(c, eptr, len);
4812 prop_chartype = UCD_CHARTYPE(c);
4813 if ((prop_chartype == prop_value) == prop_fail_result)
4814 break;
4815 eptr+= len;
4816 }
4817 break;
4818
4819 case PT_SC:
4820 for (i = min; i < max; i++)
4821 {
4822 int len = 1;
4823 if (eptr >= md->end_subject)
4824 {
4825 SCHECK_PARTIAL();
4826 break;
4827 }
4828 GETCHARLENTEST(c, eptr, len);
4829 prop_script = UCD_SCRIPT(c);
4830 if ((prop_script == prop_value) == prop_fail_result)
4831 break;
4832 eptr+= len;
4833 }
4834 break;
4835
4836 case PT_ALNUM:
4837 for (i = min; i < max; i++)
4838 {
4839 int len = 1;
4840 if (eptr >= md->end_subject)
4841 {
4842 SCHECK_PARTIAL();
4843 break;
4844 }
4845 GETCHARLENTEST(c, eptr, len);
4846 prop_category = UCD_CATEGORY(c);
4847 if ((prop_category == ucp_L || prop_category == ucp_N)
4848 == prop_fail_result)
4849 break;
4850 eptr+= len;
4851 }
4852 break;
4853
4854 case PT_SPACE: /* Perl space */
4855 for (i = min; i < max; i++)
4856 {
4857 int len = 1;
4858 if (eptr >= md->end_subject)
4859 {
4860 SCHECK_PARTIAL();
4861 break;
4862 }
4863 GETCHARLENTEST(c, eptr, len);
4864 prop_category = UCD_CATEGORY(c);
4865 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4866 c == CHAR_FF || c == CHAR_CR)
4867 == prop_fail_result)
4868 break;
4869 eptr+= len;
4870 }
4871 break;
4872
4873 case PT_PXSPACE: /* POSIX space */
4874 for (i = min; i < max; i++)
4875 {
4876 int len = 1;
4877 if (eptr >= md->end_subject)
4878 {
4879 SCHECK_PARTIAL();
4880 break;
4881 }
4882 GETCHARLENTEST(c, eptr, len);
4883 prop_category = UCD_CATEGORY(c);
4884 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4885 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4886 == prop_fail_result)
4887 break;
4888 eptr+= len;
4889 }
4890 break;
4891
4892 case PT_WORD:
4893 for (i = min; i < max; i++)
4894 {
4895 int len = 1;
4896 if (eptr >= md->end_subject)
4897 {
4898 SCHECK_PARTIAL();
4899 break;
4900 }
4901 GETCHARLENTEST(c, eptr, len);
4902 prop_category = UCD_CATEGORY(c);
4903 if ((prop_category == ucp_L || prop_category == ucp_N ||
4904 c == CHAR_UNDERSCORE) == prop_fail_result)
4905 break;
4906 eptr+= len;
4907 }
4908 break;
4909
4910 default:
4911 RRETURN(PCRE_ERROR_INTERNAL);
4912 }
4913
4914 /* eptr is now past the end of the maximum run */
4915
4916 if (possessive) continue;
4917 for(;;)
4918 {
4919 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4920 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4921 if (eptr-- == pp) break; /* Stop if tried at original pos */
4922 if (utf8) BACKCHAR(eptr);
4923 }
4924 }
4925
4926 /* Match extended Unicode sequences. We will get here only if the
4927 support is in the binary; otherwise a compile-time error occurs. */
4928
4929 else if (ctype == OP_EXTUNI)
4930 {
4931 for (i = min; i < max; i++)
4932 {
4933 if (eptr >= md->end_subject)
4934 {
4935 SCHECK_PARTIAL();
4936 break;
4937 }
4938 GETCHARINCTEST(c, eptr);
4939 prop_category = UCD_CATEGORY(c);
4940 if (prop_category == ucp_M) break;
4941 while (eptr < md->end_subject)
4942 {
4943 int len = 1;
4944 if (!utf8) c = *eptr; else
4945 {
4946 GETCHARLEN(c, eptr, len);
4947 }
4948 prop_category = UCD_CATEGORY(c);
4949 if (prop_category != ucp_M) break;
4950 eptr += len;
4951 }
4952 }
4953
4954 /* eptr is now past the end of the maximum run */
4955
4956 if (possessive) continue;
4957
4958 for(;;)
4959 {
4960 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4961 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4962 if (eptr-- == pp) break; /* Stop if tried at original pos */
4963 for (;;) /* Move back over one extended */
4964 {
4965 int len = 1;
4966 if (!utf8) c = *eptr; else
4967 {
4968 BACKCHAR(eptr);
4969 GETCHARLEN(c, eptr, len);
4970 }
4971 prop_category = UCD_CATEGORY(c);
4972 if (prop_category != ucp_M) break;
4973 eptr--;
4974 }
4975 }
4976 }
4977
4978 else
4979 #endif /* SUPPORT_UCP */
4980
4981 #ifdef SUPPORT_UTF8
4982 /* UTF-8 mode */
4983
4984 if (utf8)
4985 {
4986 switch(ctype)
4987 {
4988 case OP_ANY:
4989 if (max < INT_MAX)
4990 {
4991 for (i = min; i < max; i++)
4992 {
4993 if (eptr >= md->end_subject)
4994 {
4995 SCHECK_PARTIAL();
4996 break;
4997 }
4998 if (IS_NEWLINE(eptr)) break;
4999 eptr++;
5000 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5001 }
5002 }
5003
5004 /* Handle unlimited UTF-8 repeat */
5005
5006 else
5007 {
5008 for (i = min; i < max; i++)
5009 {
5010 if (eptr >= md->end_subject)
5011 {
5012 SCHECK_PARTIAL();
5013 break;
5014 }
5015 if (IS_NEWLINE(eptr)) break;
5016 eptr++;
5017 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5018 }
5019 }
5020 break;
5021
5022 case OP_ALLANY:
5023 if (max < INT_MAX)
5024 {
5025 for (i = min; i < max; i++)
5026 {
5027 if (eptr >= md->end_subject)
5028 {
5029 SCHECK_PARTIAL();
5030 break;
5031 }
5032 eptr++;
5033 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5034 }
5035 }
5036 else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
5037 break;
5038
5039 /* The byte case is the same as non-UTF8 */
5040
5041 case OP_ANYBYTE:
5042 c = max - min;
5043 if (c > (unsigned int)(md->end_subject - eptr))
5044 {
5045 eptr = md->end_subject;
5046 SCHECK_PARTIAL();
5047 }
5048 else eptr += c;
5049 break;
5050
5051 case OP_ANYNL:
5052 for (i = min; i < max; i++)
5053 {
5054 int len = 1;
5055 if (eptr >= md->end_subject)
5056 {
5057 SCHECK_PARTIAL();
5058 break;
5059 }
5060 GETCHARLEN(c, eptr, len);
5061 if (c == 0x000d)
5062 {
5063 if (++eptr >= md->end_subject) break;
5064 if (*eptr == 0x000a) eptr++;
5065 }
5066 else
5067 {
5068 if (c != 0x000a &&
5069 (md->bsr_anycrlf ||
5070 (c != 0x000b && c != 0x000c &&
5071 c != 0x0085 && c != 0x2028 && c != 0x2029)))
5072 break;
5073 eptr += len;
5074 }
5075 }
5076 break;
5077
5078 case OP_NOT_HSPACE:
5079 case OP_HSPACE:
5080 for (i = min; i < max; i++)
5081 {
5082 BOOL gotspace;
5083 int len = 1;
5084 if (eptr >= md->end_subject)
5085 {
5086 SCHECK_PARTIAL();
5087 break;
5088 }
5089 GETCHARLEN(c, eptr, len);
5090 switch(c)
5091 {
5092 default: gotspace = FALSE; break;
5093 case 0x09: /* HT */
5094 case 0x20: /* SPACE */
5095 case 0xa0: /* NBSP */
5096 case 0x1680: /* OGHAM SPACE MARK */
5097 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5098 case 0x2000: /* EN QUAD */
5099 case 0x2001: /* EM QUAD */
5100 case 0x2002: /* EN SPACE */
5101 case 0x2003: /* EM SPACE */
5102 case 0x2004: /* THREE-PER-EM SPACE */
5103 case 0x2005: /* FOUR-PER-EM SPACE */
5104 case 0x2006: /* SIX-PER-EM SPACE */
5105 case 0x2007: /* FIGURE SPACE */
5106 case 0x2008: /* PUNCTUATION SPACE */
5107 case 0x2009: /* THIN SPACE */
5108 case 0x200A: /* HAIR SPACE */
5109 case 0x202f: /* NARROW NO-BREAK SPACE */
5110 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5111 case 0x3000: /* IDEOGRAPHIC SPACE */
5112 gotspace = TRUE;
5113 break;
5114 }
5115 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5116 eptr += len;
5117 }
5118 break;
5119
5120 case OP_NOT_VSPACE:
5121 case OP_VSPACE:
5122 for (i = min; i < max; i++)
5123 {
5124 BOOL gotspace;
5125 int len = 1;
5126 if (eptr >= md->end_subject)
5127 {
5128 SCHECK_PARTIAL();
5129 break;
5130 }
5131 GETCHARLEN(c, eptr, len);
5132 switch(c)
5133 {
5134 default: gotspace = FALSE; break;
5135 case 0x0a: /* LF */
5136 case 0x0b: /* VT */
5137 case 0x0c: /* FF */
5138 case 0x0d: /* CR */
5139 case 0x85: /* NEL */
5140 case 0x2028: /* LINE SEPARATOR */
5141 case 0x2029: /* PARAGRAPH SEPARATOR */
5142 gotspace = TRUE;
5143 break;
5144 }
5145 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5146 eptr += len;
5147 }
5148 break;
5149
5150 case OP_NOT_DIGIT:
5151 for (i = min; i < max; i++)
5152 {
5153 int len = 1;
5154 if (eptr >= md->end_subject)
5155 {
5156 SCHECK_PARTIAL();
5157 break;
5158 }
5159 GETCHARLEN(c, eptr, len);
5160 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5161 eptr+= len;
5162 }
5163 break;
5164
5165 case OP_DIGIT:
5166 for (i = min; i < max; i++)
5167 {
5168 int len = 1;
5169 if (eptr >= md->end_subject)
5170 {
5171 SCHECK_PARTIAL();
5172 break;
5173 }
5174 GETCHARLEN(c, eptr, len);
5175 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5176 eptr+= len;
5177 }
5178 break;
5179
5180 case OP_NOT_WHITESPACE:
5181 for (i = min; i < max; i++)
5182 {
5183 int len = 1;
5184 if (eptr >= md->end_subject)
5185 {
5186 SCHECK_PARTIAL();
5187 break;
5188 }
5189 GETCHARLEN(c, eptr, len);
5190 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5191 eptr+= len;
5192 }
5193 break;
5194
5195 case OP_WHITESPACE:
5196 for (i = min; i < max; i++)
5197 {
5198 int len = 1;
5199 if (eptr >= md->end_subject)
5200 {
5201 SCHECK_PARTIAL();
5202 break;
5203 }
5204 GETCHARLEN(c, eptr, len);
5205 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5206 eptr+= len;
5207 }
5208 break;
5209
5210 case OP_NOT_WORDCHAR:
5211 for (i = min; i < max; i++)
5212 {
5213 int len = 1;
5214 if (eptr >= md->end_subject)
5215 {
5216 SCHECK_PARTIAL();
5217 break;
5218 }
5219 GETCHARLEN(c, eptr, len);
5220 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5221 eptr+= len;
5222 }
5223 break;
5224
5225 case OP_WORDCHAR:
5226 for (i = min; i < max; i++)
5227 {
5228 int len = 1;
5229 if (eptr >= md->end_subject)
5230 {
5231 SCHECK_PARTIAL();
5232 break;
5233 }
5234 GETCHARLEN(c, eptr, len);
5235 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5236 eptr+= len;
5237 }
5238 break;
5239
5240 default:
5241 RRETURN(PCRE_ERROR_INTERNAL);
5242 }
5243
5244 /* eptr is now past the end of the maximum run */
5245
5246 if (possessive) continue;
5247 for(;;)
5248 {
5249 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
5250 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5251 if (eptr-- == pp) break; /* Stop if tried at original pos */
5252 BACKCHAR(eptr);
5253 }
5254 }
5255 else
5256 #endif /* SUPPORT_UTF8 */
5257
5258 /* Not UTF-8 mode */
5259 {
5260 switch(ctype)
5261 {
5262 case OP_ANY:
5263 for (i = min; i < max; i++)
5264 {
5265 if (eptr >= md->end_subject)
5266 {
5267 SCHECK_PARTIAL();
5268 break;
5269 }
5270 if (IS_NEWLINE(eptr)) break;
5271 eptr++;
5272 }
5273 break;
5274
5275 case OP_ALLANY:
5276 case OP_ANYBYTE:
5277 c = max - min;
5278 if (c > (unsigned int)(md->end_subject - eptr))
5279 {
5280 eptr = md->end_subject;
5281 SCHECK_PARTIAL();
5282 }
5283 else eptr += c;
5284 break;
5285
5286 case OP_ANYNL:
5287 for (i = min; i < max; i++)
5288 {
5289 if (eptr >= md->end_subject)
5290 {
5291 SCHECK_PARTIAL();
5292 break;
5293 }
5294 c = *eptr;
5295 if (c == 0x000d)
5296 {
5297 if (++eptr >= md->end_subject) break;
5298 if (*eptr == 0x000a) eptr++;
5299 }
5300 else
5301 {
5302 if (c != 0x000a &&
5303 (md->bsr_anycrlf ||
5304 (c != 0x000b && c != 0x000c && c != 0x0085)))
5305 break;
5306 eptr++;
5307 }
5308 }
5309 break;
5310
5311 case OP_NOT_HSPACE:
5312 for (i = min; i < max; i++)
5313 {
5314 if (eptr >= md->end_subject)
5315 {
5316 SCHECK_PARTIAL();
5317 break;
5318 }
5319 c = *eptr;
5320 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
5321 eptr++;
5322 }
5323 break;
5324
5325 case OP_HSPACE:
5326 for (i = min; i < max; i++)
5327 {
5328 if (eptr >= md->end_subject)
5329 {
5330 SCHECK_PARTIAL();
5331 break;
5332 }
5333 c = *eptr;
5334 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
5335 eptr++;
5336 }
5337 break;
5338
5339 case OP_NOT_VSPACE:
5340 for (i = min; i < max; i++)
5341 {
5342 if (eptr >= md->end_subject)
5343 {
5344 SCHECK_PARTIAL();
5345 break;
5346 }
5347 c = *eptr;
5348 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
5349 break;
5350 eptr++;
5351 }
5352 break;
5353
5354 case OP_VSPACE:
5355 for (i = min; i < max; i++)
5356 {
5357 if (eptr >= md->end_subject)
5358 {
5359 SCHECK_PARTIAL();
5360 break;
5361 }
5362 c = *eptr;
5363 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
5364 break;
5365 eptr++;
5366 }
5367 break;
5368
5369 case OP_NOT_DIGIT:
5370 for (i = min; i < max; i++)
5371 {
5372 if (eptr >= md->end_subject)
5373 {
5374 SCHECK_PARTIAL();
5375 break;
5376 }
5377 if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
5378 eptr++;
5379 }
5380 break;
5381
5382 case OP_DIGIT:
5383 for (i = min; i < max; i++)
5384 {
5385 if (eptr >= md->end_subject)
5386 {
5387 SCHECK_PARTIAL();
5388 break;
5389 }
5390 if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
5391 eptr++;
5392 }
5393 break;
5394
5395 case OP_NOT_WHITESPACE:
5396 for (i = min; i < max; i++)
5397 {
5398 if (eptr >= md->end_subject)
5399 {
5400 SCHECK_PARTIAL();
5401 break;
5402 }
5403 if ((md->ctypes[*eptr] & ctype_space) != 0) break;
5404 eptr++;
5405 }
5406 break;
5407
5408 case OP_WHITESPACE:
5409 for (i = min; i < max; i++)
5410 {
5411 if (eptr >= md->end_subject)
5412 {
5413 SCHECK_PARTIAL();
5414 break;
5415 }
5416 if ((md->ctypes[*eptr] & ctype_space) == 0) break;
5417 eptr++;
5418 }
5419 break;
5420
5421 case OP_NOT_WORDCHAR:
5422 for (i = min; i < max; i++)
5423 {
5424 if (eptr >= md->end_subject)
5425 {
5426 SCHECK_PARTIAL();
5427 break;
5428 }
5429 if ((md->ctypes[*eptr] & ctype_word) != 0) break;
5430 eptr++;
5431 }
5432 break;
5433
5434 case OP_WORDCHAR:
5435 for (i = min; i < max; i++)
5436 {
5437 if (eptr >= md->end_subject)
5438 {
5439 SCHECK_PARTIAL();
5440 break;
5441 }
5442 if ((md->ctypes[*eptr] & ctype_word) == 0) break;
5443 eptr++;
5444 }
5445 break;
5446
5447 default:
5448 RRETURN(PCRE_ERROR_INTERNAL);
5449 }
5450
5451 /* eptr is now past the end of the maximum run */
5452
5453 if (possessive) continue;
5454 while (eptr >= pp)
5455 {
5456 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
5457 eptr--;
5458 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5459 }
5460 }
5461
5462 /* Get here if we can't make it match with any permitted repetitions */
5463
5464 MRRETURN(MATCH_NOMATCH);
5465 }
5466 /* Control never gets here */
5467
5468 /* There's been some horrible disaster. Arrival here can only mean there is
5469 something seriously wrong in the code above or the OP_xxx definitions. */
5470
5471 default:
5472 DPRINTF(("Unknown opcode %d\n", *ecode));
5473 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
5474 }
5475
5476 /* Do not stick any code in here without much thought; it is assumed
5477 that "continue" in the code above comes out to here to repeat the main
5478 loop. */
5479
5480 } /* End of main loop */
5481 /* Control never reaches here */
5482
5483
5484 /* When compiling to use the heap rather than the stack for recursive calls to
5485 match(), the RRETURN() macro jumps here. The number that is saved in
5486 frame->Xwhere indicates which label we actually want to return to. */
5487
5488 #ifdef NO_RECURSE
5489 #define LBL(val) case val: goto L_RM##val;
5490 HEAP_RETURN:
5491 switch (frame->Xwhere)
5492 {
5493 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5494 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5495 LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5496 LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5497 LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5498 #ifdef SUPPORT_UTF8
5499 LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5500 LBL(32) LBL(34) LBL(42) LBL(46)
5501 #ifdef SUPPORT_UCP
5502 LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5503 LBL(59) LBL(60) LBL(61) LBL(62)
5504 #endif /* SUPPORT_UCP */
5505 #endif /* SUPPORT_UTF8 */
5506 default:
5507 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5508 return PCRE_ERROR_INTERNAL;
5509 }
5510 #undef LBL
5511 #endif /* NO_RECURSE */
5512 }
5513
5514
5515 /***************************************************************************
5516 ****************************************************************************
5517 RECURSION IN THE match() FUNCTION
5518
5519 Undefine all the macros that were defined above to handle this. */
5520
5521 #ifdef NO_RECURSE /* heap (non-stack) recursion build: the names below were macros for match() */
5522 #undef eptr /* NOTE(review): presumably these aliased fields of the heap frame (cf. frame->Xwhere) - confirm against the #defines earlier in the file */
5523 #undef ecode
5524 #undef mstart
5525 #undef offset_top
5526 #undef ims /* pcre_exec() below declares an ordinary local variable called ims */
5527 #undef eptrb
5528 #undef flags
5529
5530 #undef callpat
5531 #undef charptr
5532 #undef data
5533 #undef next
5534 #undef pp
5535 #undef prev
5536 #undef saved_eptr
5537
5538 #undef new_recursive
5539
5540 #undef cur_is_word
5541 #undef condition
5542 #undef prev_is_word
5543
5544 #undef original_ims
5545
5546 #undef ctype
5547 #undef length /* pcre_exec() below has an ordinary parameter called length */
5548 #undef max
5549 #undef min
5550 #undef number
5551 #undef offset
5552 #undef op
5553 #undef save_capture_last
5554 #undef save_offset1
5555 #undef save_offset2
5556 #undef save_offset3
5557 #undef stacksave
5558
5559 #undef newptrb
5560
5561 #endif /* NO_RECURSE */
5562
5563 /* These two are defined as macros in both cases (that is, whether or not
     NO_RECURSE is defined), so they are undefined unconditionally. */
5564
5565 #undef fc
5566 #undef fi
5567
5568 /***************************************************************************
5569 ***************************************************************************/
5570
5571
5572
5573 /*************************************************
5574 * Execute a Regular Expression *
5575 *************************************************/
5576
5577 /* This function applies a compiled re to a subject string and picks out
5578 portions of the string if it matches. Two elements in the vector are set for
5579 each substring: the offsets to the start and end of the substring.
5580
5581 Arguments:
5582 argument_re points to the compiled expression
5583 extra_data points to extra data or is NULL
5584 subject points to the subject string
5585 length length of subject string (may contain binary zeros)
5586 start_offset where to start in the subject string
5587 options option bits
5588 offsets points to a vector of ints to be filled in with offsets
5589 offsetcount the number of elements in the vector
5590
5591 Returns: > 0 => success; value is the number of elements filled in
5592 = 0 => success, but offsets is not big enough
5593 -1 => failed to match
5594 < -1 => some kind of unexpected problem
5595 */
5596
5597 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5598 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5599 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5600 int offsetcount)
5601 {
5602 int rc, resetcount, ocount;
5603 int first_byte = -1;
5604 int req_byte = -1;
5605 int req_byte2 = -1;
5606 int newline;
5607 unsigned long int ims;
5608 BOOL using_temporary_offsets = FALSE;
5609 BOOL anchored;
5610 BOOL startline;
5611 BOOL firstline;
5612 BOOL first_byte_caseless = FALSE;
5613 BOOL req_byte_caseless = FALSE;
5614 BOOL utf8;
5615 match_data match_block;
5616 match_data *md = &match_block;
5617 const uschar *tables;
5618 const uschar *start_bits = NULL;
5619 USPTR start_match = (USPTR)subject + start_offset;
5620 USPTR end_subject;
5621 USPTR start_partial = NULL;
5622 USPTR req_byte_ptr = start_match - 1;
5623
5624 pcre_study_data internal_study;
5625 const pcre_study_data *study;
5626
5627 real_pcre internal_re;
5628 const real_pcre *external_re = (const real_pcre *)argument_re;
5629 const real_pcre *re = external_re;
5630
5631 /* Plausibility checks */
5632
5633 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
5634 if (re == NULL || subject == NULL ||
5635 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5636 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5637 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5638
5639 /* This information is for finding all the numbers associated with a given
5640 name, for condition testing. */
5641
5642 md->name_table = (uschar *)re + re->name_table_offset;
5643 md->name_count = re->name_count;
5644 md->name_entry_size = re->name_entry_size;
5645
5646 /* Fish out the optional data from the extra_data structure, first setting
5647 the default values. */
5648
5649 study = NULL;
5650 md->match_limit = MATCH_LIMIT;
5651 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
5652 md->callout_data = NULL;
5653
5654 /* The table pointer is always in native byte order. */
5655
5656 tables = external_re->tables;
5657
5658 if (extra_data != NULL)
5659 {
5660 register unsigned int flags = extra_data->flags;
5661 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
5662 study = (const pcre_study_data *)extra_data->study_data;
5663 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
5664 md->match_limit = extra_data->match_limit;
5665 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
5666 md->match_limit_recursion = extra_data->match_limit_recursion;
5667 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
5668 md->callout_data = extra_data->callout_data;
5669 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
5670 }
5671
5672 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
5673 is a feature that makes it possible to save compiled regexes and re-use them
5674 in other programs later. */
5675
5676 if (tables == NULL) tables = _pcre_default_tables;
5677
5678 /* Check that the first field in the block is the magic number. If it is not,
5679 test for a regex that was compiled on a host of opposite endianness. If this is
5680 the case, flipped values are put in internal_re and internal_study if there was
5681 study data too. */
5682
5683 if (re->magic_number != MAGIC_NUMBER)
5684 {
5685 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
5686 if (re == NULL) return PCRE_ERROR_BADMAGIC;
5687 if (study != NULL) study = &internal_study;
5688 }
5689
5690 /* Set up other data */
5691
5692 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
5693 startline = (re->flags & PCRE_STARTLINE) != 0;
5694 firstline = (re->options & PCRE_FIRSTLINE) != 0;
5695
5696 /* The code starts after the real_pcre block and the capture name table. */
5697
5698 md->start_code = (const uschar *)external_re + re->name_table_offset +
5699 re->name_count * re->name_entry_size;
5700
5701 md->start_subject = (USPTR)subject;
5702 md->start_offset = start_offset;
5703 md->end_subject = md->start_subject + length;
5704 end_subject = md->end_subject;
5705
5706 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5707 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5708 md->use_ucp = (re->options & PCRE_UCP) != 0;
5709 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5710
5711 md->notbol = (options & PCRE_NOTBOL) != 0;
5712 md->noteol = (options & PCRE_NOTEOL) != 0;
5713 md->notempty = (options & PCRE_NOTEMPTY) != 0;
5714 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5715 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5716 ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5717 md->hitend = FALSE;
5718 md->mark = NULL; /* In case never set */
5719
5720 md->recursive = NULL; /* No recursion at top level */
5721
5722 md->lcc = tables + lcc_offset;
5723 md->ctypes = tables + ctypes_offset;
5724
5725 /* Handle different \R options. */
5726
5727 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5728 {
5729 case 0:
5730 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
5731 md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
5732 else
5733 #ifdef BSR_ANYCRLF
5734 md->bsr_anycrlf = TRUE;
5735 #else
5736 md->bsr_anycrlf = FALSE;
5737 #endif
5738 break;
5739
5740 case PCRE_BSR_ANYCRLF:
5741 md->bsr_anycrlf = TRUE;
5742 break;
5743
5744 case PCRE_BSR_UNICODE:
5745 md->bsr_anycrlf = FALSE;
5746 break;
5747
5748 default: return PCRE_ERROR_BADNEWLINE;
5749 }
5750
5751 /* Handle different types of newline. The three bits give eight cases. If
5752 nothing is set at run time, whatever was used at compile time applies. */
5753
5754 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
5755 (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5756 {
5757 case 0: newline = NEWLINE; break; /* Compile-time default */
5758 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5759 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5760 case PCRE_NEWLINE_CR+
5761 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5762 case PCRE_NEWLINE_ANY: newline = -1; break;
5763 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5764 default: return PCRE_ERROR_BADNEWLINE;
5765 }
5766
5767 if (newline == -2)
5768 {
5769 md->nltype = NLTYPE_ANYCRLF;
5770 }
5771 else if (newline < 0)
5772 {
5773 md->nltype = NLTYPE_ANY;
5774 }
5775 else
5776 {
5777 md->nltype = NLTYPE_FIXED;
5778 if (newline > 255)
5779 {
5780 md->nllen = 2;
5781 md->nl[0] = (newline >> 8) & 255;
5782 md->nl[1] = newline & 255;
5783 }
5784 else
5785 {
5786 md->nllen = 1;
5787 md->nl[0] = newline;
5788 }
5789 }
5790
5791 /* Partial matching was originally supported only for a restricted set of
5792 regexes; from release 8.00 there are no restrictions, but the bits are still
5793 defined (though never set). So there's no harm in leaving this code. */
5794
5795 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5796 return PCRE_ERROR_BADPARTIAL;
5797
5798 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
5799 back the character offset. */
5800
5801 #ifdef SUPPORT_UTF8
5802 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5803 {
5804 int tb;
5805 if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
5806 return (tb == length && md->partial > 1)?
5807 PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5808 if (start_offset > 0 && start_offset < length)
5809 {
5810 tb = ((USPTR)subject)[start_offset] & 0xc0;
5811 if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
5812 }
5813 }
5814 #endif
5815
5816 /* The ims options can vary during the matching as a result of the presence
5817 of (?ims) items in the pattern. They are kept in a local variable so that
5818 restoring at the exit of a group is easy. */
5819
5820 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
5821
5822 /* If the expression has got more back references than the offsets supplied can
5823 hold, we get a temporary chunk of working store to use during the matching.
5824 Otherwise, we can use the vector supplied, rounding down its size to a multiple
5825 of 3. */
5826
5827 ocount = offsetcount - (offsetcount % 3);
5828
5829 if (re->top_backref > 0 && re->top_backref >= ocount/3)
5830 {
5831 ocount = re->top_backref * 3 + 3;
5832 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
5833 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
5834 using_temporary_offsets = TRUE;
5835 DPRINTF(("Got memory to hold back references\n"));
5836 }
5837 else md->offset_vector = offsets;
5838
5839 md->offset_end = ocount;
5840 md->offset_max = (2*ocount)/3;
5841 md->offset_overflow = FALSE;
5842 md->capture_last = -1;
5843
5844 /* Compute the minimum number of offsets that we need to reset each time. Doing
5845 this makes a huge difference to execution time when there aren't many brackets
5846 in the pattern. */
5847
5848 resetcount = 2 + re->top_bracket * 2;
5849 if (resetcount > offsetcount) resetcount = ocount;
5850
5851 /* Reset the working variables associated with each extraction. These should
5852 never be used unless previously set, but they get saved and restored, and so we
5853 initialize them to avoid reading uninitialized locations. */
5854
5855 if (md->offset_vector != NULL)
5856 {
5857 register int *iptr = md->offset_vector + ocount;
5858 register int *iend = iptr - resetcount/2 + 1;
5859 while (--iptr >= iend) *iptr = -1;
5860 }
5861
5862 /* Set up the first character to match, if available. The first_byte value is
5863 never set for an anchored regular expression, but the anchoring may be forced
5864 at run time, so we have to test for anchoring. The first char may be unset for
5865 an unanchored pattern, of course. If there's no first char and the pattern was
5866 studied, there may be a bitmap of possible first characters. */
5867
5868 if (!anchored)
5869 {
5870 if ((re->flags & PCRE_FIRSTSET) != 0)
5871 {
5872 first_byte = re->first_byte & 255;
5873 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
5874 first_byte = md->lcc[first_byte];
5875 }
5876 else
5877 if (!startline && study != NULL &&
5878 (study->flags & PCRE_STUDY_MAPPED) != 0)
5879 start_bits = study->start_bits;
5880 }
5881
5882 /* For anchored or unanchored matches, there may be a "last known required
5883 character" set. */
5884
5885 if ((re->flags & PCRE_REQCHSET) != 0)
5886 {
5887 req_byte = re->req_byte & 255;
5888 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
5889 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
5890 }
5891
5892
5893 /* ==========================================================================*/
5894
5895 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
5896 the loop runs just once. */
5897
5898 for(;;)
5899 {
5900 USPTR save_end_subject = end_subject;
5901 USPTR new_start_match;
5902
5903 /* Reset the maximum number of extractions we might see. */
5904
5905 if (md->offset_vector != NULL)
5906 {
5907 register int *iptr = md->offset_vector;
5908 register int *iend = iptr + resetcount;
5909 while (iptr < iend) *iptr++ = -1;
5910 }
5911
5912 /* If firstline is TRUE, the start of the match is constrained to the first
5913 line of a multiline string. That is, the match must be before or at the first
5914 newline. Implement this by temporarily adjusting end_subject so that we stop
5915 scanning at a newline. If the match fails at the newline, later code breaks
5916 this loop. */
5917
5918 if (firstline)
5919 {
5920 USPTR t = start_match;
5921 #ifdef SUPPORT_UTF8
5922 if (utf8)
5923 {
5924 while (t < md->end_subject && !IS_NEWLINE(t))
5925 {
5926 t++;
5927 while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5928 }
5929 }
5930 else
5931 #endif
5932 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5933 end_subject = t;
5934 }
5935
5936 /* There are some optimizations that avoid running the match if a known
5937 starting point is not found, or if a known later character is not present.
5938 However, there is an option that disables these, for testing and for ensuring
5939 that all callouts do actually occur. */
5940
5941 if ((options & PCRE_NO_START_OPTIMIZE) == 0)
5942 {
5943 /* Advance to a unique first byte if there is one. */
5944
5945 if (first_byte >= 0)
5946 {
5947 if (first_byte_caseless)
5948 while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5949 start_match++;
5950 else
5951 while (start_match < end_subject && *start_match != first_byte)
5952 start_match++;
5953 }
5954
5955 /* Or to just after a linebreak for a multiline match */
5956
5957 else if (startline)
5958 {
5959 if (start_match > md->start_subject + start_offset)
5960 {
5961 #ifdef SUPPORT_UTF8
5962 if (utf8)
5963 {
5964 while (start_match < end_subject && !WAS_NEWLINE(start_match))
5965 {
5966 start_match++;
5967 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5968 start_match++;
5969 }
5970 }
5971 else
5972 #endif
5973 while (start_match < end_subject && !WAS_NEWLINE(start_match))
5974 start_match++;
5975
5976 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5977 and we are now at a LF, advance the match position by one more character.
5978 */
5979
5980 if (start_match[-1] == CHAR_CR &&
5981 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5982 start_match < end_subject &&
5983 *start_match == CHAR_NL)
5984 start_match++;
5985 }
5986 }
5987
5988 /* Or to a non-unique first byte after study */
5989
5990 else if (start_bits != NULL)
5991 {
5992 while (start_match < end_subject)
5993 {
5994 register unsigned int c = *start_match;
5995 if ((start_bits[c/8] & (1 << (c&7))) == 0)
5996 {
5997 start_match++;
5998 #ifdef SUPPORT_UTF8
5999 if (utf8)
6000 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6001 start_match++;
6002 #endif
6003 }
6004 else break;
6005 }
6006 }
6007 } /* Starting optimizations */
6008
6009 /* Restore fudged end_subject */
6010
6011 end_subject = save_end_subject;
6012
6013 /* The following two optimizations are disabled for partial matching or if
6014 disabling is explicitly requested. */
6015
6016 if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6017 {
6018 /* If the pattern was studied, a minimum subject length may be set. This is
6019 only a lower bound; there may not actually be any string of that length that
6020 matches the pattern. Although the value is, strictly, in characters, we treat
6021 it as bytes to avoid spending too much time in this optimization. */
6022
6023 if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6024 (pcre_uint32)(end_subject - start_match) < study->minlength)
6025 {
6026 rc = MATCH_NOMATCH;
6027 break;
6028 }
6029
6030 /* If req_byte is set, we know that that character must appear in the
6031 subject for the match to succeed. If the first character is set, req_byte
6032 must be later in the subject; otherwise the test starts at the match point.
6033 This optimization can save a huge amount of backtracking in patterns with
6034 nested unlimited repeats that aren't going to match. Writing separate code
6035 for cased/caseless versions makes it go faster, as does using an
6036 autoincrement and backing off on a match.
6037
6038 HOWEVER: when the subject string is very, very long, searching to its end
6039 can take a long time, and give bad performance on quite ordinary patterns.
6040 This showed up when somebody was matching something like /^\d+C/ on a
6041 32-megabyte string... so we don't do this when the string is sufficiently
6042 long. */
6043
6044 if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
6045 {
6046 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
6047
6048 /* We don't need to repeat the search if we haven't yet reached the
6049 place we found it at last time. */
6050
6051 if (p > req_byte_ptr)
6052 {
6053 if (req_byte_caseless)
6054 {
6055 while (p < end_subject)
6056 {
6057 register int pp = *p++;
6058 if (pp == req_byte || pp == req_byte2) { p--; break; }
6059 }
6060 }
6061 else
6062 {
6063 while (p < end_subject)
6064 {
6065 if (*p++ == req_byte) { p--; break; }
6066 }
6067 }
6068
6069 /* If we can't find the required character, break the matching loop,
6070 forcing a match failure. */
6071
6072 if (p >= end_subject)
6073 {
6074 rc = MATCH_NOMATCH;
6075 break;
6076 }
6077
6078 /* If we have found the required character, save the point where we
6079 found it, so that we don't search again next time round the loop if
6080 the start hasn't passed this character yet. */
6081
6082 req_byte_ptr = p;
6083 }
6084 }
6085 }
6086
6087 #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
6088 printf(">>>> Match against: ");
6089 pchars(start_match, end_subject - start_match, TRUE, md);
6090 printf("\n");
6091 #endif
6092
6093 /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6094 first starting point for which a partial match was found. */
6095
6096 md->start_match_ptr = start_match;
6097 md->start_used_ptr = start_match;
6098 md->match_call_count = 0;
6099 rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
6100 0, 0);
6101 if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6102
6103 switch(rc)
6104 {
6105 /* SKIP passes back the next starting point explicitly, but if it is the
6106 same as the match we have just done, treat it as NOMATCH. */
6107
6108 case MATCH_SKIP:
6109 if (md->start_match_ptr != start_match)
6110 {
6111 new_start_match = md->start_match_ptr;
6112 break;
6113 }
6114 /* Fall through */
6115
6116 /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6117 the SKIP's arg was not found. We also treat this as NOMATCH. */
6118
6119 case MATCH_SKIP_ARG:
6120 /* Fall through */
6121
6122 /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6123 exactly like PRUNE. */
6124
6125 case MATCH_NOMATCH:
6126 case MATCH_PRUNE:
6127 case MATCH_THEN:
6128 new_start_match = start_match + 1;
6129 #ifdef SUPPORT_UTF8
6130 if (utf8)
6131 while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
6132 new_start_match++;
6133 #endif
6134 break;
6135
6136 /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6137
6138 case MATCH_COMMIT:
6139 rc = MATCH_NOMATCH;
6140 goto ENDLOOP;
6141
6142 /* Any other return is either a match, or some kind of error. */
6143
6144 default:
6145 goto ENDLOOP;
6146 }
6147
6148 /* Control reaches here for the various types of "no match at this point"
6149 result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6150
6151 rc = MATCH_NOMATCH;
6152
6153 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6154 newline in the subject (though it may continue over the newline). Therefore,
6155 if we have just failed to match, starting at a newline, do not continue. */
6156
6157 if (firstline && IS_NEWLINE(start_match)) break;
6158
6159 /* Advance to new matching position */
6160
6161 start_match = new_start_match;
6162
6163 /* Break the loop if the pattern is anchored or if we have passed the end of
6164 the subject. */
6165
6166 if (anchored || start_match > end_subject) break;
6167
6168 /* If we have just passed a CR and we are now at a LF, and the pattern does
6169 not contain any explicit matches for \r or \n, and the newline option is CRLF
6170 or ANY or ANYCRLF, advance the match position by one more character. */
6171
6172 if (start_match[-1] == CHAR_CR &&
6173 start_match < end_subject &&
6174 *start_match == CHAR_NL &&
6175 (re->flags & PCRE_HASCRORLF) == 0 &&
6176 (md->nltype == NLTYPE_ANY ||
6177 md->nltype == NLTYPE_ANYCRLF ||
6178 md->nllen == 2))
6179 start_match++;
6180
6181 md->mark = NULL; /* Reset for start of next match attempt */
6182 } /* End of for(;;) "bumpalong" loop */
6183
6184 /* ==========================================================================*/
6185
6186 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6187 conditions is true:
6188
6189 (1) The pattern is anchored or the match was forced to fail by (*COMMIT);
6190
6191 (2) We are past the end of the subject;
6192
6193 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6194 this option requests that a match occur at or before the first newline in
6195 the subject.
6196
6197 When we have a match and the offset vector is big enough to deal with any
6198 backreferences, captured substring offsets will already be set up. In the case
6199 where we had to get some local store to hold offsets for backreference
6200 processing, copy those that we can. In this case there need not be overflow if
6201 certain parts of the pattern were not used, even though there are more
6202 capturing parentheses than vector slots. */
6203
6204 ENDLOOP:
6205
6206 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
6207 {
6208 if (using_temporary_offsets)
6209 {
6210 if (offsetcount >= 4)
6211 {
6212 memcpy(offsets + 2, md->offset_vector + 2,
6213 (offsetcount - 2) * sizeof(int));
6214 DPRINTF(("Copied offsets from temporary memory\n"));
6215 }
6216 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
6217 DPRINTF(("Freeing temporary memory\n"));
6218 (pcre_free)(md->offset_vector);
6219 }
6220
6221 /* Set the return code to the number of captured strings, or 0 if there are
6222 too many to fit into the vector. */
6223
6224 rc = md->offset_overflow? 0 : md->end_offset_top/2;
6225
6226 /* If there is space, set up the whole thing as substring 0. The value of
6227 md->start_match_ptr might have been modified if \K was encountered on the
6228 successful matching path. */
6229
6230 if (offsetcount < 2) rc = 0; else
6231 {
6232 offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6233 offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6234 }
6235
6236 DPRINTF((">>>> returning %d\n", rc));
6237 goto RETURN_MARK;
6238 }
6239
6240 /* Control gets here if there has been an error, or if the overall match
6241 attempt has failed at all permitted starting positions. */
6242
6243 if (using_temporary_offsets)
6244 {
6245 DPRINTF(("Freeing temporary memory\n"));
6246 (pcre_free)(md->offset_vector);
6247 }
6248
6249 /* For anything other than nomatch or partial match, just return the code. */
6250
6251 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6252 {
6253 DPRINTF((">>>> error: returning %d\n", rc));
6254 return rc;
6255 }
6256
6257 /* Handle partial matches - disable any mark data */
6258
6259 if (start_partial != NULL)
6260 {
6261 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6262 md->mark = NULL;
6263 if (offsetcount > 1)
6264 {
6265 offsets[0] = (int)(start_partial - (USPTR)subject);
6266 offsets[1] = (int)(end_subject - (USPTR)subject);
6267 }
6268 rc = PCRE_ERROR_PARTIAL;
6269 }
6270
6271 /* This is the classic nomatch case */
6272
6273 else
6274 {
6275 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6276 rc = PCRE_ERROR_NOMATCH;
6277 }
6278
6279 /* Return the MARK data if it has been requested. */
6280
6281 RETURN_MARK:
6282
6283 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6284 *(extra_data->mark) = (unsigned char *)(md->mark);
6285 return rc;
6286 }
6287
6288 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12