/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 598 - (show annotations) (download)
Sat May 7 15:37:31 2011 UTC (3 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 187967 byte(s)
Pass back detailed info when UTF-8 check fails at runtime.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49 #define NLBLOCK md /* Block containing newline information */
50 #define PSSTART start_subject /* Field containing processed string start */
51 #define PSEND end_subject /* Field containing processed string end */
52
53 #include "pcre_internal.h"
54
55 /* Undefine some potentially clashing cpp symbols */
56
57 #undef min
58 #undef max
59
60 /* Flag bits for the match() function */
61
62 #define match_condassert 0x01 /* Called to check a condition assertion */
63 #define match_cbegroup 0x02 /* Could-be-empty unlimited repeat group */
64
65 /* Non-error returns from the match() function. Error returns are externally
66 defined PCRE_ERROR_xxx codes, which are all negative. */
67
68 #define MATCH_MATCH 1
69 #define MATCH_NOMATCH 0
70
71 /* Special internal returns from the match() function. Make them sufficiently
72 negative to avoid the external error codes. */
73
74 #define MATCH_ACCEPT (-999)
75 #define MATCH_COMMIT (-998) /* returned by the OP_COMMIT case */
76 #define MATCH_PRUNE (-997) /* returned by the OP_PRUNE and OP_PRUNE_ARG cases */
77 #define MATCH_SKIP (-996) /* returned by OP_SKIP, and by OP_MARK when a SKIP name matches its mark */
78 #define MATCH_SKIP_ARG (-995) /* returned by OP_SKIP_ARG; the name travels in md->start_match_ptr */
79 #define MATCH_THEN (-994) /* returned by the OP_THEN and OP_THEN_ARG cases */
80
81 /* This is a convenience macro for code that occurs many times: it copies the current markptr into md->mark and then returns ra by way of RRETURN. */
82
83 #define MRRETURN(ra) \
84 { \
85 md->mark = markptr; \
86 RRETURN(ra); \
87 }
88
89 /* Maximum number of ints of offset to save on the stack for recursive calls.
90 If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91 because the offset vector is always a multiple of 3 long. */
92
93 #define REC_STACK_SAVE_MAX 30
94
95 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
96
97 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
98 static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
99
100
101
102 #ifdef PCRE_DEBUG
103 /*************************************************
104 * Debugging function to print chars *
105 *************************************************/
106
107 /* Print a sequence of chars in printable format, stopping at the end of the
108 subject if requested.
109
110 Arguments:
111 p points to characters
112 length number to print
113 is_subject TRUE if printing from within md->start_subject
114 md pointer to matching data block, if is_subject is TRUE
115
116 Returns: nothing
117 */
118
119 static void
120 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
121 {
122 unsigned int c; /* md is dereferenced below only when is_subject is TRUE */
123 if (is_subject && length > md->end_subject - p) length = md->end_subject - p; /* clip to what is left of the subject */
124 while (length-- > 0)
125 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c); /* non-printing bytes are shown in hex */
126 }
127 #endif
128
129
130
131 /*************************************************
132 * Match a back-reference *
133 *************************************************/
134
135 /* Normally, if a back reference hasn't been set, the length that is passed is
136 negative, so the match always fails. However, in JavaScript compatibility mode,
137 the length passed is zero. Note that in caseless UTF-8 mode, the number of
138 subject bytes matched may be different to the number of reference bytes.
139
140 Arguments:
141 offset index into the offset vector
142 eptr pointer into the subject
143 length length of reference to be matched (number of bytes)
144 md points to match data block
145 ims the ims flags
146
147 Returns: < 0 if not matched, otherwise the number of subject bytes matched
148 */
149
150 static int
151 match_ref(int offset, register USPTR eptr, int length, match_data *md,
152 unsigned long int ims)
153 {
154 USPTR eptr_start = eptr;
155 register USPTR p = md->start_subject + md->offset_vector[offset];
156
157 #ifdef PCRE_DEBUG
158 if (eptr >= md->end_subject)
159 printf("matching subject <null>");
160 else
161 {
162 printf("matching subject ");
163 pchars(eptr, length, TRUE, md);
164 }
165 printf(" against backref ");
166 pchars(p, length, FALSE, md);
167 printf("\n");
168 #endif
169
170 /* Always fail if reference not set (and not JavaScript compatible). */
171
172 if (length < 0) return -1;
173
174 /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175 properly if Unicode properties are supported. Otherwise, we can check only
176 ASCII characters. */
177
178 if ((ims & PCRE_CASELESS) != 0)
179 {
180 #ifdef SUPPORT_UTF8
181 #ifdef SUPPORT_UCP
182 if (md->utf8)
183 {
184 /* Match characters up to the end of the reference. NOTE: the number of
185 bytes matched may differ, because there are some characters whose upper and
186 lower case versions code as different numbers of bytes. For example, U+023A
187 (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188 a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189 the latter. It is important, therefore, to check the length along the
190 reference, not along the subject (earlier code did this wrong). */
191
192 USPTR endptr = p + length;
193 while (p < endptr)
194 {
195 int c, d;
196 if (eptr >= md->end_subject) return -1;
197 GETCHARINC(c, eptr);
198 GETCHARINC(d, p);
199 if (c != d && c != UCD_OTHERCASE(d)) return -1;
200 }
201 }
202 else
203 #endif
204 #endif
205
206 /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207 is no UCP support. The room left in the subject is found by pointer subtraction, so that no pointer beyond the end of the subject is ever formed. */
208 {
209 if (length > md->end_subject - eptr) return -1; /* reference is longer than what is left of the subject */
210 while (length-- > 0)
211 { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212 }
213 }
214
215 /* In the caseful case, we can just compare the bytes, whether or not we
216 are in UTF-8 mode. */
217
218 else
219 {
220 if (length > md->end_subject - eptr) return -1; /* same subtraction-based bounds test as in the caseless byte loop */
221 while (length-- > 0) if (*p++ != *eptr++) return -1;
222 }
223
224 return eptr - eptr_start;
225 }
226
227
228
229 /***************************************************************************
230 ****************************************************************************
231 RECURSION IN THE match() FUNCTION
232
233 The match() function is highly recursive, though not every recursive call
234 increases the recursive depth. Nevertheless, some regular expressions can cause
235 it to recurse to a great depth. I was writing for Unix, so I just let it call
236 itself recursively. This uses the stack for saving everything that has to be
237 saved for a recursive call. On Unix, the stack can be large, and this works
238 fine.
239
240 It turns out that on some non-Unix-like systems there are problems with
241 programs that use a lot of stack. (This despite the fact that every last chip
242 has oodles of memory these days, and techniques for extending the stack have
243 been known for decades.) So....
244
245 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
246 calls by keeping local variables that need to be preserved in blocks of memory
247 obtained from malloc() instead of on the stack. Macros are used to
248 achieve this so that the actual code doesn't look very different to what it
249 always used to.
250
251 The original heap-recursive code used longjmp(). However, it seems that this
252 can be very slow on some operating systems. Following a suggestion from Stan
253 Switzer, the use of longjmp() has been abolished, at the cost of having to
254 provide a unique number for each call to RMATCH. There is no way of generating
255 a sequence of numbers at compile time in C. I have given them names, to make
256 them stand out more clearly.
257
258 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
259 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
260 tests. Furthermore, not using longjmp() means that local dynamic variables
261 don't have indeterminate values; this has meant that the frame size can be
262 reduced because the result can be "passed back" by straight setting of the
263 variable instead of being passed in the frame.
264 ****************************************************************************
265 ***************************************************************************/
266
267 /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
268 below must be updated in sync. */
269
270 enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
271 RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
272 RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
273 RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
274 RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
275 RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
276 RM61, RM62 }; /* the heap version of RMATCH stores the value in Xwhere and pastes the name into a label L_RMn */
277
278 /* These versions of the macros use the stack, as normal. There are debugging
279 versions and production versions. Note that the "rw" argument of RMATCH isn't
280 actually used in this definition. */
281
282 #ifndef NO_RECURSE
283 #define REGISTER register
284
285 #ifdef PCRE_DEBUG /* debugging versions: trace every call and return on stdout */
286 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
287 { \
288 printf("match() called in line %d\n", __LINE__); \
289 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); /* result is left in rrc; rw is not used */ \
290 printf("to line %d\n", __LINE__); \
291 }
292 #define RRETURN(ra) \
293 { \
294 printf("match() returned %d from line %d ", ra, __LINE__); \
295 return ra; \
296 }
297 #else /* production versions: a plain recursive call and a plain return */
298 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
299 rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
300 #define RRETURN(ra) return ra
301 #endif
302
303 #else
304
305
306 /* These versions of the macros manage a private stack on the heap. Note that
307 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
308 argument of match(), which never changes. */
309
310 #define REGISTER /* expands to nothing here; it is "register" in the recursive build */
311
312 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
313 {\
314 heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe)); /* frame for the simulated call */\
315 if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316 frame->Xwhere = rw; /* recorded in the calling frame; L_<rw> below is the matching resume label */ \
317 newframe->Xeptr = ra;\
318 newframe->Xecode = rb;\
319 newframe->Xmstart = mstart;\
320 newframe->Xmarkptr = markptr;\
321 newframe->Xoffset_top = rc;\
322 newframe->Xims = re;\
323 newframe->Xeptrb = rf;\
324 newframe->Xflags = rg;\
325 newframe->Xrdepth = frame->Xrdepth + 1;\
326 newframe->Xprevframe = frame; /* link back so RRETURN can pop to the caller */\
327 frame = newframe;\
328 DPRINTF(("restarting from line %d\n", __LINE__));\
329 goto HEAP_RECURSE; /* restart the body of match() using the new frame */\
330 L_##rw: /* NOTE(review): presumably reached from HEAP_RETURN via the saved Xwhere - that code is not visible here */\
331 DPRINTF(("jumped back to line %d\n", __LINE__));\
332 }
333 /* Pop the current heap frame, then either resume the caller at HEAP_RETURN with the result in rrc, or return ra from match() when there is no caller. frame is NULL only when the top-level allocation in match() failed; that case must neither be dereferenced nor freed. */
334 #define RRETURN(ra)\
335 {\
336 heapframe *oldframe = frame;\
337 frame = (oldframe == NULL)? NULL : oldframe->Xprevframe;\
338 if (oldframe != NULL) (pcre_stack_free)(oldframe);\
339 if (frame != NULL)\
340 {\
341 rrc = ra;\
342 goto HEAP_RETURN;\
343 }\
344 return ra;\
345 }
346
347
348 /* Structure for remembering the local variables in a private frame */
349
350 typedef struct heapframe {
351 struct heapframe *Xprevframe; /* calling frame; NULL marks the top level */
352
353 /* Function arguments that may change */
354
355 USPTR Xeptr;
356 const uschar *Xecode;
357 USPTR Xmstart;
358 USPTR Xmarkptr;
359 int Xoffset_top;
360 unsigned long int Xims; /* same type as the ims argument of match() and as Xoriginal_ims */
361 eptrblock *Xeptrb;
362 int Xflags;
363 unsigned int Xrdepth;
364
365 /* Function local variables */
366
367 USPTR Xcallpat;
368 #ifdef SUPPORT_UTF8
369 USPTR Xcharptr;
370 #endif
371 USPTR Xdata;
372 USPTR Xnext;
373 USPTR Xpp;
374 USPTR Xprev;
375 USPTR Xsaved_eptr;
376
377 recursion_info Xnew_recursive;
378
379 BOOL Xcur_is_word;
380 BOOL Xcondition;
381 BOOL Xprev_is_word;
382
383 unsigned long int Xoriginal_ims;
384
385 #ifdef SUPPORT_UCP
386 int Xprop_type;
387 int Xprop_value;
388 int Xprop_fail_result;
389 int Xprop_category;
390 int Xprop_chartype;
391 int Xprop_script;
392 int Xoclength;
393 uschar Xocchars[8];
394 #endif
395
396 int Xcodelink;
397 int Xctype;
398 unsigned int Xfc;
399 int Xfi;
400 int Xlength;
401 int Xmax;
402 int Xmin;
403 int Xnumber;
404 int Xoffset;
405 int Xop;
406 int Xsave_capture_last;
407 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
408 int Xstacksave[REC_STACK_SAVE_MAX];
409
410 eptrblock Xnewptrb; /* block chained onto eptrb when match_cbegroup is set */
411
412 /* Where to jump back to */
413
414 int Xwhere; /* RMn value stored by RMATCH before the simulated call */
415
416 } heapframe;
417
418 #endif
419
420
421 /***************************************************************************
422 ***************************************************************************/
423
424
425
426 /*************************************************
427 * Match from current position *
428 *************************************************/
429
430 /* This function is called recursively in many circumstances. Whenever it
431 returns a negative (error) response, the outer incarnation must also return the
432 same response. */
433
434 /* These macros pack up tests that are used for partial matching, and which
435 appear several times in the code. We set the "hit end" flag if the pointer is
436 at the end of the subject and also past the start of the subject (i.e.
437 something has been matched). For hard partial matching, we then return
438 immediately. The second one is used when we already know we are past the end of
439 the subject. */
440
441 #define CHECK_PARTIAL() /* tests for end of subject itself */\
442 if (md->partial != 0 && eptr >= md->end_subject && \
443 eptr > md->start_used_ptr) \
444 { \
445 md->hitend = TRUE; \
446 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); /* hard partial matching: return immediately */ \
447 }
448
449 #define SCHECK_PARTIAL() /* for use when eptr is already known to be past the end */\
450 if (md->partial != 0 && eptr > md->start_used_ptr) \
451 { \
452 md->hitend = TRUE; \
453 if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); /* hard partial matching: return immediately */ \
454 }
455
456
457 /* Performance note: It might be tempting to extract commonly used fields from
458 the md structure (e.g. utf8, end_subject) into individual variables to improve
459 performance. Tests using gcc on a SPARC disproved this; in the first case, it
460 made performance worse.
461
462 Arguments:
463 eptr pointer to current character in subject
464 ecode pointer to current position in compiled code
465 mstart pointer to the current match start position (can be modified
466 by encountering \K)
467 markptr pointer to the most recent MARK name, or NULL
468 offset_top current top pointer
469 md pointer to "static" info for the match
470 ims current /i, /m, and /s options
471 eptrb pointer to chain of blocks containing eptr at start of
472 brackets - for testing for empty matches
473 flags can contain
474 match_condassert - this is an assertion condition
475 match_cbegroup - this is the start of an unlimited repeat
476 group that can match an empty string
477 rdepth the recursion depth
478
479 Returns: MATCH_MATCH if matched ) these values are >= 0
480 MATCH_NOMATCH if failed to match )
481 a negative MATCH_xxx value for PRUNE, SKIP, etc
482 a negative PCRE_ERROR_xxx value if aborted by an error condition
483 (e.g. stopped by repeated call or recursion limit)
484 */
485
486 static int
487 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
488 const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
489 eptrblock *eptrb, int flags, unsigned int rdepth)
490 {
491 /* These variables do not need to be preserved over recursion in this function,
492 so they can be ordinary variables in all cases. Mark some of them with
493 "register" because they are used a lot in loops. */
494
495 register int rrc; /* Returns from recursive calls */
496 register int i; /* Used for loops not involving calls to RMATCH() */
497 register unsigned int c; /* Character values not kept over RMATCH() calls */
498 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
499
500 BOOL minimize, possessive; /* Quantifier options */
501 int condcode;
502
503 /* When recursion is not being used, all "local" variables that have to be
504 preserved over calls to RMATCH() are part of a "frame" which is obtained from
505 heap storage. Set up the top-level frame here; others are obtained from the
506 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
507
508 #ifdef NO_RECURSE
509 heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
510 if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
511 frame->Xprevframe = NULL; /* Marks the top level */
512
513 /* Copy in the original argument variables */
514
515 frame->Xeptr = eptr;
516 frame->Xecode = ecode;
517 frame->Xmstart = mstart;
518 frame->Xmarkptr = markptr;
519 frame->Xoffset_top = offset_top;
520 frame->Xims = ims;
521 frame->Xeptrb = eptrb;
522 frame->Xflags = flags;
523 frame->Xrdepth = rdepth;
524
525 /* This is where control jumps back to to effect "recursion" */
526
527 HEAP_RECURSE:
528
529 /* Macros make the argument variables come from the current frame */
530
531 #define eptr frame->Xeptr
532 #define ecode frame->Xecode
533 #define mstart frame->Xmstart
534 #define markptr frame->Xmarkptr
535 #define offset_top frame->Xoffset_top
536 #define ims frame->Xims
537 #define eptrb frame->Xeptrb
538 #define flags frame->Xflags
539 #define rdepth frame->Xrdepth
540
541 /* Ditto for the local variables */
542
543 #ifdef SUPPORT_UTF8
544 #define charptr frame->Xcharptr
545 #endif
546 #define callpat frame->Xcallpat
547 #define codelink frame->Xcodelink
548 #define data frame->Xdata
549 #define next frame->Xnext
550 #define pp frame->Xpp
551 #define prev frame->Xprev
552 #define saved_eptr frame->Xsaved_eptr
553
554 #define new_recursive frame->Xnew_recursive
555
556 #define cur_is_word frame->Xcur_is_word
557 #define condition frame->Xcondition
558 #define prev_is_word frame->Xprev_is_word
559
560 #define original_ims frame->Xoriginal_ims
561
562 #ifdef SUPPORT_UCP
563 #define prop_type frame->Xprop_type
564 #define prop_value frame->Xprop_value
565 #define prop_fail_result frame->Xprop_fail_result
566 #define prop_category frame->Xprop_category
567 #define prop_chartype frame->Xprop_chartype
568 #define prop_script frame->Xprop_script
569 #define oclength frame->Xoclength
570 #define occhars frame->Xocchars
571 #endif
572
573 #define ctype frame->Xctype
574 #define fc frame->Xfc
575 #define fi frame->Xfi
576 #define length frame->Xlength
577 #define max frame->Xmax
578 #define min frame->Xmin
579 #define number frame->Xnumber
580 #define offset frame->Xoffset
581 #define op frame->Xop
582 #define save_capture_last frame->Xsave_capture_last
583 #define save_offset1 frame->Xsave_offset1
584 #define save_offset2 frame->Xsave_offset2
585 #define save_offset3 frame->Xsave_offset3
586 #define stacksave frame->Xstacksave
587
588 #define newptrb frame->Xnewptrb
589
590 /* When recursion is being used, local variables are allocated on the stack and
591 get preserved during recursion in the normal way. In this environment, fi and
592 i, and fc and c, can be the same variables. */
593
594 #else /* NO_RECURSE not defined */
595 #define fi i
596 #define fc c
597
598
599 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
600 const uschar *charptr; /* in small blocks of the code. My normal */
601 #endif /* style of coding would have declared */
602 const uschar *callpat; /* them within each of those blocks. */
603 const uschar *data; /* However, in order to accommodate the */
604 const uschar *next; /* version of this code that uses an */
605 USPTR pp; /* external "stack" implemented on the */
606 const uschar *prev; /* heap, it is easier to declare them all */
607 USPTR saved_eptr; /* here, so the declarations can be cut */
608 /* out in a block. The only declarations */
609 recursion_info new_recursive; /* within blocks below are for variables */
610 /* that do not have to be preserved over */
611 BOOL cur_is_word; /* a recursive call to RMATCH(). */
612 BOOL condition;
613 BOOL prev_is_word;
614
615 unsigned long int original_ims;
616
617 #ifdef SUPPORT_UCP
618 int prop_type;
619 int prop_value;
620 int prop_fail_result;
621 int prop_category;
622 int prop_chartype;
623 int prop_script;
624 int oclength;
625 uschar occhars[8];
626 #endif
627
628 int codelink;
629 int ctype;
630 int length;
631 int max;
632 int min;
633 int number;
634 int offset;
635 int op;
636 int save_capture_last;
637 int save_offset1, save_offset2, save_offset3;
638 int stacksave[REC_STACK_SAVE_MAX];
639
640 eptrblock newptrb;
641 #endif /* NO_RECURSE */
642
643 /* These statements are here to stop the compiler complaining about uninitialized
644 variables. */
645
646 #ifdef SUPPORT_UCP
647 prop_value = 0;
648 prop_fail_result = 0;
649 #endif
650
651
652 /* This label is used for tail recursion, which is used in a few cases even
653 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
654 used. Thanks to Ian Taylor for noticing this possibility and sending the
655 original patch. */
656
657 TAIL_RECURSE:
658
659 /* OK, now we can get on with the real code of the function. Recursive calls
660 are specified by the macro RMATCH and RRETURN is used to return. When
661 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
662 and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
663 defined). However, RMATCH isn't like a function call because it's quite a
664 complicated macro. It has to be used in one particular way. This shouldn't,
665 however, impact performance when true recursion is being used. */
666
667 #ifdef SUPPORT_UTF8
668 utf8 = md->utf8; /* Local copy of the flag */
669 #else
670 utf8 = FALSE;
671 #endif
672
673 /* First check that we haven't called match() too many times, or that we
674 haven't exceeded the recursive call limit. */
675
676 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
677 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
678
679 original_ims = ims; /* Save for resetting on ')' */
680
681 /* At the start of a group with an unlimited repeat that may match an empty
682 string, the match_cbegroup flag is set. When this is the case, add the current
683 subject pointer to the chain of such remembered pointers, to be checked when we
684 hit the closing ket, in order to break infinite loops that match no characters.
685 When match() is called in other circumstances, don't add to the chain. The
686 match_cbegroup flag must NOT be used with tail recursion, because the memory
687 block that is used is on the stack, so a new one may be required for each
688 match(). */
689
690 if ((flags & match_cbegroup) != 0)
691 {
692 newptrb.epb_saved_eptr = eptr;
693 newptrb.epb_prev = eptrb;
694 eptrb = &newptrb;
695 }
696
697 /* Now start processing the opcodes. */
698
699 for (;;)
700 {
701 minimize = possessive = FALSE;
702 op = *ecode;
703
704 switch(op)
705 {
706 case OP_MARK:
707 markptr = ecode + 2;
708 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
709 ims, eptrb, flags, RM55);
710
711 /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
712 argument, and we must check whether that argument matches this MARK's
713 argument. It is passed back in md->start_match_ptr (an overloading of that
714 variable). If it does match, we reset that variable to the current subject
715 position and return MATCH_SKIP. Otherwise, pass back the return code
716 unaltered. */
717
718 if (rrc == MATCH_SKIP_ARG &&
719 strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
720 {
721 md->start_match_ptr = eptr;
722 RRETURN(MATCH_SKIP);
723 }
724
725 if (md->mark == NULL) md->mark = markptr;
726 RRETURN(rrc);
727
728 case OP_FAIL:
729 MRRETURN(MATCH_NOMATCH);
730
731 /* COMMIT overrides PRUNE, SKIP, and THEN */
732
733 case OP_COMMIT:
734 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
735 ims, eptrb, flags, RM52);
736 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
737 rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
738 rrc != MATCH_THEN)
739 RRETURN(rrc);
740 MRRETURN(MATCH_COMMIT);
741
742 /* PRUNE overrides THEN */
743
744 case OP_PRUNE:
745 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
746 ims, eptrb, flags, RM51);
747 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
748 MRRETURN(MATCH_PRUNE);
749
750 case OP_PRUNE_ARG:
751 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752 ims, eptrb, flags, RM56);
753 if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
754 md->mark = ecode + 2;
755 RRETURN(MATCH_PRUNE);
756
757 /* SKIP overrides PRUNE and THEN */
758
759 case OP_SKIP:
760 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
761 ims, eptrb, flags, RM53);
762 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
763 RRETURN(rrc);
764 md->start_match_ptr = eptr; /* Pass back current position */
765 MRRETURN(MATCH_SKIP);
766
767 case OP_SKIP_ARG:
768 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
769 ims, eptrb, flags, RM57);
770 if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
771 RRETURN(rrc);
772
773 /* Pass back the current skip name by overloading md->start_match_ptr and
774 returning the special MATCH_SKIP_ARG return code. This will either be
775 caught by a matching MARK, or get to the top, where it is treated the same
776 as PRUNE. */
777
778 md->start_match_ptr = ecode + 2;
779 RRETURN(MATCH_SKIP_ARG);
780
781 /* For THEN (and THEN_ARG) we pass back the address of the bracket or
782 the alt that is at the start of the current branch. This makes it possible
783 to skip back past alternatives that precede the THEN within the current
784 branch. */
785
786 case OP_THEN:
787 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
788 ims, eptrb, flags, RM54);
789 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
790 md->start_match_ptr = ecode - GET(ecode, 1);
791 MRRETURN(MATCH_THEN);
792
793 case OP_THEN_ARG:
794 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
795 offset_top, md, ims, eptrb, flags, RM58);
796 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
797 md->start_match_ptr = ecode - GET(ecode, 1);
798 md->mark = ecode + LINK_SIZE + 2;
799 RRETURN(MATCH_THEN);
800
801 /* Handle a capturing bracket. If there is space in the offset vector, save
802 the current subject position in the working slot at the top of the vector.
803 We mustn't change the current values of the data slot, because they may be
804 set from a previous iteration of this group, and be referred to by a
805 reference inside the group.
806
807 If the bracket fails to match, we need to restore this value and also the
808 values of the final offsets, in case they were set by a previous iteration
809 of the same bracket.
810
811 If there isn't enough space in the offset vector, treat this as if it were
812 a non-capturing bracket. Don't worry about setting the flag for the error
813 case here; that is handled in the code for KET. */
814
815 case OP_CBRA:
816 case OP_SCBRA:
817 number = GET2(ecode, 1+LINK_SIZE);
818 offset = number << 1;
819
820 #ifdef PCRE_DEBUG
821 printf("start bracket %d\n", number);
822 printf("subject=");
823 pchars(eptr, 16, TRUE, md);
824 printf("\n");
825 #endif
826
827 if (offset < md->offset_max)
828 {
829 save_offset1 = md->offset_vector[offset];
830 save_offset2 = md->offset_vector[offset+1];
831 save_offset3 = md->offset_vector[md->offset_end - number];
832 save_capture_last = md->capture_last;
833
834 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
835 md->offset_vector[md->offset_end - number] =
836 (int)(eptr - md->start_subject);
837
838 flags = (op == OP_SCBRA)? match_cbegroup : 0;
839 do
840 {
841 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
842 ims, eptrb, flags, RM1);
843 if (rrc != MATCH_NOMATCH &&
844 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
845 RRETURN(rrc);
846 md->capture_last = save_capture_last;
847 ecode += GET(ecode, 1);
848 }
849 while (*ecode == OP_ALT);
850
851 DPRINTF(("bracket %d failed\n", number));
852
853 md->offset_vector[offset] = save_offset1;
854 md->offset_vector[offset+1] = save_offset2;
855 md->offset_vector[md->offset_end - number] = save_offset3;
856
857 if (rrc != MATCH_THEN) md->mark = markptr;
858 RRETURN(MATCH_NOMATCH);
859 }
860
861 /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
862 as a non-capturing bracket. */
863
864 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
865 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
866
867 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
868
869 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
870 /* VVVVVVVVVVVVVVVVVVVVVVVVV */
871
872 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
873 final alternative within the brackets, we would return the result of a
874 recursive call to match() whatever happened. We can reduce stack usage by
875 turning this into a tail recursion, except in the case when match_cbegroup
876 is set.*/
877
878 case OP_BRA:
879 case OP_SBRA:
880 DPRINTF(("start non-capturing bracket\n"));
881 flags = (op >= OP_SBRA)? match_cbegroup : 0;
882 for (;;)
883 {
884 if (ecode[GET(ecode, 1)] != OP_ALT) /* Final alternative */
885 {
886 if (flags == 0) /* Not a possibly empty group */
887 {
888 ecode += _pcre_OP_lengths[*ecode];
889 DPRINTF(("bracket 0 tail recursion\n"));
890 goto TAIL_RECURSE;
891 }
892
893 /* Possibly empty group; can't use tail recursion. */
894
895 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
896 eptrb, flags, RM48);
897 if (rrc == MATCH_NOMATCH) md->mark = markptr;
898 RRETURN(rrc);
899 }
900
901 /* For non-final alternatives, continue the loop for a NOMATCH result;
902 otherwise return. */
903
904 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
905 eptrb, flags, RM2);
906 if (rrc != MATCH_NOMATCH &&
907 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
908 RRETURN(rrc);
909 ecode += GET(ecode, 1);
910 }
911 /* Control never reaches here. */
912
913 /* Conditional group: compilation checked that there are no more than
914 two branches. If the condition is false, skipping the first branch takes us
915 past the end if there is only one branch, but that's OK because that is
916 exactly what going to the ket would do. As there is only one branch to be
917 obeyed, we can use tail recursion to avoid using another stack frame. */
918
919 case OP_COND:
920 case OP_SCOND:
921 codelink= GET(ecode, 1);
922
923 /* Because of the way auto-callout works during compile, a callout item is
924 inserted between OP_COND and an assertion condition. */
925
926 if (ecode[LINK_SIZE+1] == OP_CALLOUT)
927 {
928 if (pcre_callout != NULL)
929 {
930 pcre_callout_block cb;
931 cb.version = 1; /* Version 1 of the callout block */
932 cb.callout_number = ecode[LINK_SIZE+2];
933 cb.offset_vector = md->offset_vector;
934 cb.subject = (PCRE_SPTR)md->start_subject;
935 cb.subject_length = (int)(md->end_subject - md->start_subject);
936 cb.start_match = (int)(mstart - md->start_subject);
937 cb.current_position = (int)(eptr - md->start_subject);
938 cb.pattern_position = GET(ecode, LINK_SIZE + 3);
939 cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
940 cb.capture_top = offset_top/2;
941 cb.capture_last = md->capture_last;
942 cb.callout_data = md->callout_data;
943 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
944 if (rrc < 0) RRETURN(rrc);
945 }
946 ecode += _pcre_OP_lengths[OP_CALLOUT];
947 }
948
949 condcode = ecode[LINK_SIZE+1];
950
951 /* Now see what the actual condition is */
952
953 if (condcode == OP_RREF || condcode == OP_NRREF) /* Recursion test */
954 {
955 if (md->recursive == NULL) /* Not recursing => FALSE */
956 {
957 condition = FALSE;
958 ecode += GET(ecode, 1);
959 }
960 else
961 {
962 int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
963 condition = (recno == RREF_ANY || recno == md->recursive->group_num);
964
965 /* If the test is for recursion into a specific subpattern, and it is
966 false, but the test was set up by name, scan the table to see if the
967 name refers to any other numbers, and test them. The condition is true
968 if any one is set. */
969
970 if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
971 {
972 uschar *slotA = md->name_table;
973 for (i = 0; i < md->name_count; i++)
974 {
975 if (GET2(slotA, 0) == recno) break;
976 slotA += md->name_entry_size;
977 }
978
979 /* Found a name for the number - there can be only one; duplicate
980 names for different numbers are allowed, but not vice versa. First
981 scan down for duplicates. */
982
983 if (i < md->name_count)
984 {
985 uschar *slotB = slotA;
986 while (slotB > md->name_table)
987 {
988 slotB -= md->name_entry_size;
989 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
990 {
991 condition = GET2(slotB, 0) == md->recursive->group_num;
992 if (condition) break;
993 }
994 else break;
995 }
996
997 /* Scan up for duplicates */
998
999 if (!condition)
1000 {
1001 slotB = slotA;
1002 for (i++; i < md->name_count; i++)
1003 {
1004 slotB += md->name_entry_size;
1005 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1006 {
1007 condition = GET2(slotB, 0) == md->recursive->group_num;
1008 if (condition) break;
1009 }
1010 else break;
1011 }
1012 }
1013 }
1014 }
1015
1016 /* Choose branch according to the condition */
1017
1018 ecode += condition? 3 : GET(ecode, 1);
1019 }
1020 }
1021
1022 else if (condcode == OP_CREF || condcode == OP_NCREF) /* Group used test */
1023 {
1024 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
1025 condition = offset < offset_top && md->offset_vector[offset] >= 0;
1026
1027 /* If the numbered capture is unset, but the reference was by name,
1028 scan the table to see if the name refers to any other numbers, and test
1029 them. The condition is true if any one is set. This is tediously similar
1030 to the code above, but not close enough to try to amalgamate. */
1031
1032 if (!condition && condcode == OP_NCREF)
1033 {
1034 int refno = offset >> 1;
1035 uschar *slotA = md->name_table;
1036
1037 for (i = 0; i < md->name_count; i++)
1038 {
1039 if (GET2(slotA, 0) == refno) break;
1040 slotA += md->name_entry_size;
1041 }
1042
1043 /* Found a name for the number - there can be only one; duplicate names
1044 for different numbers are allowed, but not vice versa. First scan down
1045 for duplicates. */
1046
1047 if (i < md->name_count)
1048 {
1049 uschar *slotB = slotA;
1050 while (slotB > md->name_table)
1051 {
1052 slotB -= md->name_entry_size;
1053 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1054 {
1055 offset = GET2(slotB, 0) << 1;
1056 condition = offset < offset_top &&
1057 md->offset_vector[offset] >= 0;
1058 if (condition) break;
1059 }
1060 else break;
1061 }
1062
1063 /* Scan up for duplicates */
1064
1065 if (!condition)
1066 {
1067 slotB = slotA;
1068 for (i++; i < md->name_count; i++)
1069 {
1070 slotB += md->name_entry_size;
1071 if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1072 {
1073 offset = GET2(slotB, 0) << 1;
1074 condition = offset < offset_top &&
1075 md->offset_vector[offset] >= 0;
1076 if (condition) break;
1077 }
1078 else break;
1079 }
1080 }
1081 }
1082 }
1083
1084 /* Choose branch according to the condition */
1085
1086 ecode += condition? 3 : GET(ecode, 1);
1087 }
1088
1089 else if (condcode == OP_DEF) /* DEFINE - always false */
1090 {
1091 condition = FALSE;
1092 ecode += GET(ecode, 1);
1093 }
1094
1095 /* The condition is an assertion. Call match() to evaluate it - setting
1096 the final argument match_condassert causes it to stop at the end of an
1097 assertion. */
1098
1099 else
1100 {
1101 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1102 match_condassert, RM3);
1103 if (rrc == MATCH_MATCH)
1104 {
1105 condition = TRUE;
1106 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1107 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1108 }
1109 else if (rrc != MATCH_NOMATCH &&
1110 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1111 {
1112 RRETURN(rrc); /* Need braces because of following else */
1113 }
1114 else
1115 {
1116 condition = FALSE;
1117 ecode += codelink;
1118 }
1119 }
1120
1121 /* We are now at the branch that is to be obeyed. As there is only one,
1122 we can use tail recursion to avoid using another stack frame, except when
1123 match_cbegroup is required for an unlimited repeat of a possibly empty
1124 group. If the second alternative doesn't exist, we can just plough on. */
1125
1126 if (condition || *ecode == OP_ALT)
1127 {
1128 ecode += 1 + LINK_SIZE;
1129 if (op == OP_SCOND) /* Possibly empty group */
1130 {
1131 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1132 RRETURN(rrc);
1133 }
1134 else /* Group must match something */
1135 {
1136 flags = 0;
1137 goto TAIL_RECURSE;
1138 }
1139 }
1140 else /* Condition false & no alternative */
1141 {
1142 ecode += 1 + LINK_SIZE;
1143 }
1144 break;
1145
1146
1147 /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1148 to close any currently open capturing brackets. */
1149
1150 case OP_CLOSE:
1151 number = GET2(ecode, 1);
1152 offset = number << 1;
1153
1154 #ifdef PCRE_DEBUG
1155 printf("end bracket %d at *ACCEPT", number);
1156 printf("\n");
1157 #endif
1158
1159 md->capture_last = number;
1160 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1161 {
1162 md->offset_vector[offset] =
1163 md->offset_vector[md->offset_end - number];
1164 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1165 if (offset_top <= offset) offset_top = offset + 2;
1166 }
1167 ecode += 3;
1168 break;
1169
1170
1171 /* End of the pattern, either real or forced. If we are in a top-level
1172 recursion, we should restore the offsets appropriately and continue from
1173 after the call. */
1174
1175 case OP_ACCEPT:
1176 case OP_END:
1177 if (md->recursive != NULL && md->recursive->group_num == 0)
1178 {
1179 recursion_info *rec = md->recursive;
1180 DPRINTF(("End of pattern in a (?0) recursion\n"));
1181 md->recursive = rec->prevrec;
1182 memmove(md->offset_vector, rec->offset_save,
1183 rec->saved_max * sizeof(int));
1184 offset_top = rec->save_offset_top;
1185 ims = original_ims;
1186 ecode = rec->after_call;
1187 break;
1188 }
1189
1190 /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1191 set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1192 the subject. In both cases, backtracking will then try other alternatives,
1193 if any. */
1194
1195 if (eptr == mstart &&
1196 (md->notempty ||
1197 (md->notempty_atstart &&
1198 mstart == md->start_subject + md->start_offset)))
1199 MRRETURN(MATCH_NOMATCH);
1200
1201 /* Otherwise, we have a match. */
1202
1203 md->end_match_ptr = eptr; /* Record where we ended */
1204 md->end_offset_top = offset_top; /* and how many extracts were taken */
1205 md->start_match_ptr = mstart; /* and the start (\K can modify) */
1206
1207 /* For some reason, the macros don't work properly if an expression is
1208 given as the argument to MRRETURN when the heap is in use. */
1209
1210 rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1211 MRRETURN(rrc);
1212
1213 /* Change option settings */
1214
1215 case OP_OPT:
1216 ims = ecode[1];
1217 ecode += 2;
1218 DPRINTF(("ims set to %02lx\n", ims));
1219 break;
1220
1221 /* Assertion brackets. Check the alternative branches in turn - the
1222 matching won't pass the KET for an assertion. If any one branch matches,
1223 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1224 start of each branch to move the current point backwards, so the code at
1225 this level is identical to the lookahead case. */
1226
1227 case OP_ASSERT:
1228 case OP_ASSERTBACK:
1229 do
1230 {
1231 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1232 RM4);
1233 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1234 {
1235 mstart = md->start_match_ptr; /* In case \K reset it */
1236 break;
1237 }
1238 if (rrc != MATCH_NOMATCH &&
1239 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1240 RRETURN(rrc);
1241 ecode += GET(ecode, 1);
1242 }
1243 while (*ecode == OP_ALT);
1244 if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1245
1246 /* If checking an assertion for a condition, return MATCH_MATCH. */
1247
1248 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1249
1250 /* Continue from after the assertion, updating the offsets high water
1251 mark, since extracts may have been taken during the assertion. */
1252
1253 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1254 ecode += 1 + LINK_SIZE;
1255 offset_top = md->end_offset_top;
1256 continue;
1257
1258 /* Negative assertion: all branches must fail to match. Encountering SKIP,
1259 PRUNE, or COMMIT means we must assume failure without checking subsequent
1260 branches. */
1261
1262 case OP_ASSERT_NOT:
1263 case OP_ASSERTBACK_NOT:
1264 do
1265 {
1266 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1267 RM5);
1268 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1269 if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1270 {
1271 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1272 break;
1273 }
1274 if (rrc != MATCH_NOMATCH &&
1275 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1276 RRETURN(rrc);
1277 ecode += GET(ecode,1);
1278 }
1279 while (*ecode == OP_ALT);
1280
1281 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
1282
1283 ecode += 1 + LINK_SIZE;
1284 continue;
1285
1286 /* Move the subject pointer back. This occurs only at the start of
1287 each branch of a lookbehind assertion. If we are too close to the start to
1288 move back, this match function fails. When working with UTF-8 we move
1289 back a number of characters, not bytes. */
1290
1291 case OP_REVERSE:
1292 #ifdef SUPPORT_UTF8
1293 if (utf8)
1294 {
1295 i = GET(ecode, 1);
1296 while (i-- > 0)
1297 {
1298 eptr--;
1299 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1300 BACKCHAR(eptr);
1301 }
1302 }
1303 else
1304 #endif
1305
1306 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1307
1308 {
1309 eptr -= GET(ecode, 1);
1310 if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1311 }
1312
1313 /* Save the earliest consulted character, then skip to next op code */
1314
1315 if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1316 ecode += 1 + LINK_SIZE;
1317 break;
1318
1319 /* The callout item calls an external function, if one is provided, passing
1320 details of the match so far. This is mainly for debugging, though the
1321 function is able to force a failure. */
1322
1323 case OP_CALLOUT:
1324 if (pcre_callout != NULL)
1325 {
1326 pcre_callout_block cb;
1327 cb.version = 1; /* Version 1 of the callout block */
1328 cb.callout_number = ecode[1];
1329 cb.offset_vector = md->offset_vector;
1330 cb.subject = (PCRE_SPTR)md->start_subject;
1331 cb.subject_length = (int)(md->end_subject - md->start_subject);
1332 cb.start_match = (int)(mstart - md->start_subject);
1333 cb.current_position = (int)(eptr - md->start_subject);
1334 cb.pattern_position = GET(ecode, 2);
1335 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1336 cb.capture_top = offset_top/2;
1337 cb.capture_last = md->capture_last;
1338 cb.callout_data = md->callout_data;
1339 if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1340 if (rrc < 0) RRETURN(rrc);
1341 }
1342 ecode += 2 + 2*LINK_SIZE;
1343 break;
1344
1345 /* Recursion either matches the current regex, or some subexpression. The
1346 offset data is the offset to the starting bracket from the start of the
1347 whole pattern. (This is so that it works from duplicated subpatterns.)
1348
1349 If there are any capturing brackets started but not finished, we have to
1350 save their starting points and reinstate them after the recursion. However,
1351 we don't know how many such there are (offset_top records the completed
1352 total) so we just have to save all the potential data. There may be up to
1353 65535 such values, which is too large to put on the stack, but using malloc
1354 for small numbers seems expensive. As a compromise, the stack is used when
1355 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
1356 is used. If the malloc fails, no offsets can be saved, so the recursion is
1357 abandoned and PCRE_ERROR_NOMEMORY is returned (see the check just after the
1358 pcre_malloc() call below).
1359
1360 There are also other values that have to be saved. We use a chained
1361 sequence of blocks that actually live on the stack. Thanks to Robin Houston
1362 for the original version of this logic. */
1363
1364 case OP_RECURSE:
1365 {
1366 callpat = md->start_code + GET(ecode, 1);
1367 new_recursive.group_num = (callpat == md->start_code)? 0 :
1368 GET2(callpat, 1 + LINK_SIZE);
1369
1370 /* Add to "recursing stack" */
1371
1372 new_recursive.prevrec = md->recursive;
1373 md->recursive = &new_recursive;
1374
1375 /* Find where to continue from afterwards */
1376
1377 ecode += 1 + LINK_SIZE;
1378 new_recursive.after_call = ecode;
1379
1380 /* Now save the offset data. */
1381
1382 new_recursive.saved_max = md->offset_end;
1383 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1384 new_recursive.offset_save = stacksave;
1385 else
1386 {
1387 new_recursive.offset_save =
1388 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
1389 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1390 }
1391
1392 memcpy(new_recursive.offset_save, md->offset_vector,
1393 new_recursive.saved_max * sizeof(int));
1394 new_recursive.save_offset_top = offset_top;
1395
1396 /* OK, now we can do the recursion. For each top-level alternative we
1397 restore the offset and recursion data. */
1398
1399 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1400 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1401 do
1402 {
1403 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1404 md, ims, eptrb, flags, RM6);
1405 if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1406 {
1407 DPRINTF(("Recursion matched\n"));
1408 md->recursive = new_recursive.prevrec;
1409 if (new_recursive.offset_save != stacksave)
1410 (pcre_free)(new_recursive.offset_save);
1411 MRRETURN(MATCH_MATCH);
1412 }
1413 else if (rrc != MATCH_NOMATCH &&
1414 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1415 {
1416 DPRINTF(("Recursion gave error %d\n", rrc));
1417 if (new_recursive.offset_save != stacksave)
1418 (pcre_free)(new_recursive.offset_save);
1419 RRETURN(rrc);
1420 }
1421
1422 md->recursive = &new_recursive;
1423 memcpy(md->offset_vector, new_recursive.offset_save,
1424 new_recursive.saved_max * sizeof(int));
1425 callpat += GET(callpat, 1);
1426 }
1427 while (*callpat == OP_ALT);
1428
1429 DPRINTF(("Recursion didn't match\n"));
1430 md->recursive = new_recursive.prevrec;
1431 if (new_recursive.offset_save != stacksave)
1432 (pcre_free)(new_recursive.offset_save);
1433 MRRETURN(MATCH_NOMATCH);
1434 }
1435 /* Control never reaches here */
1436
1437 /* "Once" brackets are like assertion brackets except that after a match,
1438 the point in the subject string is not moved back. Thus there can never be
1439 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1440 Check the alternative branches in turn - the matching won't pass the KET
1441 for this kind of subpattern. If any one branch matches, we carry on as at
1442 the end of a normal bracket, leaving the subject pointer, but resetting
1443 the start-of-match value in case it was changed by \K. */
1444
1445 case OP_ONCE:
1446 prev = ecode;
1447 saved_eptr = eptr;
1448
1449 do
1450 {
1451 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1452 if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
1453 {
1454 mstart = md->start_match_ptr;
1455 break;
1456 }
1457 if (rrc != MATCH_NOMATCH &&
1458 (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1459 RRETURN(rrc);
1460 ecode += GET(ecode,1);
1461 }
1462 while (*ecode == OP_ALT);
1463
1464 /* If hit the end of the group (which could be repeated), fail */
1465
1466 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1467
1468 /* Continue as from after the assertion, updating the offsets high water
1469 mark, since extracts may have been taken. */
1470
1471 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1472
1473 offset_top = md->end_offset_top;
1474 eptr = md->end_match_ptr;
1475
1476 /* For a non-repeating ket, just continue at this level. This also
1477 happens for a repeating ket if no characters were matched in the group.
1478 This is the forcible breaking of infinite loops as implemented in Perl
1479 5.005. If there is an options reset, it will get obeyed in the normal
1480 course of events. */
1481
1482 if (*ecode == OP_KET || eptr == saved_eptr)
1483 {
1484 ecode += 1+LINK_SIZE;
1485 break;
1486 }
1487
1488 /* The repeating kets try the rest of the pattern or restart from the
1489 preceding bracket, in the appropriate order. The second "call" of match()
1490 uses tail recursion, to avoid using another stack frame. We need to reset
1491 any options that changed within the bracket before re-running it, so
1492 check the next opcode. */
1493
1494 if (ecode[1+LINK_SIZE] == OP_OPT)
1495 {
1496 ims = (ims & ~PCRE_IMS) | ecode[4];
1497 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1498 }
1499
1500 if (*ecode == OP_KETRMIN)
1501 {
1502 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1503 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1504 ecode = prev;
1505 flags = 0;
1506 goto TAIL_RECURSE;
1507 }
1508 else /* OP_KETRMAX */
1509 {
1510 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1511 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1512 ecode += 1 + LINK_SIZE;
1513 flags = 0;
1514 goto TAIL_RECURSE;
1515 }
1516 /* Control never gets here */
1517
1518 /* An alternation is the end of a branch; scan along to find the end of the
1519 bracketed group and go to there. */
1520
1521 case OP_ALT:
1522 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1523 break;
1524
1525 /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1526 indicating that it may occur zero times. It may repeat infinitely, or not
1527 at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1528 with fixed upper repeat limits are compiled as a number of copies, with the
1529 optional ones preceded by BRAZERO or BRAMINZERO. */
1530
1531 case OP_BRAZERO:
1532 {
1533 next = ecode+1;
1534 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1535 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1536 do next += GET(next,1); while (*next == OP_ALT);
1537 ecode = next + 1 + LINK_SIZE;
1538 }
1539 break;
1540
1541 case OP_BRAMINZERO:
1542 {
1543 next = ecode+1;
1544 do next += GET(next, 1); while (*next == OP_ALT);
1545 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1546 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1547 ecode++;
1548 }
1549 break;
1550
1551 case OP_SKIPZERO:
1552 {
1553 next = ecode+1;
1554 do next += GET(next,1); while (*next == OP_ALT);
1555 ecode = next + 1 + LINK_SIZE;
1556 }
1557 break;
1558
1559 /* End of a group, repeated or non-repeating. */
1560
1561 case OP_KET:
1562 case OP_KETRMIN:
1563 case OP_KETRMAX:
1564 prev = ecode - GET(ecode, 1);
1565
1566 /* If this was a group that remembered the subject start, in order to break
1567 infinite repeats of empty string matches, retrieve the subject start from
1568 the chain. Otherwise, set it NULL. */
1569
1570 if (*prev >= OP_SBRA)
1571 {
1572 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1573 eptrb = eptrb->epb_prev; /* Backup to previous group */
1574 }
1575 else saved_eptr = NULL;
1576
1577 /* If we are at the end of an assertion group or an atomic group, stop
1578 matching and return MATCH_MATCH, but record the current high water mark for
1579 use by positive assertions. We also need to record the match start in case
1580 it was changed by \K. */
1581
1582 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1583 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1584 *prev == OP_ONCE)
1585 {
1586 md->end_match_ptr = eptr; /* For ONCE */
1587 md->end_offset_top = offset_top;
1588 md->start_match_ptr = mstart;
1589 MRRETURN(MATCH_MATCH);
1590 }
1591
1592 /* For capturing groups we have to check the group number back at the start
1593 and if necessary complete handling an extraction by setting the offsets and
1594 bumping the high water mark. Note that whole-pattern recursion is coded as
1595 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1596 when the OP_END is reached. Other recursion is handled here. */
1597
1598 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1599 {
1600 number = GET2(prev, 1+LINK_SIZE);
1601 offset = number << 1;
1602
1603 #ifdef PCRE_DEBUG
1604 printf("end bracket %d", number);
1605 printf("\n");
1606 #endif
1607
1608 md->capture_last = number;
1609 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1610 {
1611 md->offset_vector[offset] =
1612 md->offset_vector[md->offset_end - number];
1613 md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1614 if (offset_top <= offset) offset_top = offset + 2;
1615 }
1616
1617 /* Handle a recursively called group. Restore the offsets
1618 appropriately and continue from after the call. */
1619
1620 if (md->recursive != NULL && md->recursive->group_num == number)
1621 {
1622 recursion_info *rec = md->recursive;
1623 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1624 md->recursive = rec->prevrec;
1625 memcpy(md->offset_vector, rec->offset_save,
1626 rec->saved_max * sizeof(int));
1627 offset_top = rec->save_offset_top;
1628 ecode = rec->after_call;
1629 ims = original_ims;
1630 break;
1631 }
1632 }
1633
1634 /* For both capturing and non-capturing groups, reset the value of the ims
1635 flags, in case they got changed during the group. */
1636
1637 ims = original_ims;
1638 DPRINTF(("ims reset to %02lx\n", ims));
1639
1640 /* For a non-repeating ket, just continue at this level. This also
1641 happens for a repeating ket if no characters were matched in the group.
1642 This is the forcible breaking of infinite loops as implemented in Perl
1643 5.005. If there is an options reset, it will get obeyed in the normal
1644 course of events. */
1645
1646 if (*ecode == OP_KET || eptr == saved_eptr)
1647 {
1648 ecode += 1 + LINK_SIZE;
1649 break;
1650 }
1651
1652 /* The repeating kets try the rest of the pattern or restart from the
1653 preceding bracket, in the appropriate order. In the second case, we can use
1654 tail recursion to avoid using another stack frame, unless we have an
1655 unlimited repeat of a group that can match an empty string. */
1656
1657 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1658
1659 if (*ecode == OP_KETRMIN)
1660 {
1661 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1662 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1663 if (flags != 0) /* Could match an empty string */
1664 {
1665 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1666 RRETURN(rrc);
1667 }
1668 ecode = prev;
1669 goto TAIL_RECURSE;
1670 }
1671 else /* OP_KETRMAX */
1672 {
1673 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1674 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1675 ecode += 1 + LINK_SIZE;
1676 flags = 0;
1677 goto TAIL_RECURSE;
1678 }
1679 /* Control never gets here */
1680
1681 /* Start of subject unless notbol, or after internal newline if multiline */
1682
1683 case OP_CIRC:
1684 if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1685 if ((ims & PCRE_MULTILINE) != 0)
1686 {
1687 if (eptr != md->start_subject &&
1688 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1689 MRRETURN(MATCH_NOMATCH);
1690 ecode++;
1691 break;
1692 }
1693 /* ... else fall through */
1694
1695 /* Start of subject assertion */
1696
1697 case OP_SOD:
1698 if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1699 ecode++;
1700 break;
1701
1702 /* Start of match assertion */
1703
1704 case OP_SOM:
1705 if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1706 ecode++;
1707 break;
1708
1709 /* Reset the start of match point */
1710
1711 case OP_SET_SOM:
1712 mstart = eptr;
1713 ecode++;
1714 break;
1715
1716 /* Assert before internal newline if multiline, or before a terminating
1717 newline unless endonly is set, else end of subject unless noteol is set. */
1718
1719 case OP_DOLL:
1720 if ((ims & PCRE_MULTILINE) != 0)
1721 {
1722 if (eptr < md->end_subject)
1723 { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1724 else
1725 {
1726 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1727 SCHECK_PARTIAL();
1728 }
1729 ecode++;
1730 break;
1731 }
1732 else /* Not multiline */
1733 {
1734 if (md->noteol) MRRETURN(MATCH_NOMATCH);
1735 if (!md->endonly) goto ASSERT_NL_OR_EOS;
1736 }
1737
1738 /* ... else fall through for endonly */
1739
1740 /* End of subject assertion (\z) */
1741
1742 case OP_EOD:
1743 if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1744 SCHECK_PARTIAL();
1745 ecode++;
1746 break;
1747
1748 /* End of subject or ending \n assertion (\Z) */
1749
1750 case OP_EODN:
1751 ASSERT_NL_OR_EOS:
1752 if (eptr < md->end_subject &&
1753 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1754 MRRETURN(MATCH_NOMATCH);
1755
1756 /* Either at end of string or \n before end. */
1757
1758 SCHECK_PARTIAL();
1759 ecode++;
1760 break;
1761
1762 /* Word boundary assertions */
1763
1764 case OP_NOT_WORD_BOUNDARY:
1765 case OP_WORD_BOUNDARY:
1766 {
1767
1768 /* Find out if the previous and current characters are "word" characters.
1769 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1770 be "non-word" characters. Remember the earliest consulted character for
1771 partial matching. */
1772
1773 #ifdef SUPPORT_UTF8
1774 if (utf8)
1775 {
1776 /* Get status of previous character */
1777
1778 if (eptr == md->start_subject) prev_is_word = FALSE; else
1779 {
1780 USPTR lastptr = eptr - 1;
1781 while((*lastptr & 0xc0) == 0x80) lastptr--;
1782 if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1783 GETCHAR(c, lastptr);
1784 #ifdef SUPPORT_UCP
1785 if (md->use_ucp)
1786 {
1787 if (c == '_') prev_is_word = TRUE; else
1788 {
1789 int cat = UCD_CATEGORY(c);
1790 prev_is_word = (cat == ucp_L || cat == ucp_N);
1791 }
1792 }
1793 else
1794 #endif
1795 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1796 }
1797
1798 /* Get status of next character */
1799
1800 if (eptr >= md->end_subject)
1801 {
1802 SCHECK_PARTIAL();
1803 cur_is_word = FALSE;
1804 }
1805 else
1806 {
1807 GETCHAR(c, eptr);
1808 #ifdef SUPPORT_UCP
1809 if (md->use_ucp)
1810 {
1811 if (c == '_') cur_is_word = TRUE; else
1812 {
1813 int cat = UCD_CATEGORY(c);
1814 cur_is_word = (cat == ucp_L || cat == ucp_N);
1815 }
1816 }
1817 else
1818 #endif
1819 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1820 }
1821 }
1822 else
1823 #endif
1824
1825 /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1826 consistency with the behaviour of \w we do use it in this case. */
1827
1828 {
1829 /* Get status of previous character */
1830
1831 if (eptr == md->start_subject) prev_is_word = FALSE; else
1832 {
1833 if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1834 #ifdef SUPPORT_UCP
1835 if (md->use_ucp)
1836 {
1837 c = eptr[-1];
1838 if (c == '_') prev_is_word = TRUE; else
1839 {
1840 int cat = UCD_CATEGORY(c);
1841 prev_is_word = (cat == ucp_L || cat == ucp_N);
1842 }
1843 }
1844 else
1845 #endif
1846 prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1847 }
1848
1849 /* Get status of next character */
1850
1851 if (eptr >= md->end_subject)
1852 {
1853 SCHECK_PARTIAL();
1854 cur_is_word = FALSE;
1855 }
1856 else
1857 #ifdef SUPPORT_UCP
1858 if (md->use_ucp)
1859 {
1860 c = *eptr;
1861 if (c == '_') cur_is_word = TRUE; else
1862 {
1863 int cat = UCD_CATEGORY(c);
1864 cur_is_word = (cat == ucp_L || cat == ucp_N);
1865 }
1866 }
1867 else
1868 #endif
1869 cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1870 }
1871
1872 /* Now see if the situation is what we want */
1873
1874 if ((*ecode++ == OP_WORD_BOUNDARY)?
1875 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1876 MRRETURN(MATCH_NOMATCH);
1877 }
1878 break;
1879
1880 /* Match a single character type; inline for speed */
1881
1882 case OP_ANY:
1883 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1884 /* Fall through */
1885
1886 case OP_ALLANY:
1887 if (eptr++ >= md->end_subject)
1888 {
1889 SCHECK_PARTIAL();
1890 MRRETURN(MATCH_NOMATCH);
1891 }
1892 if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1893 ecode++;
1894 break;
1895
1896 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1897 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1898
1899 case OP_ANYBYTE:
1900 if (eptr++ >= md->end_subject)
1901 {
1902 SCHECK_PARTIAL();
1903 MRRETURN(MATCH_NOMATCH);
1904 }
1905 ecode++;
1906 break;
1907
1908 case OP_NOT_DIGIT:
1909 if (eptr >= md->end_subject)
1910 {
1911 SCHECK_PARTIAL();
1912 MRRETURN(MATCH_NOMATCH);
1913 }
1914 GETCHARINCTEST(c, eptr);
1915 if (
1916 #ifdef SUPPORT_UTF8
1917 c < 256 &&
1918 #endif
1919 (md->ctypes[c] & ctype_digit) != 0
1920 )
1921 MRRETURN(MATCH_NOMATCH);
1922 ecode++;
1923 break;
1924
1925 case OP_DIGIT:
1926 if (eptr >= md->end_subject)
1927 {
1928 SCHECK_PARTIAL();
1929 MRRETURN(MATCH_NOMATCH);
1930 }
1931 GETCHARINCTEST(c, eptr);
1932 if (
1933 #ifdef SUPPORT_UTF8
1934 c >= 256 ||
1935 #endif
1936 (md->ctypes[c] & ctype_digit) == 0
1937 )
1938 MRRETURN(MATCH_NOMATCH);
1939 ecode++;
1940 break;
1941
1942 case OP_NOT_WHITESPACE:
1943 if (eptr >= md->end_subject)
1944 {
1945 SCHECK_PARTIAL();
1946 MRRETURN(MATCH_NOMATCH);
1947 }
1948 GETCHARINCTEST(c, eptr);
1949 if (
1950 #ifdef SUPPORT_UTF8
1951 c < 256 &&
1952 #endif
1953 (md->ctypes[c] & ctype_space) != 0
1954 )
1955 MRRETURN(MATCH_NOMATCH);
1956 ecode++;
1957 break;
1958
1959 case OP_WHITESPACE:
1960 if (eptr >= md->end_subject)
1961 {
1962 SCHECK_PARTIAL();
1963 MRRETURN(MATCH_NOMATCH);
1964 }
1965 GETCHARINCTEST(c, eptr);
1966 if (
1967 #ifdef SUPPORT_UTF8
1968 c >= 256 ||
1969 #endif
1970 (md->ctypes[c] & ctype_space) == 0
1971 )
1972 MRRETURN(MATCH_NOMATCH);
1973 ecode++;
1974 break;
1975
1976 case OP_NOT_WORDCHAR:
1977 if (eptr >= md->end_subject)
1978 {
1979 SCHECK_PARTIAL();
1980 MRRETURN(MATCH_NOMATCH);
1981 }
1982 GETCHARINCTEST(c, eptr);
1983 if (
1984 #ifdef SUPPORT_UTF8
1985 c < 256 &&
1986 #endif
1987 (md->ctypes[c] & ctype_word) != 0
1988 )
1989 MRRETURN(MATCH_NOMATCH);
1990 ecode++;
1991 break;
1992
1993 case OP_WORDCHAR:
1994 if (eptr >= md->end_subject)
1995 {
1996 SCHECK_PARTIAL();
1997 MRRETURN(MATCH_NOMATCH);
1998 }
1999 GETCHARINCTEST(c, eptr);
2000 if (
2001 #ifdef SUPPORT_UTF8
2002 c >= 256 ||
2003 #endif
2004 (md->ctypes[c] & ctype_word) == 0
2005 )
2006 MRRETURN(MATCH_NOMATCH);
2007 ecode++;
2008 break;
2009
2010 case OP_ANYNL:
2011 if (eptr >= md->end_subject)
2012 {
2013 SCHECK_PARTIAL();
2014 MRRETURN(MATCH_NOMATCH);
2015 }
2016 GETCHARINCTEST(c, eptr);
2017 switch(c)
2018 {
2019 default: MRRETURN(MATCH_NOMATCH);
2020 case 0x000d:
2021 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2022 break;
2023
2024 case 0x000a:
2025 break;
2026
2027 case 0x000b:
2028 case 0x000c:
2029 case 0x0085:
2030 case 0x2028:
2031 case 0x2029:
2032 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2033 break;
2034 }
2035 ecode++;
2036 break;
2037
2038 case OP_NOT_HSPACE:
2039 if (eptr >= md->end_subject)
2040 {
2041 SCHECK_PARTIAL();
2042 MRRETURN(MATCH_NOMATCH);
2043 }
2044 GETCHARINCTEST(c, eptr);
2045 switch(c)
2046 {
2047 default: break;
2048 case 0x09: /* HT */
2049 case 0x20: /* SPACE */
2050 case 0xa0: /* NBSP */
2051 case 0x1680: /* OGHAM SPACE MARK */
2052 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2053 case 0x2000: /* EN QUAD */
2054 case 0x2001: /* EM QUAD */
2055 case 0x2002: /* EN SPACE */
2056 case 0x2003: /* EM SPACE */
2057 case 0x2004: /* THREE-PER-EM SPACE */
2058 case 0x2005: /* FOUR-PER-EM SPACE */
2059 case 0x2006: /* SIX-PER-EM SPACE */
2060 case 0x2007: /* FIGURE SPACE */
2061 case 0x2008: /* PUNCTUATION SPACE */
2062 case 0x2009: /* THIN SPACE */
2063 case 0x200A: /* HAIR SPACE */
2064 case 0x202f: /* NARROW NO-BREAK SPACE */
2065 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2066 case 0x3000: /* IDEOGRAPHIC SPACE */
2067 MRRETURN(MATCH_NOMATCH);
2068 }
2069 ecode++;
2070 break;
2071
2072 case OP_HSPACE:
2073 if (eptr >= md->end_subject)
2074 {
2075 SCHECK_PARTIAL();
2076 MRRETURN(MATCH_NOMATCH);
2077 }
2078 GETCHARINCTEST(c, eptr);
2079 switch(c)
2080 {
2081 default: MRRETURN(MATCH_NOMATCH);
2082 case 0x09: /* HT */
2083 case 0x20: /* SPACE */
2084 case 0xa0: /* NBSP */
2085 case 0x1680: /* OGHAM SPACE MARK */
2086 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2087 case 0x2000: /* EN QUAD */
2088 case 0x2001: /* EM QUAD */
2089 case 0x2002: /* EN SPACE */
2090 case 0x2003: /* EM SPACE */
2091 case 0x2004: /* THREE-PER-EM SPACE */
2092 case 0x2005: /* FOUR-PER-EM SPACE */
2093 case 0x2006: /* SIX-PER-EM SPACE */
2094 case 0x2007: /* FIGURE SPACE */
2095 case 0x2008: /* PUNCTUATION SPACE */
2096 case 0x2009: /* THIN SPACE */
2097 case 0x200A: /* HAIR SPACE */
2098 case 0x202f: /* NARROW NO-BREAK SPACE */
2099 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2100 case 0x3000: /* IDEOGRAPHIC SPACE */
2101 break;
2102 }
2103 ecode++;
2104 break;
2105
2106 case OP_NOT_VSPACE:
2107 if (eptr >= md->end_subject)
2108 {
2109 SCHECK_PARTIAL();
2110 MRRETURN(MATCH_NOMATCH);
2111 }
2112 GETCHARINCTEST(c, eptr);
2113 switch(c)
2114 {
2115 default: break;
2116 case 0x0a: /* LF */
2117 case 0x0b: /* VT */
2118 case 0x0c: /* FF */
2119 case 0x0d: /* CR */
2120 case 0x85: /* NEL */
2121 case 0x2028: /* LINE SEPARATOR */
2122 case 0x2029: /* PARAGRAPH SEPARATOR */
2123 MRRETURN(MATCH_NOMATCH);
2124 }
2125 ecode++;
2126 break;
2127
2128 case OP_VSPACE:
2129 if (eptr >= md->end_subject)
2130 {
2131 SCHECK_PARTIAL();
2132 MRRETURN(MATCH_NOMATCH);
2133 }
2134 GETCHARINCTEST(c, eptr);
2135 switch(c)
2136 {
2137 default: MRRETURN(MATCH_NOMATCH);
2138 case 0x0a: /* LF */
2139 case 0x0b: /* VT */
2140 case 0x0c: /* FF */
2141 case 0x0d: /* CR */
2142 case 0x85: /* NEL */
2143 case 0x2028: /* LINE SEPARATOR */
2144 case 0x2029: /* PARAGRAPH SEPARATOR */
2145 break;
2146 }
2147 ecode++;
2148 break;
2149
2150 #ifdef SUPPORT_UCP
2151 /* Check the next character by Unicode property. We will get here only
2152 if the support is in the binary; otherwise a compile-time error occurs. */
2153
2154 case OP_PROP:
2155 case OP_NOTPROP:
2156 if (eptr >= md->end_subject)
2157 {
2158 SCHECK_PARTIAL();
2159 MRRETURN(MATCH_NOMATCH);
2160 }
2161 GETCHARINCTEST(c, eptr);
2162 {
2163 const ucd_record *prop = GET_UCD(c);
2164
2165 switch(ecode[1])
2166 {
2167 case PT_ANY:
2168 if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2169 break;
2170
2171 case PT_LAMP:
2172 if ((prop->chartype == ucp_Lu ||
2173 prop->chartype == ucp_Ll ||
2174 prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2175 MRRETURN(MATCH_NOMATCH);
2176 break;
2177
2178 case PT_GC:
2179 if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2180 MRRETURN(MATCH_NOMATCH);
2181 break;
2182
2183 case PT_PC:
2184 if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2185 MRRETURN(MATCH_NOMATCH);
2186 break;
2187
2188 case PT_SC:
2189 if ((ecode[2] != prop->script) == (op == OP_PROP))
2190 MRRETURN(MATCH_NOMATCH);
2191 break;
2192
2193 /* These are specials */
2194
2195 case PT_ALNUM:
2196 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2197 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2198 MRRETURN(MATCH_NOMATCH);
2199 break;
2200
2201 case PT_SPACE: /* Perl space */
2202 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2203 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2204 == (op == OP_NOTPROP))
2205 MRRETURN(MATCH_NOMATCH);
2206 break;
2207
2208 case PT_PXSPACE: /* POSIX space */
2209 if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2210 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2211 c == CHAR_FF || c == CHAR_CR)
2212 == (op == OP_NOTPROP))
2213 MRRETURN(MATCH_NOMATCH);
2214 break;
2215
2216 case PT_WORD:
2217 if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2218 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2219 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2220 MRRETURN(MATCH_NOMATCH);
2221 break;
2222
2223 /* This should never occur */
2224
2225 default:
2226 RRETURN(PCRE_ERROR_INTERNAL);
2227 }
2228
2229 ecode += 3;
2230 }
2231 break;
2232
2233 /* Match an extended Unicode sequence. We will get here only if the support
2234 is in the binary; otherwise a compile-time error occurs. */
2235
2236 case OP_EXTUNI:
2237 if (eptr >= md->end_subject)
2238 {
2239 SCHECK_PARTIAL();
2240 MRRETURN(MATCH_NOMATCH);
2241 }
2242 GETCHARINCTEST(c, eptr);
2243 {
2244 int category = UCD_CATEGORY(c);
2245 if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
2246 while (eptr < md->end_subject)
2247 {
2248 int len = 1;
2249 if (!utf8) c = *eptr; else
2250 {
2251 GETCHARLEN(c, eptr, len);
2252 }
2253 category = UCD_CATEGORY(c);
2254 if (category != ucp_M) break;
2255 eptr += len;
2256 }
2257 }
2258 ecode++;
2259 break;
2260 #endif
2261
2262
2263 /* Match a back reference, possibly repeatedly. Look past the end of the
2264 item to see if there is repeat information following. The code is similar
2265 to that for character classes, but repeated for efficiency. Then obey
2266 similar code to character type repeats - written out again for speed.
2267 However, if the referenced string is the empty string, always treat
2268 it as matched, any number of times (otherwise there could be infinite
2269 loops). */
2270
2271 case OP_REF:
2272 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2273 ecode += 3;
2274
2275 /* If the reference is unset, there are two possibilities:
2276
2277 (a) In the default, Perl-compatible state, set the length negative;
2278 this ensures that every attempt at a match fails. We can't just fail
2279 here, because of the possibility of quantifiers with zero minima.
2280
2281 (b) If the JavaScript compatibility flag is set, set the length to zero
2282 so that the back reference matches an empty string.
2283
2284 Otherwise, set the length to the length of what was matched by the
2285 referenced subpattern. */
2286
2287 if (offset >= offset_top || md->offset_vector[offset] < 0)
2288 length = (md->jscript_compat)? 0 : -1;
2289 else
2290 length = md->offset_vector[offset+1] - md->offset_vector[offset];
2291
2292 /* Set up for repetition, or handle the non-repeated case */
2293
2294 switch (*ecode)
2295 {
2296 case OP_CRSTAR:
2297 case OP_CRMINSTAR:
2298 case OP_CRPLUS:
2299 case OP_CRMINPLUS:
2300 case OP_CRQUERY:
2301 case OP_CRMINQUERY:
2302 c = *ecode++ - OP_CRSTAR;
2303 minimize = (c & 1) != 0;
2304 min = rep_min[c]; /* Pick up values from tables; */
2305 max = rep_max[c]; /* zero for max => infinity */
2306 if (max == 0) max = INT_MAX;
2307 break;
2308
2309 case OP_CRRANGE:
2310 case OP_CRMINRANGE:
2311 minimize = (*ecode == OP_CRMINRANGE);
2312 min = GET2(ecode, 1);
2313 max = GET2(ecode, 3);
2314 if (max == 0) max = INT_MAX;
2315 ecode += 5;
2316 break;
2317
2318 default: /* No repeat follows */
2319 if ((length = match_ref(offset, eptr, length, md, ims)) < 0)
2320 {
2321 CHECK_PARTIAL();
2322 MRRETURN(MATCH_NOMATCH);
2323 }
2324 eptr += length;
2325 continue; /* With the main loop */
2326 }
2327
2328 /* Handle repeated back references. If the length of the reference is
2329 zero, just continue with the main loop. */
2330
2331 if (length == 0) continue;
2332
2333 /* First, ensure the minimum number of matches are present. We get back
2334 the length of the reference string explicitly rather than passing the
2335 address of eptr, so that eptr can be a register variable. */
2336
2337 for (i = 1; i <= min; i++)
2338 {
2339 int slength;
2340 if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2341 {
2342 CHECK_PARTIAL();
2343 MRRETURN(MATCH_NOMATCH);
2344 }
2345 eptr += slength;
2346 }
2347
2348 /* If min = max, continue at the same level without recursion.
2349 They are not both allowed to be zero. */
2350
2351 if (min == max) continue;
2352
2353 /* If minimizing, keep trying and advancing the pointer */
2354
2355 if (minimize)
2356 {
2357 for (fi = min;; fi++)
2358 {
2359 int slength;
2360 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2361 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2362 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2363 if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2364 {
2365 CHECK_PARTIAL();
2366 MRRETURN(MATCH_NOMATCH);
2367 }
2368 eptr += slength;
2369 }
2370 /* Control never gets here */
2371 }
2372
2373 /* If maximizing, find the longest string and work backwards */
2374
2375 else
2376 {
2377 pp = eptr;
2378 for (i = min; i < max; i++)
2379 {
2380 int slength;
2381 if ((slength = match_ref(offset, eptr, length, md, ims)) < 0)
2382 {
2383 CHECK_PARTIAL();
2384 break;
2385 }
2386 eptr += slength;
2387 }
2388 while (eptr >= pp)
2389 {
2390 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2391 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2392 eptr -= length;
2393 }
2394 MRRETURN(MATCH_NOMATCH);
2395 }
2396 /* Control never gets here */
2397
2398 /* Match a bit-mapped character class, possibly repeatedly. This op code is
2399 used when all the characters in the class have values in the range 0-255,
2400 and either the matching is caseful, or the characters are in the range
2401 0-127 when UTF-8 processing is enabled. The only difference between
2402 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2403 encountered.
2404
2405 First, look past the end of the item to see if there is repeat information
2406 following. Then obey similar code to character type repeats - written out
2407 again for speed. */
2408
2409 case OP_NCLASS:
2410 case OP_CLASS:
2411 {
2412 data = ecode + 1; /* Save for matching */
2413 ecode += 33; /* Advance past the item */
2414
2415 switch (*ecode)
2416 {
2417 case OP_CRSTAR:
2418 case OP_CRMINSTAR:
2419 case OP_CRPLUS:
2420 case OP_CRMINPLUS:
2421 case OP_CRQUERY:
2422 case OP_CRMINQUERY:
2423 c = *ecode++ - OP_CRSTAR;
2424 minimize = (c & 1) != 0;
2425 min = rep_min[c]; /* Pick up values from tables; */
2426 max = rep_max[c]; /* zero for max => infinity */
2427 if (max == 0) max = INT_MAX;
2428 break;
2429
2430 case OP_CRRANGE:
2431 case OP_CRMINRANGE:
2432 minimize = (*ecode == OP_CRMINRANGE);
2433 min = GET2(ecode, 1);
2434 max = GET2(ecode, 3);
2435 if (max == 0) max = INT_MAX;
2436 ecode += 5;
2437 break;
2438
2439 default: /* No repeat follows */
2440 min = max = 1;
2441 break;
2442 }
2443
2444 /* First, ensure the minimum number of matches are present. */
2445
2446 #ifdef SUPPORT_UTF8
2447 /* UTF-8 mode */
2448 if (utf8)
2449 {
2450 for (i = 1; i <= min; i++)
2451 {
2452 if (eptr >= md->end_subject)
2453 {
2454 SCHECK_PARTIAL();
2455 MRRETURN(MATCH_NOMATCH);
2456 }
2457 GETCHARINC(c, eptr);
2458 if (c > 255)
2459 {
2460 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2461 }
2462 else
2463 {
2464 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2465 }
2466 }
2467 }
2468 else
2469 #endif
2470 /* Not UTF-8 mode */
2471 {
2472 for (i = 1; i <= min; i++)
2473 {
2474 if (eptr >= md->end_subject)
2475 {
2476 SCHECK_PARTIAL();
2477 MRRETURN(MATCH_NOMATCH);
2478 }
2479 c = *eptr++;
2480 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2481 }
2482 }
2483
2484 /* If max == min we can continue with the main loop without the
2485 need to recurse. */
2486
2487 if (min == max) continue;
2488
2489 /* If minimizing, keep testing the rest of the expression and advancing
2490 the pointer while it matches the class. */
2491
2492 if (minimize)
2493 {
2494 #ifdef SUPPORT_UTF8
2495 /* UTF-8 mode */
2496 if (utf8)
2497 {
2498 for (fi = min;; fi++)
2499 {
2500 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2501 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2502 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2503 if (eptr >= md->end_subject)
2504 {
2505 SCHECK_PARTIAL();
2506 MRRETURN(MATCH_NOMATCH);
2507 }
2508 GETCHARINC(c, eptr);
2509 if (c > 255)
2510 {
2511 if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2512 }
2513 else
2514 {
2515 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2516 }
2517 }
2518 }
2519 else
2520 #endif
2521 /* Not UTF-8 mode */
2522 {
2523 for (fi = min;; fi++)
2524 {
2525 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2526 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2528 if (eptr >= md->end_subject)
2529 {
2530 SCHECK_PARTIAL();
2531 MRRETURN(MATCH_NOMATCH);
2532 }
2533 c = *eptr++;
2534 if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2535 }
2536 }
2537 /* Control never gets here */
2538 }
2539
2540 /* If maximizing, find the longest possible run, then work backwards. */
2541
2542 else
2543 {
2544 pp = eptr;
2545
2546 #ifdef SUPPORT_UTF8
2547 /* UTF-8 mode */
2548 if (utf8)
2549 {
2550 for (i = min; i < max; i++)
2551 {
2552 int len = 1;
2553 if (eptr >= md->end_subject)
2554 {
2555 SCHECK_PARTIAL();
2556 break;
2557 }
2558 GETCHARLEN(c, eptr, len);
2559 if (c > 255)
2560 {
2561 if (op == OP_CLASS) break;
2562 }
2563 else
2564 {
2565 if ((data[c/8] & (1 << (c&7))) == 0) break;
2566 }
2567 eptr += len;
2568 }
2569 for (;;)
2570 {
2571 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2572 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2573 if (eptr-- == pp) break; /* Stop if tried at original pos */
2574 BACKCHAR(eptr);
2575 }
2576 }
2577 else
2578 #endif
2579 /* Not UTF-8 mode */
2580 {
2581 for (i = min; i < max; i++)
2582 {
2583 if (eptr >= md->end_subject)
2584 {
2585 SCHECK_PARTIAL();
2586 break;
2587 }
2588 c = *eptr;
2589 if ((data[c/8] & (1 << (c&7))) == 0) break;
2590 eptr++;
2591 }
2592 while (eptr >= pp)
2593 {
2594 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2595 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2596 eptr--;
2597 }
2598 }
2599
2600 MRRETURN(MATCH_NOMATCH);
2601 }
2602 }
2603 /* Control never gets here */
2604
2605
2606 /* Match an extended character class. This opcode is encountered only
2607 when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2608 mode, because Unicode properties are supported in non-UTF-8 mode. */
2609
2610 #ifdef SUPPORT_UTF8
2611 case OP_XCLASS:
2612 {
2613 data = ecode + 1 + LINK_SIZE; /* Save for matching */
2614 ecode += GET(ecode, 1); /* Advance past the item */
2615
2616 switch (*ecode)
2617 {
2618 case OP_CRSTAR:
2619 case OP_CRMINSTAR:
2620 case OP_CRPLUS:
2621 case OP_CRMINPLUS:
2622 case OP_CRQUERY:
2623 case OP_CRMINQUERY:
2624 c = *ecode++ - OP_CRSTAR;
2625 minimize = (c & 1) != 0;
2626 min = rep_min[c]; /* Pick up values from tables; */
2627 max = rep_max[c]; /* zero for max => infinity */
2628 if (max == 0) max = INT_MAX;
2629 break;
2630
2631 case OP_CRRANGE:
2632 case OP_CRMINRANGE:
2633 minimize = (*ecode == OP_CRMINRANGE);
2634 min = GET2(ecode, 1);
2635 max = GET2(ecode, 3);
2636 if (max == 0) max = INT_MAX;
2637 ecode += 5;
2638 break;
2639
2640 default: /* No repeat follows */
2641 min = max = 1;
2642 break;
2643 }
2644
2645 /* First, ensure the minimum number of matches are present. */
2646
2647 for (i = 1; i <= min; i++)
2648 {
2649 if (eptr >= md->end_subject)
2650 {
2651 SCHECK_PARTIAL();
2652 MRRETURN(MATCH_NOMATCH);
2653 }
2654 GETCHARINCTEST(c, eptr);
2655 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2656 }
2657
2658 /* If max == min we can continue with the main loop without the
2659 need to recurse. */
2660
2661 if (min == max) continue;
2662
2663 /* If minimizing, keep testing the rest of the expression and advancing
2664 the pointer while it matches the class. */
2665
2666 if (minimize)
2667 {
2668 for (fi = min;; fi++)
2669 {
2670 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2671 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2672 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2673 if (eptr >= md->end_subject)
2674 {
2675 SCHECK_PARTIAL();
2676 MRRETURN(MATCH_NOMATCH);
2677 }
2678 GETCHARINCTEST(c, eptr);
2679 if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2680 }
2681 /* Control never gets here */
2682 }
2683
2684 /* If maximizing, find the longest possible run, then work backwards. */
2685
2686 else
2687 {
2688 pp = eptr;
2689 for (i = min; i < max; i++)
2690 {
2691 int len = 1;
2692 if (eptr >= md->end_subject)
2693 {
2694 SCHECK_PARTIAL();
2695 break;
2696 }
2697 GETCHARLENTEST(c, eptr, len);
2698 if (!_pcre_xclass(c, data)) break;
2699 eptr += len;
2700 }
2701 for(;;)
2702 {
2703 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2704 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2705 if (eptr-- == pp) break; /* Stop if tried at original pos */
2706 if (utf8) BACKCHAR(eptr);
2707 }
2708 MRRETURN(MATCH_NOMATCH);
2709 }
2710
2711 /* Control never gets here */
2712 }
2713 #endif /* End of XCLASS */
2714
2715 /* Match a single character, casefully */
2716
2717 case OP_CHAR:
2718 #ifdef SUPPORT_UTF8
2719 if (utf8)
2720 {
2721 length = 1;
2722 ecode++;
2723 GETCHARLEN(fc, ecode, length);
2724 if (length > md->end_subject - eptr)
2725 {
2726 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2727 MRRETURN(MATCH_NOMATCH);
2728 }
2729 while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2730 }
2731 else
2732 #endif
2733
2734 /* Non-UTF-8 mode */
2735 {
2736 if (md->end_subject - eptr < 1)
2737 {
2738 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2739 MRRETURN(MATCH_NOMATCH);
2740 }
2741 if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2742 ecode += 2;
2743 }
2744 break;
2745
2746 /* Match a single character, caselessly */
2747
2748 case OP_CHARNC:
2749 #ifdef SUPPORT_UTF8
2750 if (utf8)
2751 {
2752 length = 1;
2753 ecode++;
2754 GETCHARLEN(fc, ecode, length);
2755
2756 if (length > md->end_subject - eptr)
2757 {
2758 CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
2759 MRRETURN(MATCH_NOMATCH);
2760 }
2761
2762 /* If the pattern character's value is < 128, we have only one byte, and
2763 can use the fast lookup table. */
2764
2765 if (fc < 128)
2766 {
2767 if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2768 }
2769
2770 /* Otherwise we must pick up the subject character */
2771
2772 else
2773 {
2774 unsigned int dc;
2775 GETCHARINC(dc, eptr);
2776 ecode += length;
2777
2778 /* If we have Unicode property support, we can use it to test the other
2779 case of the character, if there is one. */
2780
2781 if (fc != dc)
2782 {
2783 #ifdef SUPPORT_UCP
2784 if (dc != UCD_OTHERCASE(fc))
2785 #endif
2786 MRRETURN(MATCH_NOMATCH);
2787 }
2788 }
2789 }
2790 else
2791 #endif /* SUPPORT_UTF8 */
2792
2793 /* Non-UTF-8 mode */
2794 {
2795 if (md->end_subject - eptr < 1)
2796 {
2797 SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
2798 MRRETURN(MATCH_NOMATCH);
2799 }
2800 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2801 ecode += 2;
2802 }
2803 break;
2804
2805 /* Match a single character repeatedly. */
2806
2807 case OP_EXACT:
2808 min = max = GET2(ecode, 1);
2809 ecode += 3;
2810 goto REPEATCHAR;
2811
2812 case OP_POSUPTO:
2813 possessive = TRUE;
2814 /* Fall through */
2815
2816 case OP_UPTO:
2817 case OP_MINUPTO:
2818 min = 0;
2819 max = GET2(ecode, 1);
2820 minimize = *ecode == OP_MINUPTO;
2821 ecode += 3;
2822 goto REPEATCHAR;
2823
2824 case OP_POSSTAR:
2825 possessive = TRUE;
2826 min = 0;
2827 max = INT_MAX;
2828 ecode++;
2829 goto REPEATCHAR;
2830
2831 case OP_POSPLUS:
2832 possessive = TRUE;
2833 min = 1;
2834 max = INT_MAX;
2835 ecode++;
2836 goto REPEATCHAR;
2837
2838 case OP_POSQUERY:
2839 possessive = TRUE;
2840 min = 0;
2841 max = 1;
2842 ecode++;
2843 goto REPEATCHAR;
2844
2845 case OP_STAR:
2846 case OP_MINSTAR:
2847 case OP_PLUS:
2848 case OP_MINPLUS:
2849 case OP_QUERY:
2850 case OP_MINQUERY:
2851 c = *ecode++ - OP_STAR;
2852 minimize = (c & 1) != 0;
2853
2854 min = rep_min[c]; /* Pick up values from tables; */
2855 max = rep_max[c]; /* zero for max => infinity */
2856 if (max == 0) max = INT_MAX;
2857
2858 /* Common code for all repeated single-character matches. */
2859
2860 REPEATCHAR:
2861 #ifdef SUPPORT_UTF8
2862 if (utf8)
2863 {
2864 length = 1;
2865 charptr = ecode;
2866 GETCHARLEN(fc, ecode, length);
2867 ecode += length;
2868
2869 /* Handle multibyte character matching specially here. There is
2870 support for caseless matching if UCP support is present. */
2871
2872 if (length > 1)
2873 {
2874 #ifdef SUPPORT_UCP
2875 unsigned int othercase;
2876 if ((ims & PCRE_CASELESS) != 0 &&
2877 (othercase = UCD_OTHERCASE(fc)) != fc)
2878 oclength = _pcre_ord2utf8(othercase, occhars);
2879 else oclength = 0;
2880 #endif /* SUPPORT_UCP */
2881
2882 for (i = 1; i <= min; i++)
2883 {
2884 if (eptr <= md->end_subject - length &&
2885 memcmp(eptr, charptr, length) == 0) eptr += length;
2886 #ifdef SUPPORT_UCP
2887 else if (oclength > 0 &&
2888 eptr <= md->end_subject - oclength &&
2889 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2890 #endif /* SUPPORT_UCP */
2891 else
2892 {
2893 CHECK_PARTIAL();
2894 MRRETURN(MATCH_NOMATCH);
2895 }
2896 }
2897
2898 if (min == max) continue;
2899
2900 if (minimize)
2901 {
2902 for (fi = min;; fi++)
2903 {
2904 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2905 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2906 if (fi >= max) MRRETURN(MATCH_NOMATCH);
2907 if (eptr <= md->end_subject - length &&
2908 memcmp(eptr, charptr, length) == 0) eptr += length;
2909 #ifdef SUPPORT_UCP
2910 else if (oclength > 0 &&
2911 eptr <= md->end_subject - oclength &&
2912 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2913 #endif /* SUPPORT_UCP */
2914 else
2915 {
2916 CHECK_PARTIAL();
2917 MRRETURN(MATCH_NOMATCH);
2918 }
2919 }
2920 /* Control never gets here */
2921 }
2922
2923 else /* Maximize */
2924 {
2925 pp = eptr;
2926 for (i = min; i < max; i++)
2927 {
2928 if (eptr <= md->end_subject - length &&
2929 memcmp(eptr, charptr, length) == 0) eptr += length;
2930 #ifdef SUPPORT_UCP
2931 else if (oclength > 0 &&
2932 eptr <= md->end_subject - oclength &&
2933 memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2934 #endif /* SUPPORT_UCP */
2935 else
2936 {
2937 CHECK_PARTIAL();
2938 break;
2939 }
2940 }
2941
2942 if (possessive) continue;
2943
2944 for(;;)
2945 {
2946 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2947 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2948 if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2949 #ifdef SUPPORT_UCP
2950 eptr--;
2951 BACKCHAR(eptr);
2952 #else /* without SUPPORT_UCP */
2953 eptr -= length;
2954 #endif /* SUPPORT_UCP */
2955 }
2956 }
2957 /* Control never gets here */
2958 }
2959
2960 /* If the length of a UTF-8 character is 1, we fall through here, and
2961 obey the code as for non-UTF-8 characters below, though in this case the
2962 value of fc will always be < 128. */
2963 }
2964 else
2965 #endif /* SUPPORT_UTF8 */
2966
2967 /* When not in UTF-8 mode, load a single-byte character. */
2968
2969 fc = *ecode++;
2970
2971 /* The value of fc at this point is always less than 256, though we may or
2972 may not be in UTF-8 mode. The code is duplicated for the caseless and
2973 caseful cases, for speed, since matching characters is likely to be quite
2974 common. First, ensure the minimum number of matches are present. If min =
2975 max, continue at the same level without recursing. Otherwise, if
2976 minimizing, keep trying the rest of the expression and advancing one
2977 matching character if failing, up to the maximum. Alternatively, if
2978 maximizing, find the maximum number of characters and work backwards. */
2979
2980 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2981 max, eptr));
2982
2983 if ((ims & PCRE_CASELESS) != 0)
2984 {
2985 fc = md->lcc[fc];
2986 for (i = 1; i <= min; i++)
2987 {
2988 if (eptr >= md->end_subject)
2989 {
2990 SCHECK_PARTIAL();
2991 MRRETURN(MATCH_NOMATCH);
2992 }
2993 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2994 }
2995 if (min == max) continue;
2996 if (minimize)
2997 {
2998 for (fi = min;; fi++)
2999 {
3000 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
3001 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3002 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3003 if (eptr >= md->end_subject)
3004 {
3005 SCHECK_PARTIAL();
3006 MRRETURN(MATCH_NOMATCH);
3007 }
3008 if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3009 }
3010 /* Control never gets here */
3011 }
3012 else /* Maximize */
3013 {
3014 pp = eptr;
3015 for (i = min; i < max; i++)
3016 {
3017 if (eptr >= md->end_subject)
3018 {
3019 SCHECK_PARTIAL();
3020 break;
3021 }
3022 if (fc != md->lcc[*eptr]) break;
3023 eptr++;
3024 }
3025
3026 if (possessive) continue;
3027
3028 while (eptr >= pp)
3029 {
3030 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3031 eptr--;
3032 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3033 }
3034 MRRETURN(MATCH_NOMATCH);
3035 }
3036 /* Control never gets here */
3037 }
3038
3039 /* Caseful comparisons (includes all multi-byte characters) */
3040
3041 else
3042 {
3043 for (i = 1; i <= min; i++)
3044 {
3045 if (eptr >= md->end_subject)
3046 {
3047 SCHECK_PARTIAL();
3048 MRRETURN(MATCH_NOMATCH);
3049 }
3050 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3051 }
3052
3053 if (min == max) continue;
3054
3055 if (minimize)
3056 {
3057 for (fi = min;; fi++)
3058 {
3059 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3060 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3061 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3062 if (eptr >= md->end_subject)
3063 {
3064 SCHECK_PARTIAL();
3065 MRRETURN(MATCH_NOMATCH);
3066 }
3067 if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3068 }
3069 /* Control never gets here */
3070 }
3071 else /* Maximize */
3072 {
3073 pp = eptr;
3074 for (i = min; i < max; i++)
3075 {
3076 if (eptr >= md->end_subject)
3077 {
3078 SCHECK_PARTIAL();
3079 break;
3080 }
3081 if (fc != *eptr) break;
3082 eptr++;
3083 }
3084 if (possessive) continue;
3085
3086 while (eptr >= pp)
3087 {
3088 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3089 eptr--;
3090 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3091 }
3092 MRRETURN(MATCH_NOMATCH);
3093 }
3094 }
3095 /* Control never gets here */
3096
3097 /* Match a negated single one-byte character. The character we are
3098 checking can be multibyte. */
3099
3100 case OP_NOT:
3101 if (eptr >= md->end_subject)
3102 {
3103 SCHECK_PARTIAL();
3104 MRRETURN(MATCH_NOMATCH);
3105 }
3106 ecode++;
3107 GETCHARINCTEST(c, eptr);
3108 if ((ims & PCRE_CASELESS) != 0)
3109 {
3110 #ifdef SUPPORT_UTF8
3111 if (c < 256)
3112 #endif
3113 c = md->lcc[c];
3114 if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3115 }
3116 else
3117 {
3118 if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3119 }
3120 break;
3121
3122 /* Match a negated single one-byte character repeatedly. This is almost a
3123 repeat of the code for a repeated single character, but I haven't found a
3124 nice way of commoning these up that doesn't require a test of the
3125 positive/negative option for each character match. Maybe that wouldn't add
3126 very much to the time taken, but character matching *is* what this is all
3127 about... */
3128
3129 case OP_NOTEXACT:
3130 min = max = GET2(ecode, 1);
3131 ecode += 3;
3132 goto REPEATNOTCHAR;
3133
3134 case OP_NOTUPTO:
3135 case OP_NOTMINUPTO:
3136 min = 0;
3137 max = GET2(ecode, 1);
3138 minimize = *ecode == OP_NOTMINUPTO;
3139 ecode += 3;
3140 goto REPEATNOTCHAR;
3141
3142 case OP_NOTPOSSTAR:
3143 possessive = TRUE;
3144 min = 0;
3145 max = INT_MAX;
3146 ecode++;
3147 goto REPEATNOTCHAR;
3148
3149 case OP_NOTPOSPLUS:
3150 possessive = TRUE;
3151 min = 1;
3152 max = INT_MAX;
3153 ecode++;
3154 goto REPEATNOTCHAR;
3155
3156 case OP_NOTPOSQUERY:
3157 possessive = TRUE;
3158 min = 0;
3159 max = 1;
3160 ecode++;
3161 goto REPEATNOTCHAR;
3162
3163 case OP_NOTPOSUPTO:
3164 possessive = TRUE;
3165 min = 0;
3166 max = GET2(ecode, 1);
3167 ecode += 3;
3168 goto REPEATNOTCHAR;
3169
3170 case OP_NOTSTAR:
3171 case OP_NOTMINSTAR:
3172 case OP_NOTPLUS:
3173 case OP_NOTMINPLUS:
3174 case OP_NOTQUERY:
3175 case OP_NOTMINQUERY:
3176 c = *ecode++ - OP_NOTSTAR;
3177 minimize = (c & 1) != 0;
3178 min = rep_min[c]; /* Pick up values from tables; */
3179 max = rep_max[c]; /* zero for max => infinity */
3180 if (max == 0) max = INT_MAX;
3181
3182 /* Common code for all repeated single-byte matches. */
3183
3184 REPEATNOTCHAR:
3185 fc = *ecode++;
3186
3187 /* The code is duplicated for the caseless and caseful cases, for speed,
3188 since matching characters is likely to be quite common. First, ensure the
3189 minimum number of matches are present. If min = max, continue at the same
3190 level without recursing. Otherwise, if minimizing, keep trying the rest of
3191 the expression and advancing one matching character if failing, up to the
3192 maximum. Alternatively, if maximizing, find the maximum number of
3193 characters and work backwards. */
3194
3195 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3196 max, eptr));
3197
3198 if ((ims & PCRE_CASELESS) != 0)
3199 {
3200 fc = md->lcc[fc];
3201
3202 #ifdef SUPPORT_UTF8
3203 /* UTF-8 mode */
3204 if (utf8)
3205 {
3206 register unsigned int d;
3207 for (i = 1; i <= min; i++)
3208 {
3209 if (eptr >= md->end_subject)
3210 {
3211 SCHECK_PARTIAL();
3212 MRRETURN(MATCH_NOMATCH);
3213 }
3214 GETCHARINC(d, eptr);
3215 if (d < 256) d = md->lcc[d];
3216 if (fc == d) MRRETURN(MATCH_NOMATCH);
3217 }
3218 }
3219 else
3220 #endif
3221
3222 /* Not UTF-8 mode */
3223 {
3224 for (i = 1; i <= min; i++)
3225 {
3226 if (eptr >= md->end_subject)
3227 {
3228 SCHECK_PARTIAL();
3229 MRRETURN(MATCH_NOMATCH);
3230 }
3231 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3232 }
3233 }
3234
3235 if (min == max) continue;
3236
3237 if (minimize)
3238 {
3239 #ifdef SUPPORT_UTF8
3240 /* UTF-8 mode */
3241 if (utf8)
3242 {
3243 register unsigned int d;
3244 for (fi = min;; fi++)
3245 {
3246 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3247 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3248 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3249 if (eptr >= md->end_subject)
3250 {
3251 SCHECK_PARTIAL();
3252 MRRETURN(MATCH_NOMATCH);
3253 }
3254 GETCHARINC(d, eptr);
3255 if (d < 256) d = md->lcc[d];
3256 if (fc == d) MRRETURN(MATCH_NOMATCH);
3257 }
3258 }
3259 else
3260 #endif
3261 /* Not UTF-8 mode */
3262 {
3263 for (fi = min;; fi++)
3264 {
3265 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3266 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3267 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3268 if (eptr >= md->end_subject)
3269 {
3270 SCHECK_PARTIAL();
3271 MRRETURN(MATCH_NOMATCH);
3272 }
3273 if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3274 }
3275 }
3276 /* Control never gets here */
3277 }
3278
3279 /* Maximize case */
3280
3281 else
3282 {
3283 pp = eptr;
3284
3285 #ifdef SUPPORT_UTF8
3286 /* UTF-8 mode */
3287 if (utf8)
3288 {
3289 register unsigned int d;
3290 for (i = min; i < max; i++)
3291 {
3292 int len = 1;
3293 if (eptr >= md->end_subject)
3294 {
3295 SCHECK_PARTIAL();
3296 break;
3297 }
3298 GETCHARLEN(d, eptr, len);
3299 if (d < 256) d = md->lcc[d];
3300 if (fc == d) break;
3301 eptr += len;
3302 }
3303 if (possessive) continue;
3304 for(;;)
3305 {
3306 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3307 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3308 if (eptr-- == pp) break; /* Stop if tried at original pos */
3309 BACKCHAR(eptr);
3310 }
3311 }
3312 else
3313 #endif
3314 /* Not UTF-8 mode */
3315 {
3316 for (i = min; i < max; i++)
3317 {
3318 if (eptr >= md->end_subject)
3319 {
3320 SCHECK_PARTIAL();
3321 break;
3322 }
3323 if (fc == md->lcc[*eptr]) break;
3324 eptr++;
3325 }
3326 if (possessive) continue;
3327 while (eptr >= pp)
3328 {
3329 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3330 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3331 eptr--;
3332 }
3333 }
3334
3335 MRRETURN(MATCH_NOMATCH);
3336 }
3337 /* Control never gets here */
3338 }
3339
3340 /* Caseful comparisons */
3341
3342 else
3343 {
3344 #ifdef SUPPORT_UTF8
3345 /* UTF-8 mode */
3346 if (utf8)
3347 {
3348 register unsigned int d;
3349 for (i = 1; i <= min; i++)
3350 {
3351 if (eptr >= md->end_subject)
3352 {
3353 SCHECK_PARTIAL();
3354 MRRETURN(MATCH_NOMATCH);
3355 }
3356 GETCHARINC(d, eptr);
3357 if (fc == d) MRRETURN(MATCH_NOMATCH);
3358 }
3359 }
3360 else
3361 #endif
3362 /* Not UTF-8 mode */
3363 {
3364 for (i = 1; i <= min; i++)
3365 {
3366 if (eptr >= md->end_subject)
3367 {
3368 SCHECK_PARTIAL();
3369 MRRETURN(MATCH_NOMATCH);
3370 }
3371 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3372 }
3373 }
3374
3375 if (min == max) continue;
3376
3377 if (minimize)
3378 {
3379 #ifdef SUPPORT_UTF8
3380 /* UTF-8 mode */
3381 if (utf8)
3382 {
3383 register unsigned int d;
3384 for (fi = min;; fi++)
3385 {
3386 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3387 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3388 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3389 if (eptr >= md->end_subject)
3390 {
3391 SCHECK_PARTIAL();
3392 MRRETURN(MATCH_NOMATCH);
3393 }
3394 GETCHARINC(d, eptr);
3395 if (fc == d) MRRETURN(MATCH_NOMATCH);
3396 }
3397 }
3398 else
3399 #endif
3400 /* Not UTF-8 mode */
3401 {
3402 for (fi = min;; fi++)
3403 {
3404 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3405 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3406 if (fi >= max) MRRETURN(MATCH_NOMATCH);
3407 if (eptr >= md->end_subject)
3408 {
3409 SCHECK_PARTIAL();
3410 MRRETURN(MATCH_NOMATCH);
3411 }
3412 if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3413 }
3414 }
3415 /* Control never gets here */
3416 }
3417
3418 /* Maximize case */
3419
3420 else
3421 {
3422 pp = eptr;
3423
3424 #ifdef SUPPORT_UTF8
3425 /* UTF-8 mode */
3426 if (utf8)
3427 {
3428 register unsigned int d;
3429 for (i = min; i < max; i++)
3430 {
3431 int len = 1;
3432 if (eptr >= md->end_subject)
3433 {
3434 SCHECK_PARTIAL();
3435 break;
3436 }
3437 GETCHARLEN(d, eptr, len);
3438 if (fc == d) break;
3439 eptr += len;
3440 }
3441 if (possessive) continue;
3442 for(;;)
3443 {
3444 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3445 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3446 if (eptr-- == pp) break; /* Stop if tried at original pos */
3447 BACKCHAR(eptr);
3448 }
3449 }
3450 else
3451 #endif
3452 /* Not UTF-8 mode */
3453 {
3454 for (i = min; i < max; i++)
3455 {
3456 if (eptr >= md->end_subject)
3457 {
3458 SCHECK_PARTIAL();
3459 break;
3460 }
3461 if (fc == *eptr) break;
3462 eptr++;
3463 }
3464 if (possessive) continue;
3465 while (eptr >= pp)
3466 {
3467 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3468 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3469 eptr--;
3470 }
3471 }
3472
3473 MRRETURN(MATCH_NOMATCH);
3474 }
3475 }
3476 /* Control never gets here */
3477
3478 /* Match a single character type repeatedly; several different opcodes
3479 share code. This is very similar to the code for single characters, but we
3480 repeat it in the interests of efficiency. */
3481
3482 case OP_TYPEEXACT:
3483 min = max = GET2(ecode, 1);
3484 minimize = TRUE;
3485 ecode += 3;
3486 goto REPEATTYPE;
3487
3488 case OP_TYPEUPTO:
3489 case OP_TYPEMINUPTO:
3490 min = 0;
3491 max = GET2(ecode, 1);
3492 minimize = *ecode == OP_TYPEMINUPTO;
3493 ecode += 3;
3494 goto REPEATTYPE;
3495
3496 case OP_TYPEPOSSTAR:
3497 possessive = TRUE;
3498 min = 0;
3499 max = INT_MAX;
3500 ecode++;
3501 goto REPEATTYPE;
3502
3503 case OP_TYPEPOSPLUS:
3504 possessive = TRUE;
3505 min = 1;
3506 max = INT_MAX;
3507 ecode++;
3508 goto REPEATTYPE;
3509
3510 case OP_TYPEPOSQUERY:
3511 possessive = TRUE;
3512 min = 0;
3513 max = 1;
3514 ecode++;
3515 goto REPEATTYPE;
3516
3517 case OP_TYPEPOSUPTO:
3518 possessive = TRUE;
3519 min = 0;
3520 max = GET2(ecode, 1);
3521 ecode += 3;
3522 goto REPEATTYPE;
3523
3524 case OP_TYPESTAR:
3525 case OP_TYPEMINSTAR:
3526 case OP_TYPEPLUS:
3527 case OP_TYPEMINPLUS:
3528 case OP_TYPEQUERY:
3529 case OP_TYPEMINQUERY:
3530 c = *ecode++ - OP_TYPESTAR;
3531 minimize = (c & 1) != 0;
3532 min = rep_min[c]; /* Pick up values from tables; */
3533 max = rep_max[c]; /* zero for max => infinity */
3534 if (max == 0) max = INT_MAX;
3535
3536 /* Common code for all repeated single character type matches. Note that
3537 in UTF-8 mode, '.' matches a character of any length, but for the other
3538 character types, the valid characters are all one-byte long. */
3539
3540 REPEATTYPE:
3541 ctype = *ecode++; /* Code for the character type */
3542
3543 #ifdef SUPPORT_UCP
3544 if (ctype == OP_PROP || ctype == OP_NOTPROP)
3545 {
3546 prop_fail_result = ctype == OP_NOTPROP;
3547 prop_type = *ecode++;
3548 prop_value = *ecode++;
3549 }
3550 else prop_type = -1;
3551 #endif
3552
3553 /* First, ensure the minimum number of matches are present. Use inline
3554 code for maximizing the speed, and do the type test once at the start
3555 (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
3556 is tidier. Also separate the UCP code, which can be the same for both UTF-8
3557 and single-bytes. */
3558
3559 if (min > 0)
3560 {
3561 #ifdef SUPPORT_UCP
3562 if (prop_type >= 0)
3563 {
3564 switch(prop_type)
3565 {
3566 case PT_ANY:
3567 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3568 for (i = 1; i <= min; i++)
3569 {
3570 if (eptr >= md->end_subject)
3571 {
3572 SCHECK_PARTIAL();
3573 MRRETURN(MATCH_NOMATCH);
3574 }
3575 GETCHARINCTEST(c, eptr);
3576 }
3577 break;
3578
3579 case PT_LAMP:
3580 for (i = 1; i <= min; i++)
3581 {
3582 if (eptr >= md->end_subject)
3583 {
3584 SCHECK_PARTIAL();
3585 MRRETURN(MATCH_NOMATCH);
3586 }
3587 GETCHARINCTEST(c, eptr);
3588 prop_chartype = UCD_CHARTYPE(c);
3589 if ((prop_chartype == ucp_Lu ||
3590 prop_chartype == ucp_Ll ||
3591 prop_chartype == ucp_Lt) == prop_fail_result)
3592 MRRETURN(MATCH_NOMATCH);
3593 }
3594 break;
3595
3596 case PT_GC:
3597 for (i = 1; i <= min; i++)
3598 {
3599 if (eptr >= md->end_subject)
3600 {
3601 SCHECK_PARTIAL();
3602 MRRETURN(MATCH_NOMATCH);
3603 }
3604 GETCHARINCTEST(c, eptr);
3605 prop_category = UCD_CATEGORY(c);
3606 if ((prop_category == prop_value) == prop_fail_result)
3607 MRRETURN(MATCH_NOMATCH);
3608 }
3609 break;
3610
3611 case PT_PC:
3612 for (i = 1; i <= min; i++)
3613 {
3614 if (eptr >= md->end_subject)
3615 {
3616 SCHECK_PARTIAL();
3617 MRRETURN(MATCH_NOMATCH);
3618 }
3619 GETCHARINCTEST(c, eptr);
3620 prop_chartype = UCD_CHARTYPE(c);
3621 if ((prop_chartype == prop_value) == prop_fail_result)
3622 MRRETURN(MATCH_NOMATCH);
3623 }
3624 break;
3625
3626 case PT_SC:
3627 for (i = 1; i <= min; i++)
3628 {
3629 if (eptr >= md->end_subject)
3630 {
3631 SCHECK_PARTIAL();
3632 MRRETURN(MATCH_NOMATCH);
3633 }
3634 GETCHARINCTEST(c, eptr);
3635 prop_script = UCD_SCRIPT(c);
3636 if ((prop_script == prop_value) == prop_fail_result)
3637 MRRETURN(MATCH_NOMATCH);
3638 }
3639 break;
3640
3641 case PT_ALNUM:
3642 for (i = 1; i <= min; i++)
3643 {
3644 if (eptr >= md->end_subject)
3645 {
3646 SCHECK_PARTIAL();
3647 MRRETURN(MATCH_NOMATCH);
3648 }
3649 GETCHARINCTEST(c, eptr);
3650 prop_category = UCD_CATEGORY(c);
3651 if ((prop_category == ucp_L || prop_category == ucp_N)
3652 == prop_fail_result)
3653 MRRETURN(MATCH_NOMATCH);
3654 }
3655 break;
3656
3657 case PT_SPACE: /* Perl space */
3658 for (i = 1; i <= min; i++)
3659 {
3660 if (eptr >= md->end_subject)
3661 {
3662 SCHECK_PARTIAL();
3663 MRRETURN(MATCH_NOMATCH);
3664 }
3665 GETCHARINCTEST(c, eptr);
3666 prop_category = UCD_CATEGORY(c);
3667 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3668 c == CHAR_FF || c == CHAR_CR)
3669 == prop_fail_result)
3670 MRRETURN(MATCH_NOMATCH);
3671 }
3672 break;
3673
3674 case PT_PXSPACE: /* POSIX space */
3675 for (i = 1; i <= min; i++)
3676 {
3677 if (eptr >= md->end_subject)
3678 {
3679 SCHECK_PARTIAL();
3680 MRRETURN(MATCH_NOMATCH);
3681 }
3682 GETCHARINCTEST(c, eptr);
3683 prop_category = UCD_CATEGORY(c);
3684 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3685 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3686 == prop_fail_result)
3687 MRRETURN(MATCH_NOMATCH);
3688 }
3689 break;
3690
3691 case PT_WORD:
3692 for (i = 1; i <= min; i++)
3693 {
3694 if (eptr >= md->end_subject)
3695 {
3696 SCHECK_PARTIAL();
3697 MRRETURN(MATCH_NOMATCH);
3698 }
3699 GETCHARINCTEST(c, eptr);
3700 prop_category = UCD_CATEGORY(c);
3701 if ((prop_category == ucp_L || prop_category == ucp_N ||
3702 c == CHAR_UNDERSCORE)
3703 == prop_fail_result)
3704 MRRETURN(MATCH_NOMATCH);
3705 }
3706 break;
3707
3708 /* This should not occur */
3709
3710 default:
3711 RRETURN(PCRE_ERROR_INTERNAL);
3712 }
3713 }
3714
3715 /* Match extended Unicode sequences. We will get here only if the
3716 support is in the binary; otherwise a compile-time error occurs. */
3717
3718 else if (ctype == OP_EXTUNI)
3719 {
3720 for (i = 1; i <= min; i++)
3721 {
3722 if (eptr >= md->end_subject)
3723 {
3724 SCHECK_PARTIAL();
3725 MRRETURN(MATCH_NOMATCH);
3726 }
3727 GETCHARINCTEST(c, eptr);
3728 prop_category = UCD_CATEGORY(c);
3729 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3730 while (eptr < md->end_subject)
3731 {
3732 int len = 1;
3733 if (!utf8) c = *eptr;
3734 else { GETCHARLEN(c, eptr, len); }
3735 prop_category = UCD_CATEGORY(c);
3736 if (prop_category != ucp_M) break;
3737 eptr += len;
3738 }
3739 }
3740 }
3741
3742 else
3743 #endif /* SUPPORT_UCP */
3744
3745 /* Handle all other cases when the coding is UTF-8 */
3746
3747 #ifdef SUPPORT_UTF8
3748 if (utf8) switch(ctype)
3749 {
3750 case OP_ANY:
3751 for (i = 1; i <= min; i++)
3752 {
3753 if (eptr >= md->end_subject)
3754 {
3755 SCHECK_PARTIAL();
3756 MRRETURN(MATCH_NOMATCH);
3757 }
3758 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3759 eptr++;
3760 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3761 }
3762 break;
3763
3764 case OP_ALLANY:
3765 for (i = 1; i <= min; i++)
3766 {
3767 if (eptr >= md->end_subject)
3768 {
3769 SCHECK_PARTIAL();
3770 MRRETURN(MATCH_NOMATCH);
3771 }
3772 eptr++;
3773 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3774 }
3775 break;
3776
3777 case OP_ANYBYTE:
3778 if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3779 eptr += min;
3780 break;
3781
3782 case OP_ANYNL:
3783 for (i = 1; i <= min; i++)
3784 {
3785 if (eptr >= md->end_subject)
3786 {
3787 SCHECK_PARTIAL();
3788 MRRETURN(MATCH_NOMATCH);
3789 }
3790 GETCHARINC(c, eptr);
3791 switch(c)
3792 {
3793 default: MRRETURN(MATCH_NOMATCH);
3794 case 0x000d:
3795 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3796 break;
3797
3798 case 0x000a:
3799 break;
3800
3801 case 0x000b:
3802 case 0x000c:
3803 case 0x0085:
3804 case 0x2028:
3805 case 0x2029:
3806 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3807 break;
3808 }
3809 }
3810 break;
3811
3812 case OP_NOT_HSPACE:
3813 for (i = 1; i <= min; i++)
3814 {
3815 if (eptr >= md->end_subject)
3816 {
3817 SCHECK_PARTIAL();
3818 MRRETURN(MATCH_NOMATCH);
3819 }
3820 GETCHARINC(c, eptr);
3821 switch(c)
3822 {
3823 default: break;
3824 case 0x09: /* HT */
3825 case 0x20: /* SPACE */
3826 case 0xa0: /* NBSP */
3827 case 0x1680: /* OGHAM SPACE MARK */
3828 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3829 case 0x2000: /* EN QUAD */
3830 case 0x2001: /* EM QUAD */
3831 case 0x2002: /* EN SPACE */
3832 case 0x2003: /* EM SPACE */
3833 case 0x2004: /* THREE-PER-EM SPACE */
3834 case 0x2005: /* FOUR-PER-EM SPACE */
3835 case 0x2006: /* SIX-PER-EM SPACE */
3836 case 0x2007: /* FIGURE SPACE */
3837 case 0x2008: /* PUNCTUATION SPACE */
3838 case 0x2009: /* THIN SPACE */
3839 case 0x200A: /* HAIR SPACE */
3840 case 0x202f: /* NARROW NO-BREAK SPACE */
3841 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3842 case 0x3000: /* IDEOGRAPHIC SPACE */
3843 MRRETURN(MATCH_NOMATCH);
3844 }
3845 }
3846 break;
3847
3848 case OP_HSPACE:
3849 for (i = 1; i <= min; i++)
3850 {
3851 if (eptr >= md->end_subject)
3852 {
3853 SCHECK_PARTIAL();
3854 MRRETURN(MATCH_NOMATCH);
3855 }
3856 GETCHARINC(c, eptr);
3857 switch(c)
3858 {
3859 default: MRRETURN(MATCH_NOMATCH);
3860 case 0x09: /* HT */
3861 case 0x20: /* SPACE */
3862 case 0xa0: /* NBSP */
3863 case 0x1680: /* OGHAM SPACE MARK */
3864 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3865 case 0x2000: /* EN QUAD */
3866 case 0x2001: /* EM QUAD */
3867 case 0x2002: /* EN SPACE */
3868 case 0x2003: /* EM SPACE */
3869 case 0x2004: /* THREE-PER-EM SPACE */
3870 case 0x2005: /* FOUR-PER-EM SPACE */
3871 case 0x2006: /* SIX-PER-EM SPACE */
3872 case 0x2007: /* FIGURE SPACE */
3873 case 0x2008: /* PUNCTUATION SPACE */
3874 case 0x2009: /* THIN SPACE */
3875 case 0x200A: /* HAIR SPACE */
3876 case 0x202f: /* NARROW NO-BREAK SPACE */
3877 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3878 case 0x3000: /* IDEOGRAPHIC SPACE */
3879 break;
3880 }
3881 }
3882 break;
3883
3884 case OP_NOT_VSPACE:
3885 for (i = 1; i <= min; i++)
3886 {
3887 if (eptr >= md->end_subject)
3888 {
3889 SCHECK_PARTIAL();
3890 MRRETURN(MATCH_NOMATCH);
3891 }
3892 GETCHARINC(c, eptr);
3893 switch(c)
3894 {
3895 default: break;
3896 case 0x0a: /* LF */
3897 case 0x0b: /* VT */
3898 case 0x0c: /* FF */
3899 case 0x0d: /* CR */
3900 case 0x85: /* NEL */
3901 case 0x2028: /* LINE SEPARATOR */
3902 case 0x2029: /* PARAGRAPH SEPARATOR */
3903 MRRETURN(MATCH_NOMATCH);
3904 }
3905 }
3906 break;
3907
3908 case OP_VSPACE:
3909 for (i = 1; i <= min; i++)
3910 {
3911 if (eptr >= md->end_subject)
3912 {
3913 SCHECK_PARTIAL();
3914 MRRETURN(MATCH_NOMATCH);
3915 }
3916 GETCHARINC(c, eptr);
3917 switch(c)
3918 {
3919 default: MRRETURN(MATCH_NOMATCH);
3920 case 0x0a: /* LF */
3921 case 0x0b: /* VT */
3922 case 0x0c: /* FF */
3923 case 0x0d: /* CR */
3924 case 0x85: /* NEL */
3925 case 0x2028: /* LINE SEPARATOR */
3926 case 0x2029: /* PARAGRAPH SEPARATOR */
3927 break;
3928 }
3929 }
3930 break;
3931
3932 case OP_NOT_DIGIT:
3933 for (i = 1; i <= min; i++)
3934 {
3935 if (eptr >= md->end_subject)
3936 {
3937 SCHECK_PARTIAL();
3938 MRRETURN(MATCH_NOMATCH);
3939 }
3940 GETCHARINC(c, eptr);
3941 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3942 MRRETURN(MATCH_NOMATCH);
3943 }
3944 break;
3945
3946 case OP_DIGIT:
3947 for (i = 1; i <= min; i++)
3948 {
3949 if (eptr >= md->end_subject)
3950 {
3951 SCHECK_PARTIAL();
3952 MRRETURN(MATCH_NOMATCH);
3953 }
3954 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3955 MRRETURN(MATCH_NOMATCH);
3956 /* No need to skip more bytes - we know it's a 1-byte character */
3957 }
3958 break;
3959
3960 case OP_NOT_WHITESPACE:
3961 for (i = 1; i <= min; i++)
3962 {
3963 if (eptr >= md->end_subject)
3964 {
3965 SCHECK_PARTIAL();
3966 MRRETURN(MATCH_NOMATCH);
3967 }
3968 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3969 MRRETURN(MATCH_NOMATCH);
3970 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3971 }
3972 break;
3973
3974 case OP_WHITESPACE:
3975 for (i = 1; i <= min; i++)
3976 {
3977 if (eptr >= md->end_subject)
3978 {
3979 SCHECK_PARTIAL();
3980 MRRETURN(MATCH_NOMATCH);
3981 }
3982 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3983 MRRETURN(MATCH_NOMATCH);
3984 /* No need to skip more bytes - we know it's a 1-byte character */
3985 }
3986 break;
3987
3988 case OP_NOT_WORDCHAR:
3989 for (i = 1; i <= min; i++)
3990 {
3991 if (eptr >= md->end_subject)
3992 {
3993 SCHECK_PARTIAL();
3994 MRRETURN(MATCH_NOMATCH);
3995 }
3996 if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3997 MRRETURN(MATCH_NOMATCH);
3998 while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3999 }
4000 break;
4001
4002 case OP_WORDCHAR:
4003 for (i = 1; i <= min; i++)
4004 {
4005 if (eptr >= md->end_subject)
4006 {
4007 SCHECK_PARTIAL();
4008 MRRETURN(MATCH_NOMATCH);
4009 }
4010 if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
4011 MRRETURN(MATCH_NOMATCH);
4012 /* No need to skip more bytes - we know it's a 1-byte character */
4013 }
4014 break;
4015
4016 default:
4017 RRETURN(PCRE_ERROR_INTERNAL);
4018 } /* End switch(ctype) */
4019
4020 else
4021 #endif /* SUPPORT_UTF8 */
4022
4023 /* Code for the non-UTF-8 case for minimum matching of operators other
4024 than OP_PROP and OP_NOTPROP. */
4025
4026 switch(ctype)
4027 {
4028 case OP_ANY:
4029 for (i = 1; i <= min; i++)
4030 {
4031 if (eptr >= md->end_subject)
4032 {
4033 SCHECK_PARTIAL();
4034 MRRETURN(MATCH_NOMATCH);
4035 }
4036 if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4037 eptr++;
4038 }
4039 break;
4040
4041 case OP_ALLANY:
4042 if (eptr > md->end_subject - min)
4043 {
4044 SCHECK_PARTIAL();
4045 MRRETURN(MATCH_NOMATCH);
4046 }
4047 eptr += min;
4048 break;
4049
4050 case OP_ANYBYTE:
4051 if (eptr > md->end_subject - min)
4052 {
4053 SCHECK_PARTIAL();
4054 MRRETURN(MATCH_NOMATCH);
4055 }
4056 eptr += min;
4057 break;
4058
4059 case OP_ANYNL:
4060 for (i = 1; i <= min; i++)
4061 {
4062 if (eptr >= md->end_subject)
4063 {
4064 SCHECK_PARTIAL();
4065 MRRETURN(MATCH_NOMATCH);
4066 }
4067 switch(*eptr++)
4068 {
4069 default: MRRETURN(MATCH_NOMATCH);
4070 case 0x000d:
4071 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4072 break;
4073 case 0x000a:
4074 break;
4075
4076 case 0x000b:
4077 case 0x000c:
4078 case 0x0085:
4079 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4080 break;
4081 }
4082 }
4083 break;
4084
4085 case OP_NOT_HSPACE:
4086 for (i = 1; i <= min; i++)
4087 {
4088 if (eptr >= md->end_subject)
4089 {
4090 SCHECK_PARTIAL();
4091 MRRETURN(MATCH_NOMATCH);
4092 }
4093 switch(*eptr++)
4094 {
4095 default: break;
4096 case 0x09: /* HT */
4097 case 0x20: /* SPACE */
4098 case 0xa0: /* NBSP */
4099 MRRETURN(MATCH_NOMATCH);
4100 }
4101 }
4102 break;
4103
4104 case OP_HSPACE:
4105 for (i = 1; i <= min; i++)
4106 {
4107 if (eptr >= md->end_subject)
4108 {
4109 SCHECK_PARTIAL();
4110 MRRETURN(MATCH_NOMATCH);
4111 }
4112 switch(*eptr++)
4113 {
4114 default: MRRETURN(MATCH_NOMATCH);
4115 case 0x09: /* HT */
4116 case 0x20: /* SPACE */
4117 case 0xa0: /* NBSP */
4118 break;
4119 }
4120 }
4121 break;
4122
4123 case OP_NOT_VSPACE:
4124 for (i = 1; i <= min; i++)
4125 {
4126 if (eptr >= md->end_subject)
4127 {
4128 SCHECK_PARTIAL();
4129 MRRETURN(MATCH_NOMATCH);
4130 }
4131 switch(*eptr++)
4132 {
4133 default: break;
4134 case 0x0a: /* LF */
4135 case 0x0b: /* VT */
4136 case 0x0c: /* FF */
4137 case 0x0d: /* CR */
4138 case 0x85: /* NEL */
4139 MRRETURN(MATCH_NOMATCH);
4140 }
4141 }
4142 break;
4143
4144 case OP_VSPACE:
4145 for (i = 1; i <= min; i++)
4146 {
4147 if (eptr >= md->end_subject)
4148 {
4149 SCHECK_PARTIAL();
4150 MRRETURN(MATCH_NOMATCH);
4151 }
4152 switch(*eptr++)
4153 {
4154 default: MRRETURN(MATCH_NOMATCH);
4155 case 0x0a: /* LF */
4156 case 0x0b: /* VT */
4157 case 0x0c: /* FF */
4158 case 0x0d: /* CR */
4159 case 0x85: /* NEL */
4160 break;
4161 }
4162 }
4163 break;
4164
4165 case OP_NOT_DIGIT:
4166 for (i = 1; i <= min; i++)
4167 {
4168 if (eptr >= md->end_subject)
4169 {
4170 SCHECK_PARTIAL();
4171 MRRETURN(MATCH_NOMATCH);
4172 }
4173 if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4174 }
4175 break;
4176
4177 case OP_DIGIT:
4178 for (i = 1; i <= min; i++)
4179 {
4180 if (eptr >= md->end_subject)
4181 {
4182 SCHECK_PARTIAL();
4183 MRRETURN(MATCH_NOMATCH);
4184 }
4185 if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4186 }
4187 break;
4188
4189 case OP_NOT_WHITESPACE:
4190 for (i = 1; i <= min; i++)
4191 {
4192 if (eptr >= md->end_subject)
4193 {
4194 SCHECK_PARTIAL();
4195 MRRETURN(MATCH_NOMATCH);
4196 }
4197 if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4198 }
4199 break;
4200
4201 case OP_WHITESPACE:
4202 for (i = 1; i <= min; i++)
4203 {
4204 if (eptr >= md->end_subject)
4205 {
4206 SCHECK_PARTIAL();
4207 MRRETURN(MATCH_NOMATCH);
4208 }
4209 if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4210 }
4211 break;
4212
4213 case OP_NOT_WORDCHAR:
4214 for (i = 1; i <= min; i++)
4215 {
4216 if (eptr >= md->end_subject)
4217 {
4218 SCHECK_PARTIAL();
4219 MRRETURN(MATCH_NOMATCH);
4220 }
4221 if ((md->ctypes[*eptr++] & ctype_word) != 0)
4222 MRRETURN(MATCH_NOMATCH);
4223 }
4224 break;
4225
4226 case OP_WORDCHAR:
4227 for (i = 1; i <= min; i++)
4228 {
4229 if (eptr >= md->end_subject)
4230 {
4231 SCHECK_PARTIAL();
4232 MRRETURN(MATCH_NOMATCH);
4233 }
4234 if ((md->ctypes[*eptr++] & ctype_word) == 0)
4235 MRRETURN(MATCH_NOMATCH);
4236 }
4237 break;
4238
4239 default:
4240 RRETURN(PCRE_ERROR_INTERNAL);
4241 }
4242 }
4243
4244 /* If min = max, continue at the same level without recursing */
4245
4246 if (min == max) continue;
4247
4248 /* If minimizing, we have to test the rest of the pattern before each
4249 subsequent match. Again, separate the UTF-8 case for speed, and also
4250 separate the UCP cases. */
4251
4252 if (minimize)
4253 {
4254 #ifdef SUPPORT_UCP
4255 if (prop_type >= 0)
4256 {
4257 switch(prop_type)
4258 {
4259 case PT_ANY:
4260 for (fi = min;; fi++)
4261 {
4262 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4263 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4264 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4265 if (eptr >= md->end_subject)
4266 {
4267 SCHECK_PARTIAL();
4268 MRRETURN(MATCH_NOMATCH);
4269 }
4270 GETCHARINCTEST(c, eptr);
4271 if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4272 }
4273 /* Control never gets here */
4274
4275 case PT_LAMP:
4276 for (fi = min;; fi++)
4277 {
4278 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4279 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4280 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4281 if (eptr >= md->end_subject)
4282 {
4283 SCHECK_PARTIAL();
4284 MRRETURN(MATCH_NOMATCH);
4285 }
4286 GETCHARINCTEST(c, eptr);
4287 prop_chartype = UCD_CHARTYPE(c);
4288 if ((prop_chartype == ucp_Lu ||
4289 prop_chartype == ucp_Ll ||
4290 prop_chartype == ucp_Lt) == prop_fail_result)
4291 MRRETURN(MATCH_NOMATCH);
4292 }
4293 /* Control never gets here */
4294
4295 case PT_GC:
4296 for (fi = min;; fi++)
4297 {
4298 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4299 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4300 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4301 if (eptr >= md->end_subject)
4302 {
4303 SCHECK_PARTIAL();
4304 MRRETURN(MATCH_NOMATCH);
4305 }
4306 GETCHARINCTEST(c, eptr);
4307 prop_category = UCD_CATEGORY(c);
4308 if ((prop_category == prop_value) == prop_fail_result)
4309 MRRETURN(MATCH_NOMATCH);
4310 }
4311 /* Control never gets here */
4312
4313 case PT_PC:
4314 for (fi = min;; fi++)
4315 {
4316 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4317 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4318 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4319 if (eptr >= md->end_subject)
4320 {
4321 SCHECK_PARTIAL();
4322 MRRETURN(MATCH_NOMATCH);
4323 }
4324 GETCHARINCTEST(c, eptr);
4325 prop_chartype = UCD_CHARTYPE(c);
4326 if ((prop_chartype == prop_value) == prop_fail_result)
4327 MRRETURN(MATCH_NOMATCH);
4328 }
4329 /* Control never gets here */
4330
4331 case PT_SC:
4332 for (fi = min;; fi++)
4333 {
4334 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4335 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4336 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4337 if (eptr >= md->end_subject)
4338 {
4339 SCHECK_PARTIAL();
4340 MRRETURN(MATCH_NOMATCH);
4341 }
4342 GETCHARINCTEST(c, eptr);
4343 prop_script = UCD_SCRIPT(c);
4344 if ((prop_script == prop_value) == prop_fail_result)
4345 MRRETURN(MATCH_NOMATCH);
4346 }
4347 /* Control never gets here */
4348
4349 case PT_ALNUM:
4350 for (fi = min;; fi++)
4351 {
4352 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4353 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4354 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4355 if (eptr >= md->end_subject)
4356 {
4357 SCHECK_PARTIAL();
4358 MRRETURN(MATCH_NOMATCH);
4359 }
4360 GETCHARINCTEST(c, eptr);
4361 prop_category = UCD_CATEGORY(c);
4362 if ((prop_category == ucp_L || prop_category == ucp_N)
4363 == prop_fail_result)
4364 MRRETURN(MATCH_NOMATCH);
4365 }
4366 /* Control never gets here */
4367
4368 case PT_SPACE: /* Perl space */
4369 for (fi = min;; fi++)
4370 {
4371 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4372 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4373 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4374 if (eptr >= md->end_subject)
4375 {
4376 SCHECK_PARTIAL();
4377 MRRETURN(MATCH_NOMATCH);
4378 }
4379 GETCHARINCTEST(c, eptr);
4380 prop_category = UCD_CATEGORY(c);
4381 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4382 c == CHAR_FF || c == CHAR_CR)
4383 == prop_fail_result)
4384 MRRETURN(MATCH_NOMATCH);
4385 }
4386 /* Control never gets here */
4387
4388 case PT_PXSPACE: /* POSIX space */
4389 for (fi = min;; fi++)
4390 {
4391 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4392 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4393 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4394 if (eptr >= md->end_subject)
4395 {
4396 SCHECK_PARTIAL();
4397 MRRETURN(MATCH_NOMATCH);
4398 }
4399 GETCHARINCTEST(c, eptr);
4400 prop_category = UCD_CATEGORY(c);
4401 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4402 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4403 == prop_fail_result)
4404 MRRETURN(MATCH_NOMATCH);
4405 }
4406 /* Control never gets here */
4407
4408 case PT_WORD:
4409 for (fi = min;; fi++)
4410 {
4411 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4412 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4413 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4414 if (eptr >= md->end_subject)
4415 {
4416 SCHECK_PARTIAL();
4417 MRRETURN(MATCH_NOMATCH);
4418 }
4419 GETCHARINCTEST(c, eptr);
4420 prop_category = UCD_CATEGORY(c);
4421 if ((prop_category == ucp_L ||
4422 prop_category == ucp_N ||
4423 c == CHAR_UNDERSCORE)
4424 == prop_fail_result)
4425 MRRETURN(MATCH_NOMATCH);
4426 }
4427 /* Control never gets here */
4428
4429 /* This should never occur */
4430
4431 default:
4432 RRETURN(PCRE_ERROR_INTERNAL);
4433 }
4434 }
4435
4436 /* Match extended Unicode sequences. We will get here only if the
4437 support is in the binary; otherwise a compile-time error occurs. */
4438
4439 else if (ctype == OP_EXTUNI)
4440 {
4441 for (fi = min;; fi++)
4442 {
4443 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4444 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4445 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4446 if (eptr >= md->end_subject)
4447 {
4448 SCHECK_PARTIAL();
4449 MRRETURN(MATCH_NOMATCH);
4450 }
4451 GETCHARINCTEST(c, eptr);
4452 prop_category = UCD_CATEGORY(c);
4453 if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
4454 while (eptr < md->end_subject)
4455 {
4456 int len = 1;
4457 if (!utf8) c = *eptr;
4458 else { GETCHARLEN(c, eptr, len); }
4459 prop_category = UCD_CATEGORY(c);
4460 if (prop_category != ucp_M) break;
4461 eptr += len;
4462 }
4463 }
4464 }
4465
4466 else
4467 #endif /* SUPPORT_UCP */
4468
4469 #ifdef SUPPORT_UTF8
4470 /* UTF-8 mode */
4471 if (utf8)
4472 {
4473 for (fi = min;; fi++)
4474 {
4475 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4476 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4477 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4478 if (eptr >= md->end_subject)
4479 {
4480 SCHECK_PARTIAL();
4481 MRRETURN(MATCH_NOMATCH);
4482 }
4483 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4484 MRRETURN(MATCH_NOMATCH);
4485 GETCHARINC(c, eptr);
4486 switch(ctype)
4487 {
4488 case OP_ANY: /* This is the non-NL case */
4489 case OP_ALLANY:
4490 case OP_ANYBYTE:
4491 break;
4492
4493 case OP_ANYNL:
4494 switch(c)
4495 {
4496 default: MRRETURN(MATCH_NOMATCH);
4497 case 0x000d:
4498 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4499 break;
4500 case 0x000a:
4501 break;
4502
4503 case 0x000b:
4504 case 0x000c:
4505 case 0x0085:
4506 case 0x2028:
4507 case 0x2029:
4508 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4509 break;
4510 }
4511 break;
4512
4513 case OP_NOT_HSPACE:
4514 switch(c)
4515 {
4516 default: break;
4517 case 0x09: /* HT */
4518 case 0x20: /* SPACE */
4519 case 0xa0: /* NBSP */
4520 case 0x1680: /* OGHAM SPACE MARK */
4521 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4522 case 0x2000: /* EN QUAD */
4523 case 0x2001: /* EM QUAD */
4524 case 0x2002: /* EN SPACE */
4525 case 0x2003: /* EM SPACE */
4526 case 0x2004: /* THREE-PER-EM SPACE */
4527 case 0x2005: /* FOUR-PER-EM SPACE */
4528 case 0x2006: /* SIX-PER-EM SPACE */
4529 case 0x2007: /* FIGURE SPACE */
4530 case 0x2008: /* PUNCTUATION SPACE */
4531 case 0x2009: /* THIN SPACE */
4532 case 0x200A: /* HAIR SPACE */
4533 case 0x202f: /* NARROW NO-BREAK SPACE */
4534 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4535 case 0x3000: /* IDEOGRAPHIC SPACE */
4536 MRRETURN(MATCH_NOMATCH);
4537 }
4538 break;
4539
4540 case OP_HSPACE:
4541 switch(c)
4542 {
4543 default: MRRETURN(MATCH_NOMATCH);
4544 case 0x09: /* HT */
4545 case 0x20: /* SPACE */
4546 case 0xa0: /* NBSP */
4547 case 0x1680: /* OGHAM SPACE MARK */
4548 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
4549 case 0x2000: /* EN QUAD */
4550 case 0x2001: /* EM QUAD */
4551 case 0x2002: /* EN SPACE */
4552 case 0x2003: /* EM SPACE */
4553 case 0x2004: /* THREE-PER-EM SPACE */
4554 case 0x2005: /* FOUR-PER-EM SPACE */
4555 case 0x2006: /* SIX-PER-EM SPACE */
4556 case 0x2007: /* FIGURE SPACE */
4557 case 0x2008: /* PUNCTUATION SPACE */
4558 case 0x2009: /* THIN SPACE */
4559 case 0x200A: /* HAIR SPACE */
4560 case 0x202f: /* NARROW NO-BREAK SPACE */
4561 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
4562 case 0x3000: /* IDEOGRAPHIC SPACE */
4563 break;
4564 }
4565 break;
4566
4567 case OP_NOT_VSPACE:
4568 switch(c)
4569 {
4570 default: break;
4571 case 0x0a: /* LF */
4572 case 0x0b: /* VT */
4573 case 0x0c: /* FF */
4574 case 0x0d: /* CR */
4575 case 0x85: /* NEL */
4576 case 0x2028: /* LINE SEPARATOR */
4577 case 0x2029: /* PARAGRAPH SEPARATOR */
4578 MRRETURN(MATCH_NOMATCH);
4579 }
4580 break;
4581
4582 case OP_VSPACE:
4583 switch(c)
4584 {
4585 default: MRRETURN(MATCH_NOMATCH);
4586 case 0x0a: /* LF */
4587 case 0x0b: /* VT */
4588 case 0x0c: /* FF */
4589 case 0x0d: /* CR */
4590 case 0x85: /* NEL */
4591 case 0x2028: /* LINE SEPARATOR */
4592 case 0x2029: /* PARAGRAPH SEPARATOR */
4593 break;
4594 }
4595 break;
4596
4597 case OP_NOT_DIGIT:
4598 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4599 MRRETURN(MATCH_NOMATCH);
4600 break;
4601
4602 case OP_DIGIT:
4603 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4604 MRRETURN(MATCH_NOMATCH);
4605 break;
4606
4607 case OP_NOT_WHITESPACE:
4608 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4609 MRRETURN(MATCH_NOMATCH);
4610 break;
4611
4612 case OP_WHITESPACE:
4613 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4614 MRRETURN(MATCH_NOMATCH);
4615 break;
4616
4617 case OP_NOT_WORDCHAR:
4618 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4619 MRRETURN(MATCH_NOMATCH);
4620 break;
4621
4622 case OP_WORDCHAR:
4623 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4624 MRRETURN(MATCH_NOMATCH);
4625 break;
4626
4627 default:
4628 RRETURN(PCRE_ERROR_INTERNAL);
4629 }
4630 }
4631 }
4632 else
4633 #endif
4634 /* Not UTF-8 mode */
4635 {
4636 for (fi = min;; fi++)
4637 {
4638 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4639 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4640 if (fi >= max) MRRETURN(MATCH_NOMATCH);
4641 if (eptr >= md->end_subject)
4642 {
4643 SCHECK_PARTIAL();
4644 MRRETURN(MATCH_NOMATCH);
4645 }
4646 if (ctype == OP_ANY && IS_NEWLINE(eptr))
4647 MRRETURN(MATCH_NOMATCH);
4648 c = *eptr++;
4649 switch(ctype)
4650 {
4651 case OP_ANY: /* This is the non-NL case */
4652 case OP_ALLANY:
4653 case OP_ANYBYTE:
4654 break;
4655
4656 case OP_ANYNL:
4657 switch(c)
4658 {
4659 default: MRRETURN(MATCH_NOMATCH);
4660 case 0x000d:
4661 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4662 break;
4663
4664 case 0x000a:
4665 break;
4666
4667 case 0x000b:
4668 case 0x000c:
4669 case 0x0085:
4670 if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4671 break;
4672 }
4673 break;
4674
4675 case OP_NOT_HSPACE:
4676 switch(c)
4677 {
4678 default: break;
4679 case 0x09: /* HT */
4680 case 0x20: /* SPACE */
4681 case 0xa0: /* NBSP */
4682 MRRETURN(MATCH_NOMATCH);
4683 }
4684 break;
4685
4686 case OP_HSPACE:
4687 switch(c)
4688 {
4689 default: MRRETURN(MATCH_NOMATCH);
4690 case 0x09: /* HT */
4691 case 0x20: /* SPACE */
4692 case 0xa0: /* NBSP */
4693 break;
4694 }
4695 break;
4696
4697 case OP_NOT_VSPACE:
4698 switch(c)
4699 {
4700 default: break;
4701 case 0x0a: /* LF */
4702 case 0x0b: /* VT */
4703 case 0x0c: /* FF */
4704 case 0x0d: /* CR */
4705 case 0x85: /* NEL */
4706 MRRETURN(MATCH_NOMATCH);
4707 }
4708 break;
4709
4710 case OP_VSPACE:
4711 switch(c)
4712 {
4713 default: MRRETURN(MATCH_NOMATCH);
4714 case 0x0a: /* LF */
4715 case 0x0b: /* VT */
4716 case 0x0c: /* FF */
4717 case 0x0d: /* CR */
4718 case 0x85: /* NEL */
4719 break;
4720 }
4721 break;
4722
4723 case OP_NOT_DIGIT:
4724 if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4725 break;
4726
4727 case OP_DIGIT:
4728 if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4729 break;
4730
4731 case OP_NOT_WHITESPACE:
4732 if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4733 break;
4734
4735 case OP_WHITESPACE:
4736 if ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4737 break;
4738
4739 case OP_NOT_WORDCHAR:
4740 if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
4741 break;
4742
4743 case OP_WORDCHAR:
4744 if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
4745 break;
4746
4747 default:
4748 RRETURN(PCRE_ERROR_INTERNAL);
4749 }
4750 }
4751 }
4752 /* Control never gets here */
4753 }
4754
4755 /* If maximizing, it is worth using inline code for speed, doing the type
4756 test once at the start (i.e. keep it out of the loop). Again, keep the
4757 UTF-8 and UCP stuff separate. */
4758
4759 else
4760 {
4761 pp = eptr; /* Remember where we started */
4762
4763 #ifdef SUPPORT_UCP
4764 if (prop_type >= 0)
4765 {
4766 switch(prop_type)
4767 {
4768 case PT_ANY:
4769 for (i = min; i < max; i++)
4770 {
4771 int len = 1;
4772 if (eptr >= md->end_subject)
4773 {
4774 SCHECK_PARTIAL();
4775 break;
4776 }
4777 GETCHARLENTEST(c, eptr, len);
4778 if (prop_fail_result) break;
4779 eptr+= len;
4780 }
4781 break;
4782
4783 case PT_LAMP:
4784 for (i = min; i < max; i++)
4785 {
4786 int len = 1;
4787 if (eptr >= md->end_subject)
4788 {
4789 SCHECK_PARTIAL();
4790 break;
4791 }
4792 GETCHARLENTEST(c, eptr, len);
4793 prop_chartype = UCD_CHARTYPE(c);
4794 if ((prop_chartype == ucp_Lu ||
4795 prop_chartype == ucp_Ll ||
4796 prop_chartype == ucp_Lt) == prop_fail_result)
4797 break;
4798 eptr+= len;
4799 }
4800 break;
4801
4802 case PT_GC:
4803 for (i = min; i < max; i++)
4804 {
4805 int len = 1;
4806 if (eptr >= md->end_subject)
4807 {
4808 SCHECK_PARTIAL();
4809 break;
4810 }
4811 GETCHARLENTEST(c, eptr, len);
4812 prop_category = UCD_CATEGORY(c);
4813 if ((prop_category == prop_value) == prop_fail_result)
4814 break;
4815 eptr+= len;
4816 }
4817 break;
4818
4819 case PT_PC:
4820 for (i = min; i < max; i++)
4821 {
4822 int len = 1;
4823 if (eptr >= md->end_subject)
4824 {
4825 SCHECK_PARTIAL();
4826 break;
4827 }
4828 GETCHARLENTEST(c, eptr, len);
4829 prop_chartype = UCD_CHARTYPE(c);
4830 if ((prop_chartype == prop_value) == prop_fail_result)
4831 break;
4832 eptr+= len;
4833 }
4834 break;
4835
4836 case PT_SC:
4837 for (i = min; i < max; i++)
4838 {
4839 int len = 1;
4840 if (eptr >= md->end_subject)
4841 {
4842 SCHECK_PARTIAL();
4843 break;
4844 }
4845 GETCHARLENTEST(c, eptr, len);
4846 prop_script = UCD_SCRIPT(c);
4847 if ((prop_script == prop_value) == prop_fail_result)
4848 break;
4849 eptr+= len;
4850 }
4851 break;
4852
4853 case PT_ALNUM:
4854 for (i = min; i < max; i++)
4855 {
4856 int len = 1;
4857 if (eptr >= md->end_subject)
4858 {
4859 SCHECK_PARTIAL();
4860 break;
4861 }
4862 GETCHARLENTEST(c, eptr, len);
4863 prop_category = UCD_CATEGORY(c);
4864 if ((prop_category == ucp_L || prop_category == ucp_N)
4865 == prop_fail_result)
4866 break;
4867 eptr+= len;
4868 }
4869 break;
4870
4871 case PT_SPACE: /* Perl space */
4872 for (i = min; i < max; i++)
4873 {
4874 int len = 1;
4875 if (eptr >= md->end_subject)
4876 {
4877 SCHECK_PARTIAL();
4878 break;
4879 }
4880 GETCHARLENTEST(c, eptr, len);
4881 prop_category = UCD_CATEGORY(c);
4882 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4883 c == CHAR_FF || c == CHAR_CR)
4884 == prop_fail_result)
4885 break;
4886 eptr+= len;
4887 }
4888 break;
4889
4890 case PT_PXSPACE: /* POSIX space */
4891 for (i = min; i < max; i++)
4892 {
4893 int len = 1;
4894 if (eptr >= md->end_subject)
4895 {
4896 SCHECK_PARTIAL();
4897 break;
4898 }
4899 GETCHARLENTEST(c, eptr, len);
4900 prop_category = UCD_CATEGORY(c);
4901 if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4902 c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4903 == prop_fail_result)
4904 break;
4905 eptr+= len;
4906 }
4907 break;
4908
4909 case PT_WORD:
4910 for (i = min; i < max; i++)
4911 {
4912 int len = 1;
4913 if (eptr >= md->end_subject)
4914 {
4915 SCHECK_PARTIAL();
4916 break;
4917 }
4918 GETCHARLENTEST(c, eptr, len);
4919 prop_category = UCD_CATEGORY(c);
4920 if ((prop_category == ucp_L || prop_category == ucp_N ||
4921 c == CHAR_UNDERSCORE) == prop_fail_result)
4922 break;
4923 eptr+= len;
4924 }
4925 break;
4926
4927 default:
4928 RRETURN(PCRE_ERROR_INTERNAL);
4929 }
4930
4931 /* eptr is now past the end of the maximum run */
4932
4933 if (possessive) continue;
4934 for(;;)
4935 {
4936 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4937 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4938 if (eptr-- == pp) break; /* Stop if tried at original pos */
4939 if (utf8) BACKCHAR(eptr);
4940 }
4941 }
4942
4943 /* Match extended Unicode sequences. We will get here only if the
4944 support is in the binary; otherwise a compile-time error occurs. */
4945
4946 else if (ctype == OP_EXTUNI)
4947 {
4948 for (i = min; i < max; i++)
4949 {
4950 if (eptr >= md->end_subject)
4951 {
4952 SCHECK_PARTIAL();
4953 break;
4954 }
4955 GETCHARINCTEST(c, eptr);
4956 prop_category = UCD_CATEGORY(c);
4957 if (prop_category == ucp_M) break;
4958 while (eptr < md->end_subject)
4959 {
4960 int len = 1;
4961 if (!utf8) c = *eptr; else
4962 {
4963 GETCHARLEN(c, eptr, len);
4964 }
4965 prop_category = UCD_CATEGORY(c);
4966 if (prop_category != ucp_M) break;
4967 eptr += len;
4968 }
4969 }
4970
4971 /* eptr is now past the end of the maximum run */
4972
4973 if (possessive) continue;
4974
4975 for(;;)
4976 {
4977 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
4978 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4979 if (eptr-- == pp) break; /* Stop if tried at original pos */
4980 for (;;) /* Move back over one extended */
4981 {
4982 int len = 1;
4983 if (!utf8) c = *eptr; else
4984 {
4985 BACKCHAR(eptr);
4986 GETCHARLEN(c, eptr, len);
4987 }
4988 prop_category = UCD_CATEGORY(c);
4989 if (prop_category != ucp_M) break;
4990 eptr--;
4991 }
4992 }
4993 }
4994
4995 else
4996 #endif /* SUPPORT_UCP */
4997
4998 #ifdef SUPPORT_UTF8
4999 /* UTF-8 mode */
5000
5001 if (utf8)
5002 {
5003 switch(ctype)
5004 {
5005 case OP_ANY:
5006 if (max < INT_MAX)
5007 {
5008 for (i = min; i < max; i++)
5009 {
5010 if (eptr >= md->end_subject)
5011 {
5012 SCHECK_PARTIAL();
5013 break;
5014 }
5015 if (IS_NEWLINE(eptr)) break;
5016 eptr++;
5017 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5018 }
5019 }
5020
5021 /* Handle unlimited UTF-8 repeat */
5022
5023 else
5024 {
5025 for (i = min; i < max; i++)
5026 {
5027 if (eptr >= md->end_subject)
5028 {
5029 SCHECK_PARTIAL();
5030 break;
5031 }
5032 if (IS_NEWLINE(eptr)) break;
5033 eptr++;
5034 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5035 }
5036 }
5037 break;
5038
5039 case OP_ALLANY:
5040 if (max < INT_MAX)
5041 {
5042 for (i = min; i < max; i++)
5043 {
5044 if (eptr >= md->end_subject)
5045 {
5046 SCHECK_PARTIAL();
5047 break;
5048 }
5049 eptr++;
5050 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
5051 }
5052 }
5053 else eptr = md->end_subject; /* Unlimited UTF-8 repeat */
5054 break;
5055
5056 /* The byte case is the same as non-UTF8 */
5057
5058 case OP_ANYBYTE:
5059 c = max - min;
5060 if (c > (unsigned int)(md->end_subject - eptr))
5061 {
5062 eptr = md->end_subject;
5063 SCHECK_PARTIAL();
5064 }
5065 else eptr += c;
5066 break;
5067
5068 case OP_ANYNL:
5069 for (i = min; i < max; i++)
5070 {
5071 int len = 1;
5072 if (eptr >= md->end_subject)
5073 {
5074 SCHECK_PARTIAL();
5075 break;
5076 }
5077 GETCHARLEN(c, eptr, len);
5078 if (c == 0x000d)
5079 {
5080 if (++eptr >= md->end_subject) break;
5081 if (*eptr == 0x000a) eptr++;
5082 }
5083 else
5084 {
5085 if (c != 0x000a &&
5086 (md->bsr_anycrlf ||
5087 (c != 0x000b && c != 0x000c &&
5088 c != 0x0085 && c != 0x2028 && c != 0x2029)))
5089 break;
5090 eptr += len;
5091 }
5092 }
5093 break;
5094
5095 case OP_NOT_HSPACE:
5096 case OP_HSPACE:
5097 for (i = min; i < max; i++)
5098 {
5099 BOOL gotspace;
5100 int len = 1;
5101 if (eptr >= md->end_subject)
5102 {
5103 SCHECK_PARTIAL();
5104 break;
5105 }
5106 GETCHARLEN(c, eptr, len);
5107 switch(c)
5108 {
5109 default: gotspace = FALSE; break;
5110 case 0x09: /* HT */
5111 case 0x20: /* SPACE */
5112 case 0xa0: /* NBSP */
5113 case 0x1680: /* OGHAM SPACE MARK */
5114 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
5115 case 0x2000: /* EN QUAD */
5116 case 0x2001: /* EM QUAD */
5117 case 0x2002: /* EN SPACE */
5118 case 0x2003: /* EM SPACE */
5119 case 0x2004: /* THREE-PER-EM SPACE */
5120 case 0x2005: /* FOUR-PER-EM SPACE */
5121 case 0x2006: /* SIX-PER-EM SPACE */
5122 case 0x2007: /* FIGURE SPACE */
5123 case 0x2008: /* PUNCTUATION SPACE */
5124 case 0x2009: /* THIN SPACE */
5125 case 0x200A: /* HAIR SPACE */
5126 case 0x202f: /* NARROW NO-BREAK SPACE */
5127 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
5128 case 0x3000: /* IDEOGRAPHIC SPACE */
5129 gotspace = TRUE;
5130 break;
5131 }
5132 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5133 eptr += len;
5134 }
5135 break;
5136
5137 case OP_NOT_VSPACE:
5138 case OP_VSPACE:
5139 for (i = min; i < max; i++)
5140 {
5141 BOOL gotspace;
5142 int len = 1;
5143 if (eptr >= md->end_subject)
5144 {
5145 SCHECK_PARTIAL();
5146 break;
5147 }
5148 GETCHARLEN(c, eptr, len);
5149 switch(c)
5150 {
5151 default: gotspace = FALSE; break;
5152 case 0x0a: /* LF */
5153 case 0x0b: /* VT */
5154 case 0x0c: /* FF */
5155 case 0x0d: /* CR */
5156 case 0x85: /* NEL */
5157 case 0x2028: /* LINE SEPARATOR */
5158 case 0x2029: /* PARAGRAPH SEPARATOR */
5159 gotspace = TRUE;
5160 break;
5161 }
5162 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5163 eptr += len;
5164 }
5165 break;
5166
5167 case OP_NOT_DIGIT:
5168 for (i = min; i < max; i++)
5169 {
5170 int len = 1;
5171 if (eptr >= md->end_subject)
5172 {
5173 SCHECK_PARTIAL();
5174 break;
5175 }
5176 GETCHARLEN(c, eptr, len);
5177 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5178 eptr+= len;
5179 }
5180 break;
5181
5182 case OP_DIGIT:
5183 for (i = min; i < max; i++)
5184 {
5185 int len = 1;
5186 if (eptr >= md->end_subject)
5187 {
5188 SCHECK_PARTIAL();
5189 break;
5190 }
5191 GETCHARLEN(c, eptr, len);
5192 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5193 eptr+= len;
5194 }
5195 break;
5196
5197 case OP_NOT_WHITESPACE:
5198 for (i = min; i < max; i++)
5199 {
5200 int len = 1;
5201 if (eptr >= md->end_subject)
5202 {
5203 SCHECK_PARTIAL();
5204 break;
5205 }
5206 GETCHARLEN(c, eptr, len);
5207 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5208 eptr+= len;
5209 }
5210 break;
5211
5212 case OP_WHITESPACE:
5213 for (i = min; i < max; i++)
5214 {
5215 int len = 1;
5216 if (eptr >= md->end_subject)
5217 {
5218 SCHECK_PARTIAL();
5219 break;
5220 }
5221 GETCHARLEN(c, eptr, len);
5222 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5223 eptr+= len;
5224 }
5225 break;
5226
5227 case OP_NOT_WORDCHAR:
5228 for (i = min; i < max; i++)
5229 {
5230 int len = 1;
5231 if (eptr >= md->end_subject)
5232 {
5233 SCHECK_PARTIAL();
5234 break;
5235 }
5236 GETCHARLEN(c, eptr, len);
5237 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5238 eptr+= len;
5239 }
5240 break;
5241
5242 case OP_WORDCHAR:
5243 for (i = min; i < max; i++)
5244 {
5245 int len = 1;
5246 if (eptr >= md->end_subject)
5247 {
5248 SCHECK_PARTIAL();
5249 break;
5250 }
5251 GETCHARLEN(c, eptr, len);
5252 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5253 eptr+= len;
5254 }
5255 break;
5256
5257 default:
5258 RRETURN(PCRE_ERROR_INTERNAL);
5259 }
5260
5261 /* eptr is now past the end of the maximum run */
5262
5263 if (possessive) continue;
5264 for(;;)
5265 {
5266 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
5267 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5268 if (eptr-- == pp) break; /* Stop if tried at original pos */
5269 BACKCHAR(eptr);
5270 }
5271 }
5272 else
5273 #endif /* SUPPORT_UTF8 */
5274
5275 /* Not UTF-8 mode */
5276 {
5277 switch(ctype)
5278 {
5279 case OP_ANY:
5280 for (i = min; i < max; i++)
5281 {
5282 if (eptr >= md->end_subject)
5283 {
5284 SCHECK_PARTIAL();
5285 break;
5286 }
5287 if (IS_NEWLINE(eptr)) break;
5288 eptr++;
5289 }
5290 break;
5291
5292 case OP_ALLANY:
5293 case OP_ANYBYTE:
5294 c = max - min;
5295 if (c > (unsigned int)(md->end_subject - eptr))
5296 {
5297 eptr = md->end_subject;
5298 SCHECK_PARTIAL();
5299 }
5300 else eptr += c;
5301 break;
5302
5303 case OP_ANYNL:
5304 for (i = min; i < max; i++)
5305 {
5306 if (eptr >= md->end_subject)
5307 {
5308 SCHECK_PARTIAL();
5309 break;
5310 }
5311 c = *eptr;
5312 if (c == 0x000d)
5313 {
5314 if (++eptr >= md->end_subject) break;
5315 if (*eptr == 0x000a) eptr++;
5316 }
5317 else
5318 {
5319 if (c != 0x000a &&
5320 (md->bsr_anycrlf ||
5321 (c != 0x000b && c != 0x000c && c != 0x0085)))
5322 break;
5323 eptr++;
5324 }
5325 }
5326 break;
5327
5328 case OP_NOT_HSPACE:
5329 for (i = min; i < max; i++)
5330 {
5331 if (eptr >= md->end_subject)
5332 {
5333 SCHECK_PARTIAL();
5334 break;
5335 }
5336 c = *eptr;
5337 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
5338 eptr++;
5339 }
5340 break;
5341
5342 case OP_HSPACE:
5343 for (i = min; i < max; i++)
5344 {
5345 if (eptr >= md->end_subject)
5346 {
5347 SCHECK_PARTIAL();
5348 break;
5349 }
5350 c = *eptr;
5351 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
5352 eptr++;
5353 }
5354 break;
5355
5356 case OP_NOT_VSPACE:
5357 for (i = min; i < max; i++)
5358 {
5359 if (eptr >= md->end_subject)
5360 {
5361 SCHECK_PARTIAL();
5362 break;
5363 }
5364 c = *eptr;
5365 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
5366 break;
5367 eptr++;
5368 }
5369 break;
5370
5371 case OP_VSPACE:
5372 for (i = min; i < max; i++)
5373 {
5374 if (eptr >= md->end_subject)
5375 {
5376 SCHECK_PARTIAL();
5377 break;
5378 }
5379 c = *eptr;
5380 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
5381 break;
5382 eptr++;
5383 }
5384 break;
5385
5386 case OP_NOT_DIGIT:
5387 for (i = min; i < max; i++)
5388 {
5389 if (eptr >= md->end_subject)
5390 {
5391 SCHECK_PARTIAL();
5392 break;
5393 }
5394 if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
5395 eptr++;
5396 }
5397 break;
5398
5399 case OP_DIGIT:
5400 for (i = min; i < max; i++)
5401 {
5402 if (eptr >= md->end_subject)
5403 {
5404 SCHECK_PARTIAL();
5405 break;
5406 }
5407 if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
5408 eptr++;
5409 }
5410 break;
5411
5412 case OP_NOT_WHITESPACE:
5413 for (i = min; i < max; i++)
5414 {
5415 if (eptr >= md->end_subject)
5416 {
5417 SCHECK_PARTIAL();
5418 break;
5419 }
5420 if ((md->ctypes[*eptr] & ctype_space) != 0) break;
5421 eptr++;
5422 }
5423 break;
5424
5425 case OP_WHITESPACE:
5426 for (i = min; i < max; i++)
5427 {
5428 if (eptr >= md->end_subject)
5429 {
5430 SCHECK_PARTIAL();
5431 break;
5432 }
5433 if ((md->ctypes[*eptr] & ctype_space) == 0) break;
5434 eptr++;
5435 }
5436 break;
5437
5438 case OP_NOT_WORDCHAR:
5439 for (i = min; i < max; i++)
5440 {
5441 if (eptr >= md->end_subject)
5442 {
5443 SCHECK_PARTIAL();
5444 break;
5445 }
5446 if ((md->ctypes[*eptr] & ctype_word) != 0) break;
5447 eptr++;
5448 }
5449 break;
5450
5451 case OP_WORDCHAR:
5452 for (i = min; i < max; i++)
5453 {
5454 if (eptr >= md->end_subject)
5455 {
5456 SCHECK_PARTIAL();
5457 break;
5458 }
5459 if ((md->ctypes[*eptr] & ctype_word) == 0) break;
5460 eptr++;
5461 }
5462 break;
5463
5464 default:
5465 RRETURN(PCRE_ERROR_INTERNAL);
5466 }
5467
5468 /* eptr is now past the end of the maximum run */
5469
5470 if (possessive) continue;
5471 while (eptr >= pp)
5472 {
5473 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
5474 eptr--;
5475 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5476 }
5477 }
5478
5479 /* Get here if we can't make it match with any permitted repetitions */
5480
5481 MRRETURN(MATCH_NOMATCH);
5482 }
5483 /* Control never gets here */
5484
5485 /* There's been some horrible disaster. Arrival here can only mean there is
5486 something seriously wrong in the code above or the OP_xxx definitions. */
5487
5488 default:
5489 DPRINTF(("Unknown opcode %d\n", *ecode));
5490 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
5491 }
5492
5493 /* Do not stick any code in here without much thought; it is assumed
5494 that "continue" in the code above comes out to here to repeat the main
5495 loop. */
5496
5497 } /* End of main loop */
5498 /* Control never reaches here */
5499
5500
5501 /* When compiling to use the heap rather than the stack for recursive calls to
5502 match(), the RRETURN() macro jumps here. The number that is saved in
5503 frame->Xwhere indicates which label we actually want to return to. */
5504
5505 #ifdef NO_RECURSE
5506 #define LBL(val) case val: goto L_RM##val;
5507 HEAP_RETURN:
5508 switch (frame->Xwhere)
5509 {
5510 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
5511 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5512 LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5513 LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5514 LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5515 #ifdef SUPPORT_UTF8
5516 LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5517 LBL(32) LBL(34) LBL(42) LBL(46)
5518 #ifdef SUPPORT_UCP
5519 LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5520 LBL(59) LBL(60) LBL(61) LBL(62)
5521 #endif /* SUPPORT_UCP */
5522 #endif /* SUPPORT_UTF8 */
5523 default:
5524 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
5525 return PCRE_ERROR_INTERNAL;
5526 }
5527 #undef LBL
5528 #endif /* NO_RECURSE */
5529 }
5530
5531
5532 /***************************************************************************
5533 ****************************************************************************
5534 RECURSION IN THE match() FUNCTION
5535
5536 Undefine all the macros that were defined above to handle this. */
5537
5538 #ifdef NO_RECURSE /* heap-frame build of match(): the names below are macros and must not leak into the rest of the file */
5539 #undef eptr
5540 #undef ecode
5541 #undef mstart
5542 #undef offset_top
5543 #undef ims
5544 #undef eptrb
5545 #undef flags
5546
5547 #undef callpat
5548 #undef charptr
5549 #undef data
5550 #undef next
5551 #undef pp
5552 #undef prev
5553 #undef saved_eptr
5554
5555 #undef new_recursive
5556
5557 #undef cur_is_word
5558 #undef condition
5559 #undef prev_is_word
5560
5561 #undef original_ims
5562
5563 #undef ctype
5564 #undef length
5565 #undef max
5566 #undef min
5567 #undef number
5568 #undef offset
5569 #undef op
5570 #undef save_capture_last
5571 #undef save_offset1
5572 #undef save_offset2
5573 #undef save_offset3
5574 #undef stacksave
5575
5576 #undef newptrb
5577
5578 #endif /* NO_RECURSE */
5579
5580 /* These two are defined as macros in both cases (with or without NO_RECURSE), so they are undefined unconditionally */
5581
5582 #undef fc
5583 #undef fi
5584
5585 /***************************************************************************
5586 ***************************************************************************/
5587
5588
5589
5590 /*************************************************
5591 * Execute a Regular Expression *
5592 *************************************************/
5593
5594 /* This function applies a compiled re to a subject string and picks out
5595 portions of the string if it matches. Two elements in the vector are set for
5596 each substring: the offsets to the start and end of the substring.
5597
5598 Arguments:
5599 argument_re points to the compiled expression
5600 extra_data points to extra data or is NULL
5601 subject points to the subject string
5602 length length of subject string (may contain binary zeros)
5603 start_offset where to start in the subject string
5604 options option bits
5605 offsets points to a vector of ints to be filled in with offsets
5606 offsetcount the number of elements in the vector
5607
5608 Returns: > 0 => success; value is the number of elements filled in
5609 = 0 => success, but offsets is not big enough
5610 -1 => failed to match
5611 < -1 => some kind of unexpected problem
5612 */
5613
5614 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
5615 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
5616 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
5617 int offsetcount)
5618 {
5619 int rc, resetcount, ocount;
5620 int first_byte = -1;
5621 int req_byte = -1;
5622 int req_byte2 = -1;
5623 int newline;
5624 unsigned long int ims;
5625 BOOL using_temporary_offsets = FALSE;
5626 BOOL anchored;
5627 BOOL startline;
5628 BOOL firstline;
5629 BOOL first_byte_caseless = FALSE;
5630 BOOL req_byte_caseless = FALSE;
5631 BOOL utf8;
5632 match_data match_block;
5633 match_data *md = &match_block;
5634 const uschar *tables;
5635 const uschar *start_bits = NULL;
5636 USPTR start_match = (USPTR)subject + start_offset;
5637 USPTR end_subject;
5638 USPTR start_partial = NULL;
5639 USPTR req_byte_ptr = start_match - 1;
5640
5641 pcre_study_data internal_study;
5642 const pcre_study_data *study;
5643
5644 real_pcre internal_re;
5645 const real_pcre *external_re = (const real_pcre *)argument_re;
5646 const real_pcre *re = external_re;
5647
5648 /* Plausibility checks */
5649
5650 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
5651 if (re == NULL || subject == NULL ||
5652 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
5653 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
5654 if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
5655
5656 /* This information is for finding all the numbers associated with a given
5657 name, for condition testing. */
5658
5659 md->name_table = (uschar *)re + re->name_table_offset;
5660 md->name_count = re->name_count;
5661 md->name_entry_size = re->name_entry_size;
5662
5663 /* Fish out the optional data from the extra_data structure, first setting
5664 the default values. */
5665
5666 study = NULL;
5667 md->match_limit = MATCH_LIMIT;
5668 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
5669 md->callout_data = NULL;
5670
5671 /* The table pointer is always in native byte order. */
5672
5673 tables = external_re->tables;
5674
5675 if (extra_data != NULL)
5676 {
5677 register unsigned int flags = extra_data->flags;
5678 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
5679 study = (const pcre_study_data *)extra_data->study_data;
5680 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
5681 md->match_limit = extra_data->match_limit;
5682 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
5683 md->match_limit_recursion = extra_data->match_limit_recursion;
5684 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
5685 md->callout_data = extra_data->callout_data;
5686 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
5687 }
5688
5689 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
5690 is a feature that makes it possible to save compiled regexes and re-use them
5691 in other programs later. */
5692
5693 if (tables == NULL) tables = _pcre_default_tables;
5694
5695 /* Check that the first field in the block is the magic number. If it is not,
5696 test for a regex that was compiled on a host of opposite endianness. If this is
5697 the case, flipped values are put in internal_re and internal_study if there was
5698 study data too. */
5699
5700 if (re->magic_number != MAGIC_NUMBER)
5701 {
5702 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
5703 if (re == NULL) return PCRE_ERROR_BADMAGIC;
5704 if (study != NULL) study = &internal_study;
5705 }
5706
5707 /* Set up other data */
5708
5709 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
5710 startline = (re->flags & PCRE_STARTLINE) != 0;
5711 firstline = (re->options & PCRE_FIRSTLINE) != 0;
5712
5713 /* The code starts after the real_pcre block and the capture name table. */
5714
5715 md->start_code = (const uschar *)external_re + re->name_table_offset +
5716 re->name_count * re->name_entry_size;
5717
5718 md->start_subject = (USPTR)subject;
5719 md->start_offset = start_offset;
5720 md->end_subject = md->start_subject + length;
5721 end_subject = md->end_subject;
5722
5723 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5724 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5725 md->use_ucp = (re->options & PCRE_UCP) != 0;
5726 md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5727
5728 md->notbol = (options & PCRE_NOTBOL) != 0;
5729 md->noteol = (options & PCRE_NOTEOL) != 0;
5730 md->notempty = (options & PCRE_NOTEMPTY) != 0;
5731 md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5732 md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5733 ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5734 md->hitend = FALSE;
5735 md->mark = NULL; /* In case never set */
5736
5737 md->recursive = NULL; /* No recursion at top level */
5738
5739 md->lcc = tables + lcc_offset;
5740 md->ctypes = tables + ctypes_offset;
5741
5742 /* Handle different \R options. */
5743
5744 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5745 {
5746 case 0:
5747 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
5748 md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
5749 else
5750 #ifdef BSR_ANYCRLF
5751 md->bsr_anycrlf = TRUE;
5752 #else
5753 md->bsr_anycrlf = FALSE;
5754 #endif
5755 break;
5756
5757 case PCRE_BSR_ANYCRLF:
5758 md->bsr_anycrlf = TRUE;
5759 break;
5760
5761 case PCRE_BSR_UNICODE:
5762 md->bsr_anycrlf = FALSE;
5763 break;
5764
5765 default: return PCRE_ERROR_BADNEWLINE;
5766 }
5767
5768 /* Handle different types of newline. The three bits give eight cases. If
5769 nothing is set at run time, whatever was used at compile time applies. */
5770
5771 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
5772 (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5773 {
5774 case 0: newline = NEWLINE; break; /* Compile-time default */
5775 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5776 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5777 case PCRE_NEWLINE_CR+
5778 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5779 case PCRE_NEWLINE_ANY: newline = -1; break;
5780 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5781 default: return PCRE_ERROR_BADNEWLINE;
5782 }
5783
5784 if (newline == -2)
5785 {
5786 md->nltype = NLTYPE_ANYCRLF;
5787 }
5788 else if (newline < 0)
5789 {
5790 md->nltype = NLTYPE_ANY;
5791 }
5792 else
5793 {
5794 md->nltype = NLTYPE_FIXED;
5795 if (newline > 255)
5796 {
5797 md->nllen = 2;
5798 md->nl[0] = (newline >> 8) & 255;
5799 md->nl[1] = newline & 255;
5800 }
5801 else
5802 {
5803 md->nllen = 1;
5804 md->nl[0] = newline;
5805 }
5806 }
5807
5808 /* Partial matching was originally supported only for a restricted set of
5809 regexes; from release 8.00 there are no restrictions, but the bits are still
5810 defined (though never set). So there's no harm in leaving this code. */
5811
5812 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5813 return PCRE_ERROR_BADPARTIAL;
5814
5815 /* Check a UTF-8 string if required. Pass back the character offset and error
5816 code if a results vector is available. */
5817
5818 #ifdef SUPPORT_UTF8
5819 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5820 {
5821 int errorcode;
5822 int tb = _pcre_valid_utf8((USPTR)subject, length, &errorcode);
5823 if (tb >= 0)
5824 {
5825 if (offsetcount >= 2)
5826 {
5827 offsets[0] = tb;
5828 offsets[1] = errorcode;
5829 }
5830 return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
5831 PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
5832 }
5833 if (start_offset > 0 && start_offset < length)
5834 {
5835 tb = ((USPTR)subject)[start_offset] & 0xc0;
5836 if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET;
5837 }
5838 }
5839 #endif
5840
5841 /* The ims options can vary during the matching as a result of the presence
5842 of (?ims) items in the pattern. They are kept in a local variable so that
5843 restoring at the exit of a group is easy. */
5844
5845 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
5846
5847 /* If the expression has got more back references than the offsets supplied can
5848 hold, we get a temporary chunk of working store to use during the matching.
5849 Otherwise, we can use the vector supplied, rounding down its size to a multiple
5850 of 3. */
5851
5852 ocount = offsetcount - (offsetcount % 3);
5853
5854 if (re->top_backref > 0 && re->top_backref >= ocount/3)
5855 {
5856 ocount = re->top_backref * 3 + 3;
5857 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
5858 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
5859 using_temporary_offsets = TRUE;
5860 DPRINTF(("Got memory to hold back references\n"));
5861 }
5862 else md->offset_vector = offsets;
5863
5864 md->offset_end = ocount;
5865 md->offset_max = (2*ocount)/3;
5866 md->offset_overflow = FALSE;
5867 md->capture_last = -1;
5868
5869 /* Compute the minimum number of offsets that we need to reset each time. Doing
5870 this makes a huge difference to execution time when there aren't many brackets
5871 in the pattern. */
5872
5873 resetcount = 2 + re->top_bracket * 2;
5874 if (resetcount > offsetcount) resetcount = ocount;
5875
5876 /* Reset the working variable associated with each extraction. These should
5877 never be used unless previously set, but they get saved and restored, and so we
5878 initialize them to avoid reading uninitialized locations. */
5879
5880 if (md->offset_vector != NULL)
5881 {
5882 register int *iptr = md->offset_vector + ocount;
5883 register int *iend = iptr - resetcount/2 + 1;
5884 while (--iptr >= iend) *iptr = -1;
5885 }
5886
5887 /* Set up the first character to match, if available. The first_byte value is
5888 never set for an anchored regular expression, but the anchoring may be forced
5889 at run time, so we have to test for anchoring. The first char may be unset for
5890 an unanchored pattern, of course. If there's no first char and the pattern was
5891 studied, there may be a bitmap of possible first characters. */
5892
5893 if (!anchored)
5894 {
5895 if ((re->flags & PCRE_FIRSTSET) != 0)
5896 {
5897 first_byte = re->first_byte & 255;
5898 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
5899 first_byte = md->lcc[first_byte];
5900 }
5901 else
5902 if (!startline && study != NULL &&
5903 (study->flags & PCRE_STUDY_MAPPED) != 0)
5904 start_bits = study->start_bits;
5905 }
5906
5907 /* For anchored or unanchored matches, there may be a "last known required
5908 character" set. */
5909
5910 if ((re->flags & PCRE_REQCHSET) != 0)
5911 {
5912 req_byte = re->req_byte & 255;
5913 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
5914 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
5915 }
5916
5917
5918 /* ==========================================================================*/
5919
5920 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
5921 the loop runs just once. */
5922
5923 for(;;)
5924 {
5925 USPTR save_end_subject = end_subject;
5926 USPTR new_start_match;
5927
5928 /* Reset the maximum number of extractions we might see. */
5929
5930 if (md->offset_vector != NULL)
5931 {
5932 register int *iptr = md->offset_vector;
5933 register int *iend = iptr + resetcount;
5934 while (iptr < iend) *iptr++ = -1;
5935 }
5936
5937 /* If firstline is TRUE, the start of the match is constrained to the first
5938 line of a multiline string. That is, the match must be before or at the first
5939 newline. Implement this by temporarily adjusting end_subject so that we stop
5940 scanning at a newline. If the match fails at the newline, later code breaks
5941 this loop. */
5942
5943 if (firstline)
5944 {
5945 USPTR t = start_match;
5946 #ifdef SUPPORT_UTF8
5947 if (utf8)
5948 {
5949 while (t < md->end_subject && !IS_NEWLINE(t))
5950 {
5951 t++;
5952 while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5953 }
5954 }
5955 else
5956 #endif
5957 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5958 end_subject = t;
5959 }
5960
5961 /* There are some optimizations that avoid running the match if a known
5962 starting point is not found, or if a known later character is not present.
5963 However, there is an option that disables these, for testing and for ensuring
5964 that all callouts do actually occur. The option can be set in the regex by
5965 (*NO_START_OPT) or passed in match-time options. */
5966
5967 if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
5968 {
5969 /* Advance to a unique first byte if there is one. */
5970
5971 if (first_byte >= 0)
5972 {
5973 if (first_byte_caseless)
5974 while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5975 start_match++;
5976 else
5977 while (start_match < end_subject && *start_match != first_byte)
5978 start_match++;
5979 }
5980
5981 /* Or to just after a linebreak for a multiline match */
5982
5983 else if (startline)
5984 {
5985 if (start_match > md->start_subject + start_offset)
5986 {
5987 #ifdef SUPPORT_UTF8
5988 if (utf8)
5989 {
5990 while (start_match < end_subject && !WAS_NEWLINE(start_match))
5991 {
5992 start_match++;
5993 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5994 start_match++;
5995 }
5996 }
5997 else
5998 #endif
5999 while (start_match < end_subject && !WAS_NEWLINE(start_match))
6000 start_match++;
6001
6002 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6003 and we are now at a LF, advance the match position by one more character.
6004 */
6005
6006 if (start_match[-1] == CHAR_CR &&
6007 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
6008 start_match < end_subject &&
6009 *start_match == CHAR_NL)
6010 start_match++;
6011 }
6012 }
6013
6014 /* Or to a non-unique first byte after study */
6015
6016 else if (start_bits != NULL)
6017 {
6018 while (start_match < end_subject)
6019 {
6020 register unsigned int c = *start_match;
6021 if ((start_bits[c/8] & (1 << (c&7))) == 0)
6022 {
6023 start_match++;
6024 #ifdef SUPPORT_UTF8
6025 if (utf8)
6026 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
6027 start_match++;
6028 #endif
6029 }
6030 else break;
6031 }
6032 }
6033 } /* Starting optimizations */
6034
6035 /* Restore fudged end_subject */
6036
6037 end_subject = save_end_subject;
6038
6039 /* The following two optimizations are disabled for partial matching or if
6040 disabling is explicitly requested. */
6041
6042 if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
6043 {
6044 /* If the pattern was studied, a minimum subject length may be set. This is
6045 only a lower bound; there need not be any string of exactly that length that
6046 matches the pattern. Although the value is, strictly, in characters, we treat
6047 it as bytes to avoid spending too much time in this optimization. */
6048
6049 if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
6050 (pcre_uint32)(end_subject - start_match) < study->minlength)
6051 {
6052 rc = MATCH_NOMATCH;
6053 break;
6054 }
6055
6056 /* If req_byte is set, we know that that character must appear in the
6057 subject for the match to succeed. If the first character is set, req_byte
6058 must be later in the subject; otherwise the test starts at the match point.
6059 This optimization can save a huge amount of backtracking in patterns with
6060 nested unlimited repeats that aren't going to match. Writing separate code
6061 for cased/caseless versions makes it go faster, as does using an
6062 autoincrement and backing off on a match.
6063
6064 HOWEVER: when the subject string is very, very long, searching to its end
6065 can take a long time, and give bad performance on quite ordinary patterns.
6066 This showed up when somebody was matching something like /^\d+C/ on a
6067 32-megabyte string... so we don't do this when the string is sufficiently
6068 long. */
6069
6070 if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
6071 {
6072 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
6073
6074 /* We don't need to repeat the search if we haven't yet reached the
6075 place we found it at last time. */
6076
6077 if (p > req_byte_ptr)
6078 {
6079 if (req_byte_caseless)
6080 {
6081 while (p < end_subject)
6082 {
6083 register int pp = *p++;
6084 if (pp == req_byte || pp == req_byte2) { p--; break; }
6085 }
6086 }
6087 else
6088 {
6089 while (p < end_subject)
6090 {
6091 if (*p++ == req_byte) { p--; break; }
6092 }
6093 }
6094
6095 /* If we can't find the required character, break the matching loop,
6096 forcing a match failure. */
6097
6098 if (p >= end_subject)
6099 {
6100 rc = MATCH_NOMATCH;
6101 break;
6102 }
6103
6104 /* If we have found the required character, save the point where we
6105 found it, so that we don't search again next time round the loop if
6106 the start hasn't passed this character yet. */
6107
6108 req_byte_ptr = p;
6109 }
6110 }
6111 }
6112
6113 #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
6114 printf(">>>> Match against: ");
6115 pchars(start_match, end_subject - start_match, TRUE, md);
6116 printf("\n");
6117 #endif
6118
6119 /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6120 first starting point for which a partial match was found. */
6121
6122 md->start_match_ptr = start_match;
6123 md->start_used_ptr = start_match;
6124 md->match_call_count = 0;
6125 rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
6126 0, 0);
6127 if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6128
6129 switch(rc)
6130 {
6131 /* SKIP passes back the next starting point explicitly, but if it is the
6132 same as the start of the attempt we have just made, treat it as NOMATCH. */
6133
6134 case MATCH_SKIP:
6135 if (md->start_match_ptr != start_match)
6136 {
6137 new_start_match = md->start_match_ptr;
6138 break;
6139 }
6140 /* Fall through */
6141
6142 /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6143 the SKIP's arg was not found. We also treat this as NOMATCH. */
6144
6145 case MATCH_SKIP_ARG:
6146 /* Fall through */
6147
6148 /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6149 exactly like PRUNE. */
6150
6151 case MATCH_NOMATCH:
6152 case MATCH_PRUNE:
6153 case MATCH_THEN:
6154 new_start_match = start_match + 1;
6155 #ifdef SUPPORT_UTF8
6156 if (utf8)
6157 while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
6158 new_start_match++;
6159 #endif
6160 break;
6161
6162 /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6163
6164 case MATCH_COMMIT:
6165 rc = MATCH_NOMATCH;
6166 goto ENDLOOP;
6167
6168 /* Any other return is either a match, or some kind of error. */
6169
6170 default:
6171 goto ENDLOOP;
6172 }
6173
6174 /* Control reaches here for the various types of "no match at this point"
6175 result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6176
6177 rc = MATCH_NOMATCH;
6178
6179 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
6180 newline in the subject (though it may continue over the newline). Therefore,
6181 if we have just failed to match, starting at a newline, do not continue. */
6182
6183 if (firstline && IS_NEWLINE(start_match)) break;
6184
6185 /* Advance to new matching position */
6186
6187 start_match = new_start_match;
6188
6189 /* Break the loop if the pattern is anchored or if we have passed the end of
6190 the subject. */
6191
6192 if (anchored || start_match > end_subject) break;
6193
6194 /* If we have just passed a CR and we are now at a LF, and the pattern does
6195 not contain any explicit matches for \r or \n, and the newline option is CRLF
6196 or ANY or ANYCRLF, advance the match position by one more character. */
6197
6198 if (start_match[-1] == CHAR_CR &&
6199 start_match < end_subject &&
6200 *start_match == CHAR_NL &&
6201 (re->flags & PCRE_HASCRORLF) == 0 &&
6202 (md->nltype == NLTYPE_ANY ||
6203 md->nltype == NLTYPE_ANYCRLF ||
6204 md->nllen == 2))
6205 start_match++;
6206
6207 md->mark = NULL; /* Reset for start of next match attempt */
6208 } /* End of for(;;) "bumpalong" loop */
6209
6210 /* ==========================================================================*/
6211
6212 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
6213 conditions is true:
6214
6215 (1) The pattern is anchored or the match was failed by (*COMMIT);
6216
6217 (2) We are past the end of the subject;
6218
6219 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
6220 this option requests that a match occur at or before the first newline in
6221 the subject.
6222
6223 When we have a match and the offset vector is big enough to deal with any
6224 backreferences, captured substring offsets will already be set up. In the case
6225 where we had to get some local store to hold offsets for backreference
6226 processing, copy those that we can. In this case there need not be overflow if
6227 certain parts of the pattern were not used, even though there are more
6228 capturing parentheses than vector slots. */
6229
6230 ENDLOOP:
6231
6232 if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
6233 {
6234 if (using_temporary_offsets)
6235 {
6236 if (offsetcount >= 4)
6237 {
6238 memcpy(offsets + 2, md->offset_vector + 2,
6239 (offsetcount - 2) * sizeof(int));
6240 DPRINTF(("Copied offsets from temporary memory\n"));
6241 }
6242 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
6243 DPRINTF(("Freeing temporary memory\n"));
6244 (pcre_free)(md->offset_vector);
6245 }
6246
6247 /* Set the return code to the number of captured strings, or 0 if there are
6248 too many to fit into the vector. */
6249
6250 rc = md->offset_overflow? 0 : md->end_offset_top/2;
6251
6252 /* If there is space, set up the whole match as substring 0. The value of
6253 md->start_match_ptr might have been modified if \K was encountered on the
6254 successful matching path. */
6255
6256 if (offsetcount < 2) rc = 0; else
6257 {
6258 offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6259 offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6260 }
6261
6262 DPRINTF((">>>> returning %d\n", rc));
6263 goto RETURN_MARK;
6264 }
6265
6266 /* Control gets here if there has been an error, or if the overall match
6267 attempt has failed at all permitted starting positions. */
6268
6269 if (using_temporary_offsets)
6270 {
6271 DPRINTF(("Freeing temporary memory\n"));
6272 (pcre_free)(md->offset_vector);
6273 }
6274
6275 /* For anything other than nomatch or partial match, just return the code. */
6276
6277 if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6278 {
6279 DPRINTF((">>>> error: returning %d\n", rc));
6280 return rc;
6281 }
6282
6283 /* Handle partial matches - disable any mark data */
6284
6285 if (start_partial != NULL)
6286 {
6287 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6288 md->mark = NULL;
6289 if (offsetcount > 1)
6290 {
6291 offsets[0] = (int)(start_partial - (USPTR)subject);
6292 offsets[1] = (int)(end_subject - (USPTR)subject);
6293 }
6294 rc = PCRE_ERROR_PARTIAL;
6295 }
6296
6297 /* This is the classic nomatch case */
6298
6299 else
6300 {
6301 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6302 rc = PCRE_ERROR_NOMATCH;
6303 }
6304
6305 /* Return the MARK data if it has been requested. */
6306
6307 RETURN_MARK:
6308
6309 if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6310 *(extra_data->mark) = (unsigned char *)(md->mark);
6311 return rc;
6312 }
6313
6314 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12