/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 190 - (show annotations) (download)
Thu Jul 19 10:38:20 2007 UTC (7 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 146326 byte(s)
Fix bug with .*$ when run in not-DOTALL UTF-8 mode; small performance 
improvement for .* in DOTALL UTF-8 mode.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains pcre_exec(), the externally visible function that does
42 pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43 possible. There are also some static supporting functions. */
44
45 #define NLBLOCK md /* Block containing newline information */
46 #define PSSTART start_subject /* Field containing processed string start */
47 #define PSEND end_subject /* Field containing processed string end */
48
49 #include "pcre_internal.h"
50
51 /* Undefine some potentially clashing cpp symbols */
52
53 #undef min
54 #undef max
55
/* Tail recursions take their eptrblocks from memory in stack workspace that is
obtained at top level; EPTR_WORK_SIZE defines the size of that workspace. */

#define EPTR_WORK_SIZE (1000)

/* Bits that may be set in the flags argument of match() */

#define match_condassert     (1 << 0)  /* Checking a condition assertion */
#define match_cbegroup       (1 << 1)  /* Unlimited repeat group, may be empty */
#define match_tail_recursed  (1 << 2)  /* Entered by tail recursion */

/* Results of match() that are not errors. Errors are reported using the
externally defined PCRE_ERROR_xxx codes, all of which are negative. */

#define MATCH_NOMATCH 0
#define MATCH_MATCH   1

/* Upper limit on the number of offset ints that are saved on the stack for a
recursive call; malloc is used when the offset vector is bigger. Keep this a
multiple of 3, because the offset vector is always a multiple of 3 long. */

#define REC_STACK_SAVE_MAX 30
78
/* Minimum and maximum repeat counts for the common repeats. In rep_max a
value of 0 stands for "no upper limit". */

static const char rep_min[] = {
  0, 0,
  1, 1,
  0, 0
};

static const char rep_max[] = {
  0, 0,
  0, 0,
  1, 1
};
83
84
85
86 #ifdef DEBUG
87 /*************************************************
88 * Debugging function to print chars *
89 *************************************************/
90
91 /* Print a sequence of chars in printable format, stopping at the end of the
92 subject if the requested.
93
94 Arguments:
95 p points to characters
96 length number to print
97 is_subject TRUE if printing from within md->start_subject
98 md pointer to matching data block, if is_subject is TRUE
99
100 Returns: nothing
101 */
102
103 static void
104 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
105 {
106 unsigned int c;
107 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
108 while (length-- > 0)
109 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
110 }
111 #endif
112
113
114
115 /*************************************************
116 * Match a back-reference *
117 *************************************************/
118
119 /* If a back reference hasn't been set, the length that is passed is greater
120 than the number of characters left in the string, so the match fails.
121
122 Arguments:
123 offset index into the offset vector
124 eptr points into the subject
125 length length to be matched
126 md points to match data block
127 ims the ims flags
128
129 Returns: TRUE if matched
130 */
131
132 static BOOL
133 match_ref(int offset, register USPTR eptr, int length, match_data *md,
134 unsigned long int ims)
135 {
136 USPTR p = md->start_subject + md->offset_vector[offset];
137
138 #ifdef DEBUG
139 if (eptr >= md->end_subject)
140 printf("matching subject <null>");
141 else
142 {
143 printf("matching subject ");
144 pchars(eptr, length, TRUE, md);
145 }
146 printf(" against backref ");
147 pchars(p, length, FALSE, md);
148 printf("\n");
149 #endif
150
151 /* Always fail if not enough characters left */
152
153 if (length > md->end_subject - eptr) return FALSE;
154
155 /* Separate the caselesss case for speed */
156
157 if ((ims & PCRE_CASELESS) != 0)
158 {
159 while (length-- > 0)
160 if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
161 }
162 else
163 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
164
165 return TRUE;
166 }
167
168
169
170 /***************************************************************************
171 ****************************************************************************
172 RECURSION IN THE match() FUNCTION
173
174 The match() function is highly recursive, though not every recursive call
175 increases the recursive depth. Nevertheless, some regular expressions can cause
176 it to recurse to a great depth. I was writing for Unix, so I just let it call
177 itself recursively. This uses the stack for saving everything that has to be
178 saved for a recursive call. On Unix, the stack can be large, and this works
179 fine.
180
181 It turns out that on some non-Unix-like systems there are problems with
182 programs that use a lot of stack. (This despite the fact that every last chip
183 has oodles of memory these days, and techniques for extending the stack have
184 been known for decades.) So....
185
186 There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
187 calls by keeping local variables that need to be preserved in blocks of memory
188 obtained from malloc() instead of on the stack. Macros are used to
189 achieve this so that the actual code doesn't look very different to what it
190 always used to.
191
192 The original heap-recursive code used longjmp(). However, it seems that this
193 can be very slow on some operating systems. Following a suggestion from Stan
194 Switzer, the use of longjmp() has been abolished, at the cost of having to
195 provide a unique number for each call to RMATCH. There is no way of generating
196 a sequence of numbers at compile time in C. I have given them names, to make
197 them stand out more clearly.
198
199 Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
200 FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
201 tests. Furthermore, not using longjmp() means that local dynamic variables
202 don't have indeterminate values; this has meant that the frame size can be
203 reduced because the result can be "passed back" by straight setting of the
204 variable instead of being passed in the frame.
205 ****************************************************************************
206 ***************************************************************************/
207
208
/* Identifying numbers for the RMATCH calls: each call passes a different RMn
value. The values run consecutively from 1 to 47. */

enum {
  RM1  =  1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
  RM11 = 11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
  RM21 = 21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
  RM31 = 31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
  RM41 = 41, RM42, RM43, RM44, RM45, RM46, RM47
};
216
217
218 /* These versions of the macros use the stack, as normal. There are debugging
219 versions and production versions. Note that the "rw" argument of RMATCH isn't
220 actually used in this definition. */
221
222 #ifndef NO_RECURSE
223 #define REGISTER register
224
/* Stack-based RMATCH and RRETURN. RMATCH makes a true recursive call to
match(), passing the current mstart and rdepth+1, and leaves the result in rrc;
its "rw" argument is not used here. RRETURN returns its argument from match().
DEBUG builds additionally trace every call and return with printf().

The multi-statement DEBUG forms are wrapped in do { ... } while (0) so that a
macro use followed by a semicolon is exactly one statement. With a bare brace
block, "if (x) RRETURN(y); else ..." would not compile in a DEBUG build. */

#ifdef DEBUG
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  do \
    { \
    printf("match() called in line %d\n", __LINE__); \
    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
    printf("to line %d\n", __LINE__); \
    } \
  while (0)
#define RRETURN(ra) \
  do \
    { \
    printf("match() returned %d from line %d ", ra, __LINE__); \
    return ra; \
    } \
  while (0)
#else
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
#define RRETURN(ra) return ra
#endif
242
243 #else
244
245
246 /* These versions of the macros manage a private stack on the heap. Note that
247 the "rd" argument of RMATCH isn't actually used in this definition. It's the md
248 argument of match(), which never changes. */
249
250 #define REGISTER
251
/* Heap-based RMATCH: simulate a recursive call to match(). A new heapframe is
obtained with pcre_stack_malloc, the resume point "rw" is recorded in the
current frame, the argument values are copied into the new frame, and control
jumps to HEAP_RECURSE. The label L_<rw> is where execution continues once the
simulated call has returned. The "rd" argument is not used.

If the frame cannot be allocated, the current incarnation gives up with
PCRE_ERROR_NOMEMORY via RRETURN instead of dereferencing a NULL pointer. */

#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  frame->Xwhere = rw; \
  newframe->Xeptr = ra;\
  newframe->Xecode = rb;\
  newframe->Xmstart = mstart;\
  newframe->Xoffset_top = rc;\
  newframe->Xims = re;\
  newframe->Xeptrb = rf;\
  newframe->Xflags = rg;\
  newframe->Xrdepth = frame->Xrdepth + 1;\
  newframe->Xprevframe = frame;\
  frame = newframe;\
  DPRINTF(("restarting from line %d\n", __LINE__));\
  goto HEAP_RECURSE;\
  L_##rw:\
  DPRINTF(("jumped back to line %d\n", __LINE__));\
  }
271
/* Heap-based RRETURN: leave the current simulated incarnation of match(). The
current frame is released with pcre_stack_free and its predecessor becomes
current. At the top level (no predecessor) this is a real return of "ra";
otherwise "ra" is placed in rrc and control jumps to HEAP_RETURN. */

#define RRETURN(ra)\
  {\
  heapframe *oldframe = frame;\
  frame = oldframe->Xprevframe;\
  (pcre_stack_free)(oldframe);\
  if (frame == NULL) return ra;\
  rrc = ra;\
  goto HEAP_RETURN;\
  }
284
285
286 /* Structure for remembering the local variables in a private frame */
287
288 typedef struct heapframe {
289 struct heapframe *Xprevframe;
290
291 /* Function arguments that may change */
292
293 const uschar *Xeptr;
294 const uschar *Xecode;
295 const uschar *Xmstart;
296 int Xoffset_top;
297 long int Xims;
298 eptrblock *Xeptrb;
299 int Xflags;
300 unsigned int Xrdepth;
301
302 /* Function local variables */
303
304 const uschar *Xcallpat;
305 const uschar *Xcharptr;
306 const uschar *Xdata;
307 const uschar *Xnext;
308 const uschar *Xpp;
309 const uschar *Xprev;
310 const uschar *Xsaved_eptr;
311
312 recursion_info Xnew_recursive;
313
314 BOOL Xcur_is_word;
315 BOOL Xcondition;
316 BOOL Xprev_is_word;
317
318 unsigned long int Xoriginal_ims;
319
320 #ifdef SUPPORT_UCP
321 int Xprop_type;
322 int Xprop_value;
323 int Xprop_fail_result;
324 int Xprop_category;
325 int Xprop_chartype;
326 int Xprop_script;
327 int Xoclength;
328 uschar Xocchars[8];
329 #endif
330
331 int Xctype;
332 unsigned int Xfc;
333 int Xfi;
334 int Xlength;
335 int Xmax;
336 int Xmin;
337 int Xnumber;
338 int Xoffset;
339 int Xop;
340 int Xsave_capture_last;
341 int Xsave_offset1, Xsave_offset2, Xsave_offset3;
342 int Xstacksave[REC_STACK_SAVE_MAX];
343
344 eptrblock Xnewptrb;
345
346 /* Where to jump back to */
347
348 int Xwhere;
349
350 } heapframe;
351
352 #endif
353
354
355 /***************************************************************************
356 ***************************************************************************/
357
358
359
360 /*************************************************
361 * Match from current position *
362 *************************************************/
363
364 /* This function is called recursively in many circumstances. Whenever it
365 returns a negative (error) response, the outer incarnation must also return the
366 same response.
367
368 Performance note: It might be tempting to extract commonly used fields from the
369 md structure (e.g. utf8, end_subject) into individual variables to improve
370 performance. Tests using gcc on a SPARC disproved this; in the first case, it
371 made performance worse.
372
373 Arguments:
374 eptr pointer to current character in subject
375 ecode pointer to current position in compiled code
376 mstart pointer to the current match start position (can be modified
377 by encountering \K)
378 offset_top current top pointer
379 md pointer to "static" info for the match
380 ims current /i, /m, and /s options
381 eptrb pointer to chain of blocks containing eptr at start of
382 brackets - for testing for empty matches
383 flags can contain
384 match_condassert - this is an assertion condition
385 match_cbegroup - this is the start of an unlimited repeat
386 group that can match an empty string
387 match_tail_recursed - this is a tail_recursed group
388 rdepth the recursion depth
389
390 Returns: MATCH_MATCH if matched ) these values are >= 0
391 MATCH_NOMATCH if failed to match )
392 a negative PCRE_ERROR_xxx value if aborted by an error condition
393 (e.g. stopped by repeated call or recursion limit)
394 */
395
396 static int
397 match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
398 int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
399 int flags, unsigned int rdepth)
400 {
401 /* These variables do not need to be preserved over recursion in this function,
402 so they can be ordinary variables in all cases. Mark some of them with
403 "register" because they are used a lot in loops. */
404
405 register int rrc; /* Returns from recursive calls */
406 register int i; /* Used for loops not involving calls to RMATCH() */
407 register unsigned int c; /* Character values not kept over RMATCH() calls */
408 register BOOL utf8; /* Local copy of UTF-8 flag for speed */
409
410 BOOL minimize, possessive; /* Quantifier options */
411
412 /* When recursion is not being used, all "local" variables that have to be
413 preserved over calls to RMATCH() are part of a "frame" which is obtained from
414 heap storage. Set up the top-level frame here; others are obtained from the
415 heap whenever RMATCH() does a "recursion". See the macro definitions above. */
416
417 #ifdef NO_RECURSE
418 heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
419 frame->Xprevframe = NULL; /* Marks the top level */
420
421 /* Copy in the original argument variables */
422
423 frame->Xeptr = eptr;
424 frame->Xecode = ecode;
425 frame->Xmstart = mstart;
426 frame->Xoffset_top = offset_top;
427 frame->Xims = ims;
428 frame->Xeptrb = eptrb;
429 frame->Xflags = flags;
430 frame->Xrdepth = rdepth;
431
432 /* This is where control jumps back to to effect "recursion" */
433
434 HEAP_RECURSE:
435
436 /* Macros make the argument variables come from the current frame */
437
438 #define eptr frame->Xeptr
439 #define ecode frame->Xecode
440 #define mstart frame->Xmstart
441 #define offset_top frame->Xoffset_top
442 #define ims frame->Xims
443 #define eptrb frame->Xeptrb
444 #define flags frame->Xflags
445 #define rdepth frame->Xrdepth
446
447 /* Ditto for the local variables */
448
449 #ifdef SUPPORT_UTF8
450 #define charptr frame->Xcharptr
451 #endif
452 #define callpat frame->Xcallpat
453 #define data frame->Xdata
454 #define next frame->Xnext
455 #define pp frame->Xpp
456 #define prev frame->Xprev
457 #define saved_eptr frame->Xsaved_eptr
458
459 #define new_recursive frame->Xnew_recursive
460
461 #define cur_is_word frame->Xcur_is_word
462 #define condition frame->Xcondition
463 #define prev_is_word frame->Xprev_is_word
464
465 #define original_ims frame->Xoriginal_ims
466
467 #ifdef SUPPORT_UCP
468 #define prop_type frame->Xprop_type
469 #define prop_value frame->Xprop_value
470 #define prop_fail_result frame->Xprop_fail_result
471 #define prop_category frame->Xprop_category
472 #define prop_chartype frame->Xprop_chartype
473 #define prop_script frame->Xprop_script
474 #define oclength frame->Xoclength
475 #define occhars frame->Xocchars
476 #endif
477
478 #define ctype frame->Xctype
479 #define fc frame->Xfc
480 #define fi frame->Xfi
481 #define length frame->Xlength
482 #define max frame->Xmax
483 #define min frame->Xmin
484 #define number frame->Xnumber
485 #define offset frame->Xoffset
486 #define op frame->Xop
487 #define save_capture_last frame->Xsave_capture_last
488 #define save_offset1 frame->Xsave_offset1
489 #define save_offset2 frame->Xsave_offset2
490 #define save_offset3 frame->Xsave_offset3
491 #define stacksave frame->Xstacksave
492
493 #define newptrb frame->Xnewptrb
494
495 /* When recursion is being used, local variables are allocated on the stack and
496 get preserved during recursion in the normal way. In this environment, fi and
497 i, and fc and c, can be the same variables. */
498
499 #else /* NO_RECURSE not defined */
500 #define fi i
501 #define fc c
502
503
504 #ifdef SUPPORT_UTF8 /* Many of these variables are used only */
505 const uschar *charptr; /* in small blocks of the code. My normal */
506 #endif /* style of coding would have declared */
507 const uschar *callpat; /* them within each of those blocks. */
508 const uschar *data; /* However, in order to accommodate the */
509 const uschar *next; /* version of this code that uses an */
510 USPTR pp; /* external "stack" implemented on the */
511 const uschar *prev; /* heap, it is easier to declare them all */
512 USPTR saved_eptr; /* here, so the declarations can be cut */
513 /* out in a block. The only declarations */
514 recursion_info new_recursive; /* within blocks below are for variables */
515 /* that do not have to be preserved over */
516 BOOL cur_is_word; /* a recursive call to RMATCH(). */
517 BOOL condition;
518 BOOL prev_is_word;
519
520 unsigned long int original_ims;
521
522 #ifdef SUPPORT_UCP
523 int prop_type;
524 int prop_value;
525 int prop_fail_result;
526 int prop_category;
527 int prop_chartype;
528 int prop_script;
529 int oclength;
530 uschar occhars[8];
531 #endif
532
533 int ctype;
534 int length;
535 int max;
536 int min;
537 int number;
538 int offset;
539 int op;
540 int save_capture_last;
541 int save_offset1, save_offset2, save_offset3;
542 int stacksave[REC_STACK_SAVE_MAX];
543
544 eptrblock newptrb;
545 #endif /* NO_RECURSE */
546
547 /* These statements are here to stop the compiler complaining about uninitialized
548 variables. */
549
550 #ifdef SUPPORT_UCP
551 prop_value = 0;
552 prop_fail_result = 0;
553 #endif
554
555
556 /* This label is used for tail recursion, which is used in a few cases even
557 when NO_RECURSE is not defined, in order to reduce the amount of stack that is
558 used. Thanks to Ian Taylor for noticing this possibility and sending the
559 original patch. */
560
561 TAIL_RECURSE:
562
563 /* OK, now we can get on with the real code of the function. Recursive calls
564 are specified by the macro RMATCH and RRETURN is used to return. When
565 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
566 and a "return", respectively (possibly with some debugging if DEBUG is
567 defined). However, RMATCH isn't like a function call because it's quite a
568 complicated macro. It has to be used in one particular way. This shouldn't,
569 however, impact performance when true recursion is being used. */
570
571 #ifdef SUPPORT_UTF8
572 utf8 = md->utf8; /* Local copy of the flag */
573 #else
574 utf8 = FALSE;
575 #endif
576
577 /* First check that we haven't called match() too many times, or that we
578 haven't exceeded the recursive call limit. */
579
580 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
581 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
582
583 original_ims = ims; /* Save for resetting on ')' */
584
585 /* At the start of a group with an unlimited repeat that may match an empty
586 string, the match_cbegroup flag is set. When this is the case, add the current
587 subject pointer to the chain of such remembered pointers, to be checked when we
588 hit the closing ket, in order to break infinite loops that match no characters.
589 When match() is called in other circumstances, don't add to the chain. If this
590 is a tail recursion, use a block from the workspace, as the one on the stack is
591 already used. */
592
593 if ((flags & match_cbegroup) != 0)
594 {
595 eptrblock *p;
596 if ((flags & match_tail_recursed) != 0)
597 {
598 if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
599 p = md->eptrchain + md->eptrn++;
600 }
601 else p = &newptrb;
602 p->epb_saved_eptr = eptr;
603 p->epb_prev = eptrb;
604 eptrb = p;
605 }
606
607 /* Now start processing the opcodes. */
608
609 for (;;)
610 {
611 minimize = possessive = FALSE;
612 op = *ecode;
613
614 /* For partial matching, remember if we ever hit the end of the subject after
615 matching at least one subject character. */
616
617 if (md->partial &&
618 eptr >= md->end_subject &&
619 eptr > mstart)
620 md->hitend = TRUE;
621
622 switch(op)
623 {
624 /* Handle a capturing bracket. If there is space in the offset vector, save
625 the current subject position in the working slot at the top of the vector.
626 We mustn't change the current values of the data slot, because they may be
627 set from a previous iteration of this group, and be referred to by a
628 reference inside the group.
629
630 If the bracket fails to match, we need to restore this value and also the
631 values of the final offsets, in case they were set by a previous iteration
632 of the same bracket.
633
634 If there isn't enough space in the offset vector, treat this as if it were
635 a non-capturing bracket. Don't worry about setting the flag for the error
636 case here; that is handled in the code for KET. */
637
638 case OP_CBRA:
639 case OP_SCBRA:
640 number = GET2(ecode, 1+LINK_SIZE);
641 offset = number << 1;
642
643 #ifdef DEBUG
644 printf("start bracket %d\n", number);
645 printf("subject=");
646 pchars(eptr, 16, TRUE, md);
647 printf("\n");
648 #endif
649
650 if (offset < md->offset_max)
651 {
652 save_offset1 = md->offset_vector[offset];
653 save_offset2 = md->offset_vector[offset+1];
654 save_offset3 = md->offset_vector[md->offset_end - number];
655 save_capture_last = md->capture_last;
656
657 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
658 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
659
660 flags = (op == OP_SCBRA)? match_cbegroup : 0;
661 do
662 {
663 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
664 ims, eptrb, flags, RM1);
665 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
666 md->capture_last = save_capture_last;
667 ecode += GET(ecode, 1);
668 }
669 while (*ecode == OP_ALT);
670
671 DPRINTF(("bracket %d failed\n", number));
672
673 md->offset_vector[offset] = save_offset1;
674 md->offset_vector[offset+1] = save_offset2;
675 md->offset_vector[md->offset_end - number] = save_offset3;
676
677 RRETURN(MATCH_NOMATCH);
678 }
679
680 /* Insufficient room for saving captured contents. Treat as a non-capturing
681 bracket. */
682
683 DPRINTF(("insufficient capture room: treat as non-capturing\n"));
684
685 /* Non-capturing bracket. Loop for all the alternatives. When we get to the
686 final alternative within the brackets, we would return the result of a
687 recursive call to match() whatever happened. We can reduce stack usage by
688 turning this into a tail recursion. */
689
690 case OP_BRA:
691 case OP_SBRA:
692 DPRINTF(("start non-capturing bracket\n"));
693 flags = (op >= OP_SBRA)? match_cbegroup : 0;
694 for (;;)
695 {
696 if (ecode[GET(ecode, 1)] != OP_ALT)
697 {
698 ecode += _pcre_OP_lengths[*ecode];
699 flags |= match_tail_recursed;
700 DPRINTF(("bracket 0 tail recursion\n"));
701 goto TAIL_RECURSE;
702 }
703
704 /* For non-final alternatives, continue the loop for a NOMATCH result;
705 otherwise return. */
706
707 RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
708 eptrb, flags, RM2);
709 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
710 ecode += GET(ecode, 1);
711 }
712 /* Control never reaches here. */
713
714 /* Conditional group: compilation checked that there are no more than
715 two branches. If the condition is false, skipping the first branch takes us
716 past the end if there is only one branch, but that's OK because that is
717 exactly what going to the ket would do. As there is only one branch to be
718 obeyed, we can use tail recursion to avoid using another stack frame. */
719
720 case OP_COND:
721 case OP_SCOND:
722 if (ecode[LINK_SIZE+1] == OP_RREF) /* Recursion test */
723 {
724 offset = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/
725 condition = md->recursive != NULL &&
726 (offset == RREF_ANY || offset == md->recursive->group_num);
727 ecode += condition? 3 : GET(ecode, 1);
728 }
729
730 else if (ecode[LINK_SIZE+1] == OP_CREF) /* Group used test */
731 {
732 offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */
733 condition = offset < offset_top && md->offset_vector[offset] >= 0;
734 ecode += condition? 3 : GET(ecode, 1);
735 }
736
737 else if (ecode[LINK_SIZE+1] == OP_DEF) /* DEFINE - always false */
738 {
739 condition = FALSE;
740 ecode += GET(ecode, 1);
741 }
742
743 /* The condition is an assertion. Call match() to evaluate it - setting
744 the final argument match_condassert causes it to stop at the end of an
745 assertion. */
746
747 else
748 {
749 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
750 match_condassert, RM3);
751 if (rrc == MATCH_MATCH)
752 {
753 condition = TRUE;
754 ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
755 while (*ecode == OP_ALT) ecode += GET(ecode, 1);
756 }
757 else if (rrc != MATCH_NOMATCH)
758 {
759 RRETURN(rrc); /* Need braces because of following else */
760 }
761 else
762 {
763 condition = FALSE;
764 ecode += GET(ecode, 1);
765 }
766 }
767
768 /* We are now at the branch that is to be obeyed. As there is only one,
769 we can use tail recursion to avoid using another stack frame. If the second
770 alternative doesn't exist, we can just plough on. */
771
772 if (condition || *ecode == OP_ALT)
773 {
774 ecode += 1 + LINK_SIZE;
775 flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
776 goto TAIL_RECURSE;
777 }
778 else
779 {
780 ecode += 1 + LINK_SIZE;
781 }
782 break;
783
784
785 /* End of the pattern. If we are in a top-level recursion, we should
786 restore the offsets appropriately and continue from after the call. */
787
788 case OP_END:
789 if (md->recursive != NULL && md->recursive->group_num == 0)
790 {
791 recursion_info *rec = md->recursive;
792 DPRINTF(("End of pattern in a (?0) recursion\n"));
793 md->recursive = rec->prevrec;
794 memmove(md->offset_vector, rec->offset_save,
795 rec->saved_max * sizeof(int));
796 mstart = rec->save_start;
797 ims = original_ims;
798 ecode = rec->after_call;
799 break;
800 }
801
802 /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
803 string - backtracking will then try other alternatives, if any. */
804
805 if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
806 md->end_match_ptr = eptr; /* Record where we ended */
807 md->end_offset_top = offset_top; /* and how many extracts were taken */
808 md->start_match_ptr = mstart; /* and the start (\K can modify) */
809 RRETURN(MATCH_MATCH);
810
811 /* Change option settings */
812
813 case OP_OPT:
814 ims = ecode[1];
815 ecode += 2;
816 DPRINTF(("ims set to %02lx\n", ims));
817 break;
818
819 /* Assertion brackets. Check the alternative branches in turn - the
820 matching won't pass the KET for an assertion. If any one branch matches,
821 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
822 start of each branch to move the current point backwards, so the code at
823 this level is identical to the lookahead case. */
824
825 case OP_ASSERT:
826 case OP_ASSERTBACK:
827 do
828 {
829 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
830 RM4);
831 if (rrc == MATCH_MATCH) break;
832 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
833 ecode += GET(ecode, 1);
834 }
835 while (*ecode == OP_ALT);
836 if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
837
838 /* If checking an assertion for a condition, return MATCH_MATCH. */
839
840 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
841
842 /* Continue from after the assertion, updating the offsets high water
843 mark, since extracts may have been taken during the assertion. */
844
845 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
846 ecode += 1 + LINK_SIZE;
847 offset_top = md->end_offset_top;
848 continue;
849
850 /* Negative assertion: all branches must fail to match */
851
852 case OP_ASSERT_NOT:
853 case OP_ASSERTBACK_NOT:
854 do
855 {
856 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
857 RM5);
858 if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
859 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
860 ecode += GET(ecode,1);
861 }
862 while (*ecode == OP_ALT);
863
864 if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);
865
866 ecode += 1 + LINK_SIZE;
867 continue;
868
869 /* Move the subject pointer back. This occurs only at the start of
870 each branch of a lookbehind assertion. If we are too close to the start to
871 move back, this match function fails. When working with UTF-8 we move
872 back a number of characters, not bytes. */
873
874 case OP_REVERSE:
875 #ifdef SUPPORT_UTF8
876 if (utf8)
877 {
878 i = GET(ecode, 1);
879 while (i-- > 0)
880 {
881 eptr--;
882 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
883 BACKCHAR(eptr)
884 }
885 }
886 else
887 #endif
888
889 /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
890
891 {
892 eptr -= GET(ecode, 1);
893 if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
894 }
895
896 /* Skip to next op code */
897
898 ecode += 1 + LINK_SIZE;
899 break;
900
901 /* The callout item calls an external function, if one is provided, passing
902 details of the match so far. This is mainly for debugging, though the
903 function is able to force a failure. */
904
905 case OP_CALLOUT:
906 if (pcre_callout != NULL)
907 {
908 pcre_callout_block cb;
909 cb.version = 1; /* Version 1 of the callout block */
910 cb.callout_number = ecode[1];
911 cb.offset_vector = md->offset_vector;
912 cb.subject = (PCRE_SPTR)md->start_subject;
913 cb.subject_length = md->end_subject - md->start_subject;
914 cb.start_match = mstart - md->start_subject;
915 cb.current_position = eptr - md->start_subject;
916 cb.pattern_position = GET(ecode, 2);
917 cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
918 cb.capture_top = offset_top/2;
919 cb.capture_last = md->capture_last;
920 cb.callout_data = md->callout_data;
921 if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
922 if (rrc < 0) RRETURN(rrc);
923 }
924 ecode += 2 + 2*LINK_SIZE;
925 break;
926
927 /* Recursion either matches the current regex, or some subexpression. The
928 offset data is the offset to the starting bracket from the start of the
929 whole pattern. (This is so that it works from duplicated subpatterns.)
930
931 If there are any capturing brackets started but not finished, we have to
932 save their starting points and reinstate them after the recursion. However,
933 we don't know how many such there are (offset_top records the completed
934 total) so we just have to save all the potential data. There may be up to
935 65535 such values, which is too large to put on the stack, but using malloc
936 for small numbers seems expensive. As a compromise, the stack is used when
937 there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc
938 is used. If the malloc fails, the recursion cannot proceed: matching is
939 abandoned at this point and PCRE_ERROR_NOMEMORY is returned instead of
940 carrying on with incomplete saved data.
941
942 There are also other values that have to be saved. We use a chained
943 sequence of blocks that actually live on the stack. Thanks to Robin Houston
944 for the original version of this logic. */
945
946 case OP_RECURSE:
947 {
948 callpat = md->start_code + GET(ecode, 1);
949 new_recursive.group_num = (callpat == md->start_code)? 0 :
950 GET2(callpat, 1 + LINK_SIZE);
951
952 /* Add to "recursing stack" */
953
954 new_recursive.prevrec = md->recursive;
955 md->recursive = &new_recursive;
956
957 /* Find where to continue from afterwards */
958
959 ecode += 1 + LINK_SIZE;
960 new_recursive.after_call = ecode;
961
962 /* Now save the offset data. */
963
964 new_recursive.saved_max = md->offset_end;
965 if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
966 new_recursive.offset_save = stacksave;
967 else
968 {
969 new_recursive.offset_save =
970 (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));
971 if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
972 }
973
974 memcpy(new_recursive.offset_save, md->offset_vector,
975 new_recursive.saved_max * sizeof(int));
976 new_recursive.save_start = mstart;
977 mstart = eptr;
978
979 /* OK, now we can do the recursion. For each top-level alternative we
980 restore the offset and recursion data. */
981
982 DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
983 flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
984 do
985 {
986 RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
987 md, ims, eptrb, flags, RM6);
988 if (rrc == MATCH_MATCH)
989 {
990 DPRINTF(("Recursion matched\n"));
991 md->recursive = new_recursive.prevrec;
992 if (new_recursive.offset_save != stacksave)
993 (pcre_free)(new_recursive.offset_save);
994 RRETURN(MATCH_MATCH);
995 }
996 else if (rrc != MATCH_NOMATCH)
997 {
998 DPRINTF(("Recursion gave error %d\n", rrc));
999 RRETURN(rrc);
1000 }
1001
1002 md->recursive = &new_recursive;
1003 memcpy(md->offset_vector, new_recursive.offset_save,
1004 new_recursive.saved_max * sizeof(int));
1005 callpat += GET(callpat, 1);
1006 }
1007 while (*callpat == OP_ALT);
1008
1009 DPRINTF(("Recursion didn't match\n"));
1010 md->recursive = new_recursive.prevrec;
1011 if (new_recursive.offset_save != stacksave)
1012 (pcre_free)(new_recursive.offset_save);
1013 RRETURN(MATCH_NOMATCH);
1014 }
1015 /* Control never reaches here */
1016
1017 /* "Once" brackets are like assertion brackets except that after a match,
1018 the point in the subject string is not moved back. Thus there can never be
1019 a move back into the brackets. Friedl calls these "atomic" subpatterns.
1020 Check the alternative branches in turn - the matching won't pass the KET
1021 for this kind of subpattern. If any one branch matches, we carry on as at
1022 the end of a normal bracket, leaving the subject pointer. */
1023
1024 case OP_ONCE:
1025 prev = ecode;
1026 saved_eptr = eptr;
1027
1028 do
1029 {
1030 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1031 eptrb, 0, RM7);
1032 if (rrc == MATCH_MATCH) break;
1033 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1034 ecode += GET(ecode,1);
1035 }
1036 while (*ecode == OP_ALT);
1037
1038 /* If we hit the end of the group (which could be repeated), fail */
1039
1040 if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1041
1042 /* Continue as from after the atomic group, updating the offsets high water
1043 mark, since extracts may have been taken. */
1044
1045 do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1046
1047 offset_top = md->end_offset_top;
1048 eptr = md->end_match_ptr;
1049
1050 /* For a non-repeating ket, just continue at this level. This also
1051 happens for a repeating ket if no characters were matched in the group.
1052 This is the forcible breaking of infinite loops as implemented in Perl
1053 5.005. If there is an options reset, it will get obeyed in the normal
1054 course of events. */
1055
1056 if (*ecode == OP_KET || eptr == saved_eptr)
1057 {
1058 ecode += 1+LINK_SIZE;
1059 break;
1060 }
1061
1062 /* The repeating kets try the rest of the pattern or restart from the
1063 preceding bracket, in the appropriate order. The second "call" of match()
1064 uses tail recursion, to avoid using another stack frame. We need to reset
1065 any options that changed within the bracket before re-running it, so
1066 check the next opcode. */
1067
1068 if (ecode[1+LINK_SIZE] == OP_OPT)
1069 {
1070 ims = (ims & ~PCRE_IMS) | ecode[4];
1071 DPRINTF(("ims set to %02lx at group repeat\n", ims));
1072 }
1073
1074 if (*ecode == OP_KETRMIN)
1075 {
1076 RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,
1077 RM8);
1078 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1079 ecode = prev;
1080 flags = match_tail_recursed;
1081 goto TAIL_RECURSE;
1082 }
1083 else /* OP_KETRMAX */
1084 {
1085 RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1086 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1087 ecode += 1 + LINK_SIZE;
1088 flags = match_tail_recursed;
1089 goto TAIL_RECURSE;
1090 }
1091 /* Control never gets here */
1092
1093 /* An alternation is the end of a branch; scan along to find the end of the
1094 bracketed group and go to there. */
1095
1096 case OP_ALT:
1097 do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1098 break;
1099
1100 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
1101 that it may occur zero times. It may repeat infinitely, or not at all -
1102 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
1103 repeat limits are compiled as a number of copies, with the optional ones
1104 preceded by BRAZERO or BRAMINZERO. */
1105
1106 case OP_BRAZERO:
1107 {
1108 next = ecode+1;
1109 RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1110 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1111 do next += GET(next,1); while (*next == OP_ALT);
1112 ecode = next + 1 + LINK_SIZE;
1113 }
1114 break;
1115
1116 case OP_BRAMINZERO:
1117 {
1118 next = ecode+1;
1119 do next += GET(next, 1); while (*next == OP_ALT);
1120 RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1121 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1122 ecode++;
1123 }
1124 break;
1125
1126 /* End of a group, repeated or non-repeating. */
1127
1128 case OP_KET:
1129 case OP_KETRMIN:
1130 case OP_KETRMAX:
1131 prev = ecode - GET(ecode, 1);
1132
1133 /* If this was a group that remembered the subject start, in order to break
1134 infinite repeats of empty string matches, retrieve the subject start from
1135 the chain. Otherwise, set it NULL. */
1136
1137 if (*prev >= OP_SBRA)
1138 {
1139 saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1140 eptrb = eptrb->epb_prev; /* Backup to previous group */
1141 }
1142 else saved_eptr = NULL;
1143
1144 /* If we are at the end of an assertion group, stop matching and return
1145 MATCH_MATCH, but record the current high water mark for use by positive
1146 assertions. Do this also for the "once" (atomic) groups. */
1147
1148 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1149 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1150 *prev == OP_ONCE)
1151 {
1152 md->end_match_ptr = eptr; /* For ONCE */
1153 md->end_offset_top = offset_top;
1154 RRETURN(MATCH_MATCH);
1155 }
1156
1157 /* For capturing groups we have to check the group number back at the start
1158 and if necessary complete handling an extraction by setting the offsets and
1159 bumping the high water mark. Note that whole-pattern recursion is coded as
1160 a recurse into group 0, so it won't be picked up here. Instead, we catch it
1161 when the OP_END is reached. Other recursion is handled here. */
1162
1163 if (*prev == OP_CBRA || *prev == OP_SCBRA)
1164 {
1165 number = GET2(prev, 1+LINK_SIZE);
1166 offset = number << 1;
1167
1168 #ifdef DEBUG
1169 printf("end bracket %d", number);
1170 printf("\n");
1171 #endif
1172
1173 md->capture_last = number;
1174 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1175 {
1176 md->offset_vector[offset] =
1177 md->offset_vector[md->offset_end - number];
1178 md->offset_vector[offset+1] = eptr - md->start_subject;
1179 if (offset_top <= offset) offset_top = offset + 2;
1180 }
1181
1182 /* Handle a recursively called group. Restore the offsets
1183 appropriately and continue from after the call. */
1184
1185 if (md->recursive != NULL && md->recursive->group_num == number)
1186 {
1187 recursion_info *rec = md->recursive;
1188 DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1189 md->recursive = rec->prevrec;
1190 mstart = rec->save_start;
1191 memcpy(md->offset_vector, rec->offset_save,
1192 rec->saved_max * sizeof(int));
1193 ecode = rec->after_call;
1194 ims = original_ims;
1195 break;
1196 }
1197 }
1198
1199 /* For both capturing and non-capturing groups, reset the value of the ims
1200 flags, in case they got changed during the group. */
1201
1202 ims = original_ims;
1203 DPRINTF(("ims reset to %02lx\n", ims));
1204
1205 /* For a non-repeating ket, just continue at this level. This also
1206 happens for a repeating ket if no characters were matched in the group.
1207 This is the forcible breaking of infinite loops as implemented in Perl
1208 5.005. If there is an options reset, it will get obeyed in the normal
1209 course of events. */
1210
1211 if (*ecode == OP_KET || eptr == saved_eptr)
1212 {
1213 ecode += 1 + LINK_SIZE;
1214 break;
1215 }
1216
1217 /* The repeating kets try the rest of the pattern or restart from the
1218 preceding bracket, in the appropriate order. In the second case, we can use
1219 tail recursion to avoid using another stack frame. */
1220
1221 flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1222
1223 if (*ecode == OP_KETRMIN)
1224 {
1225 RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,
1226 RM12);
1227 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1228 ecode = prev;
1229 flags |= match_tail_recursed;
1230 goto TAIL_RECURSE;
1231 }
1232 else /* OP_KETRMAX */
1233 {
1234 RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1235 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1236 ecode += 1 + LINK_SIZE;
1237 flags = match_tail_recursed;
1238 goto TAIL_RECURSE;
1239 }
1240 /* Control never gets here */
1241
1242 /* Start of subject unless notbol, or after internal newline if multiline */
1243
1244 case OP_CIRC:
1245 if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1246 if ((ims & PCRE_MULTILINE) != 0)
1247 {
1248 if (eptr != md->start_subject &&
1249 (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1250 RRETURN(MATCH_NOMATCH);
1251 ecode++;
1252 break;
1253 }
1254 /* ... else fall through */
1255
1256 /* Start of subject assertion */
1257
1258 case OP_SOD:
1259 if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
1260 ecode++;
1261 break;
1262
1263 /* Start of match assertion */
1264
1265 case OP_SOM:
1266 if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
1267 ecode++;
1268 break;
1269
1270 /* Reset the start of match point */
1271
1272 case OP_SET_SOM:
1273 mstart = eptr;
1274 ecode++;
1275 break;
1276
1277 /* Assert before internal newline if multiline, or before a terminating
1278 newline unless endonly is set, else end of subject unless noteol is set. */
1279
1280 case OP_DOLL:
1281 if ((ims & PCRE_MULTILINE) != 0)
1282 {
1283 if (eptr < md->end_subject)
1284 { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1285 else
1286 { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1287 ecode++;
1288 break;
1289 }
1290 else
1291 {
1292 if (md->noteol) RRETURN(MATCH_NOMATCH);
1293 if (!md->endonly)
1294 {
1295 if (eptr != md->end_subject &&
1296 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1297 RRETURN(MATCH_NOMATCH);
1298 ecode++;
1299 break;
1300 }
1301 }
1302 /* ... else fall through for endonly */
1303
1304 /* End of subject assertion (\z) */
1305
1306 case OP_EOD:
1307 if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
1308 ecode++;
1309 break;
1310
1311 /* End of subject or ending \n assertion (\Z) */
1312
1313 case OP_EODN:
1314 if (eptr != md->end_subject &&
1315 (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1316 RRETURN(MATCH_NOMATCH);
1317 ecode++;
1318 break;
1319
1320 /* Word boundary assertions */
1321
1322 case OP_NOT_WORD_BOUNDARY:
1323 case OP_WORD_BOUNDARY:
1324 {
1325
1326 /* Find out if the previous and current characters are "word" characters.
1327 It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1328 be "non-word" characters. */
1329
1330 #ifdef SUPPORT_UTF8
1331 if (utf8)
1332 {
1333 if (eptr == md->start_subject) prev_is_word = FALSE; else
1334 {
1335 const uschar *lastptr = eptr - 1;
1336 while((*lastptr & 0xc0) == 0x80) lastptr--;
1337 GETCHAR(c, lastptr);
1338 prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1339 }
1340 if (eptr >= md->end_subject) cur_is_word = FALSE; else
1341 {
1342 GETCHAR(c, eptr);
1343 cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1344 }
1345 }
1346 else
1347 #endif
1348
1349 /* More streamlined when not in UTF-8 mode */
1350
1351 {
1352 prev_is_word = (eptr != md->start_subject) &&
1353 ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1354 cur_is_word = (eptr < md->end_subject) &&
1355 ((md->ctypes[*eptr] & ctype_word) != 0);
1356 }
1357
1358 /* Now see if the situation is what we want */
1359
1360 if ((*ecode++ == OP_WORD_BOUNDARY)?
1361 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1362 RRETURN(MATCH_NOMATCH);
1363 }
1364 break;
1365
1366 /* Match a single character type; inline for speed */
1367
1368 case OP_ANY:
1369 if ((ims & PCRE_DOTALL) == 0)
1370 {
1371 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1372 }
1373 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1374 if (utf8)
1375 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1376 ecode++;
1377 break;
1378
1379 /* Match a single byte, even in UTF-8 mode. This opcode really does match
1380 any byte, even newline, independent of the setting of PCRE_DOTALL. */
1381
1382 case OP_ANYBYTE:
1383 if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1384 ecode++;
1385 break;
1386
1387 case OP_NOT_DIGIT:
1388 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1389 GETCHARINCTEST(c, eptr);
1390 if (
1391 #ifdef SUPPORT_UTF8
1392 c < 256 &&
1393 #endif
1394 (md->ctypes[c] & ctype_digit) != 0
1395 )
1396 RRETURN(MATCH_NOMATCH);
1397 ecode++;
1398 break;
1399
1400 case OP_DIGIT:
1401 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1402 GETCHARINCTEST(c, eptr);
1403 if (
1404 #ifdef SUPPORT_UTF8
1405 c >= 256 ||
1406 #endif
1407 (md->ctypes[c] & ctype_digit) == 0
1408 )
1409 RRETURN(MATCH_NOMATCH);
1410 ecode++;
1411 break;
1412
1413 case OP_NOT_WHITESPACE:
1414 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1415 GETCHARINCTEST(c, eptr);
1416 if (
1417 #ifdef SUPPORT_UTF8
1418 c < 256 &&
1419 #endif
1420 (md->ctypes[c] & ctype_space) != 0
1421 )
1422 RRETURN(MATCH_NOMATCH);
1423 ecode++;
1424 break;
1425
1426 case OP_WHITESPACE:
1427 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1428 GETCHARINCTEST(c, eptr);
1429 if (
1430 #ifdef SUPPORT_UTF8
1431 c >= 256 ||
1432 #endif
1433 (md->ctypes[c] & ctype_space) == 0
1434 )
1435 RRETURN(MATCH_NOMATCH);
1436 ecode++;
1437 break;
1438
1439 case OP_NOT_WORDCHAR:
1440 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1441 GETCHARINCTEST(c, eptr);
1442 if (
1443 #ifdef SUPPORT_UTF8
1444 c < 256 &&
1445 #endif
1446 (md->ctypes[c] & ctype_word) != 0
1447 )
1448 RRETURN(MATCH_NOMATCH);
1449 ecode++;
1450 break;
1451
1452 case OP_WORDCHAR:
1453 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1454 GETCHARINCTEST(c, eptr);
1455 if (
1456 #ifdef SUPPORT_UTF8
1457 c >= 256 ||
1458 #endif
1459 (md->ctypes[c] & ctype_word) == 0
1460 )
1461 RRETURN(MATCH_NOMATCH);
1462 ecode++;
1463 break;
1464
1465 case OP_ANYNL:
1466 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1467 GETCHARINCTEST(c, eptr);
1468 switch(c)
1469 {
1470 default: RRETURN(MATCH_NOMATCH);
1471 case 0x000d:
1472 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1473 break;
1474 case 0x000a:
1475 case 0x000b:
1476 case 0x000c:
1477 case 0x0085:
1478 case 0x2028:
1479 case 0x2029:
1480 break;
1481 }
1482 ecode++;
1483 break;
1484
1485 case OP_NOT_HSPACE:
1486 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1487 GETCHARINCTEST(c, eptr);
1488 switch(c)
1489 {
1490 default: break;
1491 case 0x09: /* HT */
1492 case 0x20: /* SPACE */
1493 case 0xa0: /* NBSP */
1494 case 0x1680: /* OGHAM SPACE MARK */
1495 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1496 case 0x2000: /* EN QUAD */
1497 case 0x2001: /* EM QUAD */
1498 case 0x2002: /* EN SPACE */
1499 case 0x2003: /* EM SPACE */
1500 case 0x2004: /* THREE-PER-EM SPACE */
1501 case 0x2005: /* FOUR-PER-EM SPACE */
1502 case 0x2006: /* SIX-PER-EM SPACE */
1503 case 0x2007: /* FIGURE SPACE */
1504 case 0x2008: /* PUNCTUATION SPACE */
1505 case 0x2009: /* THIN SPACE */
1506 case 0x200A: /* HAIR SPACE */
1507 case 0x202f: /* NARROW NO-BREAK SPACE */
1508 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1509 case 0x3000: /* IDEOGRAPHIC SPACE */
1510 RRETURN(MATCH_NOMATCH);
1511 }
1512 ecode++;
1513 break;
1514
1515 case OP_HSPACE:
1516 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1517 GETCHARINCTEST(c, eptr);
1518 switch(c)
1519 {
1520 default: RRETURN(MATCH_NOMATCH);
1521 case 0x09: /* HT */
1522 case 0x20: /* SPACE */
1523 case 0xa0: /* NBSP */
1524 case 0x1680: /* OGHAM SPACE MARK */
1525 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1526 case 0x2000: /* EN QUAD */
1527 case 0x2001: /* EM QUAD */
1528 case 0x2002: /* EN SPACE */
1529 case 0x2003: /* EM SPACE */
1530 case 0x2004: /* THREE-PER-EM SPACE */
1531 case 0x2005: /* FOUR-PER-EM SPACE */
1532 case 0x2006: /* SIX-PER-EM SPACE */
1533 case 0x2007: /* FIGURE SPACE */
1534 case 0x2008: /* PUNCTUATION SPACE */
1535 case 0x2009: /* THIN SPACE */
1536 case 0x200A: /* HAIR SPACE */
1537 case 0x202f: /* NARROW NO-BREAK SPACE */
1538 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1539 case 0x3000: /* IDEOGRAPHIC SPACE */
1540 break;
1541 }
1542 ecode++;
1543 break;
1544
1545 case OP_NOT_VSPACE:
1546 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1547 GETCHARINCTEST(c, eptr);
1548 switch(c)
1549 {
1550 default: break;
1551 case 0x0a: /* LF */
1552 case 0x0b: /* VT */
1553 case 0x0c: /* FF */
1554 case 0x0d: /* CR */
1555 case 0x85: /* NEL */
1556 case 0x2028: /* LINE SEPARATOR */
1557 case 0x2029: /* PARAGRAPH SEPARATOR */
1558 RRETURN(MATCH_NOMATCH);
1559 }
1560 ecode++;
1561 break;
1562
1563 case OP_VSPACE:
1564 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1565 GETCHARINCTEST(c, eptr);
1566 switch(c)
1567 {
1568 default: RRETURN(MATCH_NOMATCH);
1569 case 0x0a: /* LF */
1570 case 0x0b: /* VT */
1571 case 0x0c: /* FF */
1572 case 0x0d: /* CR */
1573 case 0x85: /* NEL */
1574 case 0x2028: /* LINE SEPARATOR */
1575 case 0x2029: /* PARAGRAPH SEPARATOR */
1576 break;
1577 }
1578 ecode++;
1579 break;
1580
1581 #ifdef SUPPORT_UCP
1582 /* Check the next character by Unicode property. We will get here only
1583 if the support is in the binary; otherwise a compile-time error occurs. */
1584
1585 case OP_PROP:
1586 case OP_NOTPROP:
1587 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1588 GETCHARINCTEST(c, eptr);
1589 {
1590 int chartype, script;
1591 int category = _pcre_ucp_findprop(c, &chartype, &script);
1592
1593 switch(ecode[1])
1594 {
1595 case PT_ANY:
1596 if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1597 break;
1598
1599 case PT_LAMP:
1600 if ((chartype == ucp_Lu ||
1601 chartype == ucp_Ll ||
1602 chartype == ucp_Lt) == (op == OP_NOTPROP))
1603 RRETURN(MATCH_NOMATCH);
1604 break;
1605
1606 case PT_GC:
1607 if ((ecode[2] != category) == (op == OP_PROP))
1608 RRETURN(MATCH_NOMATCH);
1609 break;
1610
1611 case PT_PC:
1612 if ((ecode[2] != chartype) == (op == OP_PROP))
1613 RRETURN(MATCH_NOMATCH);
1614 break;
1615
1616 case PT_SC:
1617 if ((ecode[2] != script) == (op == OP_PROP))
1618 RRETURN(MATCH_NOMATCH);
1619 break;
1620
1621 default:
1622 RRETURN(PCRE_ERROR_INTERNAL);
1623 }
1624
1625 ecode += 3;
1626 }
1627 break;
1628
1629 /* Match an extended Unicode sequence. We will get here only if the support
1630 is in the binary; otherwise a compile-time error occurs. */
1631
1632 case OP_EXTUNI:
1633 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1634 GETCHARINCTEST(c, eptr);
1635 {
1636 int chartype, script;
1637 int category = _pcre_ucp_findprop(c, &chartype, &script);
1638 if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1639 while (eptr < md->end_subject)
1640 {
1641 int len = 1;
1642 if (!utf8) c = *eptr; else
1643 {
1644 GETCHARLEN(c, eptr, len);
1645 }
1646 category = _pcre_ucp_findprop(c, &chartype, &script);
1647 if (category != ucp_M) break;
1648 eptr += len;
1649 }
1650 }
1651 ecode++;
1652 break;
1653 #endif
1654
1655
1656 /* Match a back reference, possibly repeatedly. Look past the end of the
1657 item to see if there is repeat information following. The code is similar
1658 to that for character classes, but repeated for efficiency. Then obey
1659 similar code to character type repeats - written out again for speed.
1660 However, if the referenced string is the empty string, always treat
1661 it as matched, any number of times (otherwise there could be infinite
1662 loops). */
1663
1664 case OP_REF:
1665 {
1666 offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1667 ecode += 3; /* Advance past item */
1668
1669 /* If the reference is unset, set the length to be longer than the amount
1670 of subject left; this ensures that every attempt at a match fails. We
1671 can't just fail here, because of the possibility of quantifiers with zero
1672 minima. */
1673
1674 length = (offset >= offset_top || md->offset_vector[offset] < 0)?
1675 md->end_subject - eptr + 1 :
1676 md->offset_vector[offset+1] - md->offset_vector[offset];
1677
1678 /* Set up for repetition, or handle the non-repeated case */
1679
1680 switch (*ecode)
1681 {
1682 case OP_CRSTAR:
1683 case OP_CRMINSTAR:
1684 case OP_CRPLUS:
1685 case OP_CRMINPLUS:
1686 case OP_CRQUERY:
1687 case OP_CRMINQUERY:
1688 c = *ecode++ - OP_CRSTAR;
1689 minimize = (c & 1) != 0;
1690 min = rep_min[c]; /* Pick up values from tables; */
1691 max = rep_max[c]; /* zero for max => infinity */
1692 if (max == 0) max = INT_MAX;
1693 break;
1694
1695 case OP_CRRANGE:
1696 case OP_CRMINRANGE:
1697 minimize = (*ecode == OP_CRMINRANGE);
1698 min = GET2(ecode, 1);
1699 max = GET2(ecode, 3);
1700 if (max == 0) max = INT_MAX;
1701 ecode += 5;
1702 break;
1703
1704 default: /* No repeat follows */
1705 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1706 eptr += length;
1707 continue; /* With the main loop */
1708 }
1709
1710 /* If the length of the reference is zero, just continue with the
1711 main loop. */
1712
1713 if (length == 0) continue;
1714
1715 /* First, ensure the minimum number of matches are present. We get back
1716 the length of the reference string explicitly rather than passing the
1717 address of eptr, so that eptr can be a register variable. */
1718
1719 for (i = 1; i <= min; i++)
1720 {
1721 if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
1722 eptr += length;
1723 }
1724
1725 /* If min = max, continue at the same level without recursion.
1726 They are not both allowed to be zero. */
1727
1728 if (min == max) continue;
1729
1730 /* If minimizing, keep trying and advancing the pointer */
1731
1732 if (minimize)
1733 {
1734 for (fi = min;; fi++)
1735 {
1736 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1737 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1738 if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1739 RRETURN(MATCH_NOMATCH);
1740 eptr += length;
1741 }
1742 /* Control never gets here */
1743 }
1744
1745 /* If maximizing, find the longest string and work backwards */
1746
1747 else
1748 {
1749 pp = eptr;
1750 for (i = min; i < max; i++)
1751 {
1752 if (!match_ref(offset, eptr, length, md, ims)) break;
1753 eptr += length;
1754 }
1755 while (eptr >= pp)
1756 {
1757 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1758 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1759 eptr -= length;
1760 }
1761 RRETURN(MATCH_NOMATCH);
1762 }
1763 }
1764 /* Control never gets here */
1765
1766
1767
1768 /* Match a bit-mapped character class, possibly repeatedly. This op code is
1769 used when all the characters in the class have values in the range 0-255,
1770 and either the matching is caseful, or the characters are in the range
1771 0-127 when UTF-8 processing is enabled. The only difference between
1772 OP_CLASS and OP_NCLASS occurs when a data character outside the range is
1773 encountered.
1774
1775 First, look past the end of the item to see if there is repeat information
1776 following. Then obey similar code to character type repeats - written out
1777 again for speed. */
1778
1779 case OP_NCLASS:
1780 case OP_CLASS:
1781 {
1782 data = ecode + 1; /* Save for matching */
1783 ecode += 33; /* Advance past the item */
1784
1785 switch (*ecode)
1786 {
1787 case OP_CRSTAR:
1788 case OP_CRMINSTAR:
1789 case OP_CRPLUS:
1790 case OP_CRMINPLUS:
1791 case OP_CRQUERY:
1792 case OP_CRMINQUERY:
1793 c = *ecode++ - OP_CRSTAR;
1794 minimize = (c & 1) != 0;
1795 min = rep_min[c]; /* Pick up values from tables; */
1796 max = rep_max[c]; /* zero for max => infinity */
1797 if (max == 0) max = INT_MAX;
1798 break;
1799
1800 case OP_CRRANGE:
1801 case OP_CRMINRANGE:
1802 minimize = (*ecode == OP_CRMINRANGE);
1803 min = GET2(ecode, 1);
1804 max = GET2(ecode, 3);
1805 if (max == 0) max = INT_MAX;
1806 ecode += 5;
1807 break;
1808
1809 default: /* No repeat follows */
1810 min = max = 1;
1811 break;
1812 }
1813
1814 /* First, ensure the minimum number of matches are present. */
1815
1816 #ifdef SUPPORT_UTF8
1817 /* UTF-8 mode */
1818 if (utf8)
1819 {
1820 for (i = 1; i <= min; i++)
1821 {
1822 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1823 GETCHARINC(c, eptr);
1824 if (c > 255)
1825 {
1826 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1827 }
1828 else
1829 {
1830 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1831 }
1832 }
1833 }
1834 else
1835 #endif
1836 /* Not UTF-8 mode */
1837 {
1838 for (i = 1; i <= min; i++)
1839 {
1840 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1841 c = *eptr++;
1842 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1843 }
1844 }
1845
1846 /* If max == min we can continue with the main loop without the
1847 need to recurse. */
1848
1849 if (min == max) continue;
1850
1851 /* If minimizing, keep testing the rest of the expression and advancing
1852 the pointer while it matches the class. */
1853
1854 if (minimize)
1855 {
1856 #ifdef SUPPORT_UTF8
1857 /* UTF-8 mode */
1858 if (utf8)
1859 {
1860 for (fi = min;; fi++)
1861 {
1862 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1863 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1864 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1865 GETCHARINC(c, eptr);
1866 if (c > 255)
1867 {
1868 if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
1869 }
1870 else
1871 {
1872 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1873 }
1874 }
1875 }
1876 else
1877 #endif
1878 /* Not UTF-8 mode */
1879 {
1880 for (fi = min;; fi++)
1881 {
1882 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1883 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1884 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1885 c = *eptr++;
1886 if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
1887 }
1888 }
1889 /* Control never gets here */
1890 }
1891
1892 /* If maximizing, find the longest possible run, then work backwards. */
1893
1894 else
1895 {
1896 pp = eptr;
1897
1898 #ifdef SUPPORT_UTF8
1899 /* UTF-8 mode */
1900 if (utf8)
1901 {
1902 for (i = min; i < max; i++)
1903 {
1904 int len = 1;
1905 if (eptr >= md->end_subject) break;
1906 GETCHARLEN(c, eptr, len);
1907 if (c > 255)
1908 {
1909 if (op == OP_CLASS) break;
1910 }
1911 else
1912 {
1913 if ((data[c/8] & (1 << (c&7))) == 0) break;
1914 }
1915 eptr += len;
1916 }
1917 for (;;)
1918 {
1919 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1920 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1921 if (eptr-- == pp) break; /* Stop if tried at original pos */
1922 BACKCHAR(eptr);
1923 }
1924 }
1925 else
1926 #endif
1927 /* Not UTF-8 mode */
1928 {
1929 for (i = min; i < max; i++)
1930 {
1931 if (eptr >= md->end_subject) break;
1932 c = *eptr;
1933 if ((data[c/8] & (1 << (c&7))) == 0) break;
1934 eptr++;
1935 }
1936 while (eptr >= pp)
1937 {
1938 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1939 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1940 eptr--;
1941 }
1942 }
1943
1944 RRETURN(MATCH_NOMATCH);
1945 }
1946 }
1947 /* Control never gets here */
1948
1949
1950 /* Match an extended character class. This opcode is encountered only
1951 in UTF-8 mode, because that's the only time it is compiled. */
1952
1953 #ifdef SUPPORT_UTF8
1954 case OP_XCLASS:
1955 {
1956 data = ecode + 1 + LINK_SIZE; /* Save for matching */
1957 ecode += GET(ecode, 1); /* Advance past the item */
1958
1959 switch (*ecode)
1960 {
1961 case OP_CRSTAR:
1962 case OP_CRMINSTAR:
1963 case OP_CRPLUS:
1964 case OP_CRMINPLUS:
1965 case OP_CRQUERY:
1966 case OP_CRMINQUERY:
1967 c = *ecode++ - OP_CRSTAR;
1968 minimize = (c & 1) != 0;
1969 min = rep_min[c]; /* Pick up values from tables; */
1970 max = rep_max[c]; /* zero for max => infinity */
1971 if (max == 0) max = INT_MAX;
1972 break;
1973
1974 case OP_CRRANGE:
1975 case OP_CRMINRANGE:
1976 minimize = (*ecode == OP_CRMINRANGE);
1977 min = GET2(ecode, 1);
1978 max = GET2(ecode, 3);
1979 if (max == 0) max = INT_MAX;
1980 ecode += 5;
1981 break;
1982
1983 default: /* No repeat follows */
1984 min = max = 1;
1985 break;
1986 }
1987
1988 /* First, ensure the minimum number of matches are present. */
1989
1990 for (i = 1; i <= min; i++)
1991 {
1992 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1993 GETCHARINC(c, eptr);
1994 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
1995 }
1996
1997 /* If max == min we can continue with the main loop without the
1998 need to recurse. */
1999
2000 if (min == max) continue;
2001
2002 /* If minimizing, keep testing the rest of the expression and advancing
2003 the pointer while it matches the class. */
2004
2005 if (minimize)
2006 {
2007 for (fi = min;; fi++)
2008 {
2009 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2010 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2011 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2012 GETCHARINC(c, eptr);
2013 if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2014 }
2015 /* Control never gets here */
2016 }
2017
2018 /* If maximizing, find the longest possible run, then work backwards. */
2019
2020 else
2021 {
2022 pp = eptr;
2023 for (i = min; i < max; i++)
2024 {
2025 int len = 1;
2026 if (eptr >= md->end_subject) break;
2027 GETCHARLEN(c, eptr, len);
2028 if (!_pcre_xclass(c, data)) break;
2029 eptr += len;
2030 }
2031 for(;;)
2032 {
2033 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2034 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2035 if (eptr-- == pp) break; /* Stop if tried at original pos */
2036 BACKCHAR(eptr)
2037 }
2038 RRETURN(MATCH_NOMATCH);
2039 }
2040
2041 /* Control never gets here */
2042 }
2043 #endif /* End of XCLASS */
2044
2045 /* Match a single character, casefully */
2046
2047 case OP_CHAR:
2048 #ifdef SUPPORT_UTF8
2049 if (utf8)
2050 {
2051 length = 1;
2052 ecode++;
2053 GETCHARLEN(fc, ecode, length);
2054 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2055 while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2056 }
2057 else
2058 #endif
2059
2060 /* Non-UTF-8 mode */
2061 {
2062 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2063 if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2064 ecode += 2;
2065 }
2066 break;
2067
2068 /* Match a single character, caselessly */
2069
2070 case OP_CHARNC:
2071 #ifdef SUPPORT_UTF8
2072 if (utf8)
2073 {
2074 length = 1;
2075 ecode++;
2076 GETCHARLEN(fc, ecode, length);
2077
2078 if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2079
2080 /* If the pattern character's value is < 128, we have only one byte, and
2081 can use the fast lookup table. */
2082
2083 if (fc < 128)
2084 {
2085 if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2086 }
2087
2088 /* Otherwise we must pick up the subject character */
2089
2090 else
2091 {
2092 unsigned int dc;
2093 GETCHARINC(dc, eptr);
2094 ecode += length;
2095
2096 /* If we have Unicode property support, we can use it to test the other
2097 case of the character, if there is one. */
2098
2099 if (fc != dc)
2100 {
2101 #ifdef SUPPORT_UCP
2102 if (dc != _pcre_ucp_othercase(fc))
2103 #endif
2104 RRETURN(MATCH_NOMATCH);
2105 }
2106 }
2107 }
2108 else
2109 #endif /* SUPPORT_UTF8 */
2110
2111 /* Non-UTF-8 mode */
2112 {
2113 if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
2114 if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2115 ecode += 2;
2116 }
2117 break;
2118
2119 /* Match a single character repeatedly. */
2120
2121 case OP_EXACT:
2122 min = max = GET2(ecode, 1);
2123 ecode += 3;
2124 goto REPEATCHAR;
2125
2126 case OP_POSUPTO:
2127 possessive = TRUE;
2128 /* Fall through */
2129
2130 case OP_UPTO:
2131 case OP_MINUPTO:
2132 min = 0;
2133 max = GET2(ecode, 1);
2134 minimize = *ecode == OP_MINUPTO;
2135 ecode += 3;
2136 goto REPEATCHAR;
2137
2138 case OP_POSSTAR:
2139 possessive = TRUE;
2140 min = 0;
2141 max = INT_MAX;
2142 ecode++;
2143 goto REPEATCHAR;
2144
2145 case OP_POSPLUS:
2146 possessive = TRUE;
2147 min = 1;
2148 max = INT_MAX;
2149 ecode++;
2150 goto REPEATCHAR;
2151
2152 case OP_POSQUERY:
2153 possessive = TRUE;
2154 min = 0;
2155 max = 1;
2156 ecode++;
2157 goto REPEATCHAR;
2158
2159 case OP_STAR:
2160 case OP_MINSTAR:
2161 case OP_PLUS:
2162 case OP_MINPLUS:
2163 case OP_QUERY:
2164 case OP_MINQUERY:
2165 c = *ecode++ - OP_STAR;
2166 minimize = (c & 1) != 0;
2167 min = rep_min[c]; /* Pick up values from tables; */
2168 max = rep_max[c]; /* zero for max => infinity */
2169 if (max == 0) max = INT_MAX;
2170
2171 /* Common code for all repeated single-character matches. We can give
2172 up quickly if there are fewer than the minimum number of characters left in
2173 the subject. */
2174
2175 REPEATCHAR:
2176 #ifdef SUPPORT_UTF8
2177 if (utf8)
2178 {
2179 length = 1;
2180 charptr = ecode;
2181 GETCHARLEN(fc, ecode, length);
2182 if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2183 ecode += length;
2184
2185 /* Handle multibyte character matching specially here. There is
2186 support for caseless matching if UCP support is present. */
2187
2188 if (length > 1)
2189 {
2190 #ifdef SUPPORT_UCP
2191 unsigned int othercase;
2192 if ((ims & PCRE_CASELESS) != 0 &&
2193 (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2194 oclength = _pcre_ord2utf8(othercase, occhars);
2195 else oclength = 0;
2196 #endif /* SUPPORT_UCP */
2197
2198 for (i = 1; i <= min; i++)
2199 {
2200 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2201 #ifdef SUPPORT_UCP
2202 /* Need braces because of following else */
2203 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2204 else
2205 {
2206 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2207 eptr += oclength;
2208 }
2209 #else /* without SUPPORT_UCP */
2210 else { RRETURN(MATCH_NOMATCH); }
2211 #endif /* SUPPORT_UCP */
2212 }
2213
2214 if (min == max) continue;
2215
2216 if (minimize)
2217 {
2218 for (fi = min;; fi++)
2219 {
2220 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2221 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2222 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2223 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2224 #ifdef SUPPORT_UCP
2225 /* Need braces because of following else */
2226 else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2227 else
2228 {
2229 if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2230 eptr += oclength;
2231 }
2232 #else /* without SUPPORT_UCP */
2233 else { RRETURN (MATCH_NOMATCH); }
2234 #endif /* SUPPORT_UCP */
2235 }
2236 /* Control never gets here */
2237 }
2238
2239 else /* Maximize */
2240 {
2241 pp = eptr;
2242 for (i = min; i < max; i++)
2243 {
2244 if (eptr > md->end_subject - length) break;
2245 if (memcmp(eptr, charptr, length) == 0) eptr += length;
2246 #ifdef SUPPORT_UCP
2247 else if (oclength == 0) break;
2248 else
2249 {
2250 if (memcmp(eptr, occhars, oclength) != 0) break;
2251 eptr += oclength;
2252 }
2253 #else /* without SUPPORT_UCP */
2254 else break;
2255 #endif /* SUPPORT_UCP */
2256 }
2257
2258 if (possessive) continue;
2259 for(;;)
2260 {
2261 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2262 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2263 if (eptr == pp) RRETURN(MATCH_NOMATCH);
2264 #ifdef SUPPORT_UCP
2265 eptr--;
2266 BACKCHAR(eptr);
2267 #else /* without SUPPORT_UCP */
2268 eptr -= length;
2269 #endif /* SUPPORT_UCP */
2270 }
2271 }
2272 /* Control never gets here */
2273 }
2274
2275 /* If the length of a UTF-8 character is 1, we fall through here, and
2276 obey the code as for non-UTF-8 characters below, though in this case the
2277 value of fc will always be < 128. */
2278 }
2279 else
2280 #endif /* SUPPORT_UTF8 */
2281
2282 /* When not in UTF-8 mode, load a single-byte character. */
2283 {
2284 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2285 fc = *ecode++;
2286 }
2287
2288 /* The value of fc at this point is always less than 256, though we may or
2289 may not be in UTF-8 mode. The code is duplicated for the caseless and
2290 caseful cases, for speed, since matching characters is likely to be quite
2291 common. First, ensure the minimum number of matches are present. If min =
2292 max, continue at the same level without recursing. Otherwise, if
2293 minimizing, keep trying the rest of the expression and advancing one
2294 matching character if failing, up to the maximum. Alternatively, if
2295 maximizing, find the maximum number of characters and work backwards. */
2296
2297 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2298 max, eptr));
2299
2300 if ((ims & PCRE_CASELESS) != 0)
2301 {
2302 fc = md->lcc[fc];
2303 for (i = 1; i <= min; i++)
2304 if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2305 if (min == max) continue;
2306 if (minimize)
2307 {
2308 for (fi = min;; fi++)
2309 {
2310 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2311 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2312 if (fi >= max || eptr >= md->end_subject ||
2313 fc != md->lcc[*eptr++])
2314 RRETURN(MATCH_NOMATCH);
2315 }
2316 /* Control never gets here */
2317 }
2318 else /* Maximize */
2319 {
2320 pp = eptr;
2321 for (i = min; i < max; i++)
2322 {
2323 if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2324 eptr++;
2325 }
2326 if (possessive) continue;
2327 while (eptr >= pp)
2328 {
2329 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2330 eptr--;
2331 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2332 }
2333 RRETURN(MATCH_NOMATCH);
2334 }
2335 /* Control never gets here */
2336 }
2337
2338 /* Caseful comparisons (includes all multi-byte characters) */
2339
2340 else
2341 {
2342 for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2343 if (min == max) continue;
2344 if (minimize)
2345 {
2346 for (fi = min;; fi++)
2347 {
2348 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2349 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2350 if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2351 RRETURN(MATCH_NOMATCH);
2352 }
2353 /* Control never gets here */
2354 }
2355 else /* Maximize */
2356 {
2357 pp = eptr;
2358 for (i = min; i < max; i++)
2359 {
2360 if (eptr >= md->end_subject || fc != *eptr) break;
2361 eptr++;
2362 }
2363 if (possessive) continue;
2364 while (eptr >= pp)
2365 {
2366 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2367 eptr--;
2368 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2369 }
2370 RRETURN(MATCH_NOMATCH);
2371 }
2372 }
2373 /* Control never gets here */
2374
2375 /* Match a negated single one-byte character. The character we are
2376 checking can be multibyte. */
2377
2378 case OP_NOT:
2379 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2380 ecode++;
2381 GETCHARINCTEST(c, eptr);
2382 if ((ims & PCRE_CASELESS) != 0)
2383 {
2384 #ifdef SUPPORT_UTF8
2385 if (c < 256)
2386 #endif
2387 c = md->lcc[c];
2388 if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);
2389 }
2390 else
2391 {
2392 if (*ecode++ == c) RRETURN(MATCH_NOMATCH);
2393 }
2394 break;
2395
2396 /* Match a negated single one-byte character repeatedly. This is almost a
2397 repeat of the code for a repeated single character, but I haven't found a
2398 nice way of commoning these up that doesn't require a test of the
2399 positive/negative option for each character match. Maybe that wouldn't add
2400 very much to the time taken, but character matching *is* what this is all
2401 about... */
2402
2403 case OP_NOTEXACT:
2404 min = max = GET2(ecode, 1);
2405 ecode += 3;
2406 goto REPEATNOTCHAR;
2407
2408 case OP_NOTUPTO:
2409 case OP_NOTMINUPTO:
2410 min = 0;
2411 max = GET2(ecode, 1);
2412 minimize = *ecode == OP_NOTMINUPTO;
2413 ecode += 3;
2414 goto REPEATNOTCHAR;
2415
2416 case OP_NOTPOSSTAR:
2417 possessive = TRUE;
2418 min = 0;
2419 max = INT_MAX;
2420 ecode++;
2421 goto REPEATNOTCHAR;
2422
2423 case OP_NOTPOSPLUS:
2424 possessive = TRUE;
2425 min = 1;
2426 max = INT_MAX;
2427 ecode++;
2428 goto REPEATNOTCHAR;
2429
2430 case OP_NOTPOSQUERY:
2431 possessive = TRUE;
2432 min = 0;
2433 max = 1;
2434 ecode++;
2435 goto REPEATNOTCHAR;
2436
2437 case OP_NOTPOSUPTO:
2438 possessive = TRUE;
2439 min = 0;
2440 max = GET2(ecode, 1);
2441 ecode += 3;
2442 goto REPEATNOTCHAR;
2443
2444 case OP_NOTSTAR:
2445 case OP_NOTMINSTAR:
2446 case OP_NOTPLUS:
2447 case OP_NOTMINPLUS:
2448 case OP_NOTQUERY:
2449 case OP_NOTMINQUERY:
2450 c = *ecode++ - OP_NOTSTAR;
2451 minimize = (c & 1) != 0;
2452 min = rep_min[c]; /* Pick up values from tables; */
2453 max = rep_max[c]; /* zero for max => infinity */
2454 if (max == 0) max = INT_MAX;
2455
2456 /* Common code for all repeated single-byte matches. We can give up quickly
2457 if there are fewer than the minimum number of bytes left in the
2458 subject. */
2459
2460 REPEATNOTCHAR:
2461 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2462 fc = *ecode++;
2463
2464 /* The code is duplicated for the caseless and caseful cases, for speed,
2465 since matching characters is likely to be quite common. First, ensure the
2466 minimum number of matches are present. If min = max, continue at the same
2467 level without recursing. Otherwise, if minimizing, keep trying the rest of
2468 the expression and advancing one matching character if failing, up to the
2469 maximum. Alternatively, if maximizing, find the maximum number of
2470 characters and work backwards. */
2471
2472 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2473 max, eptr));
2474
2475 if ((ims & PCRE_CASELESS) != 0)
2476 {
2477 fc = md->lcc[fc];
2478
2479 #ifdef SUPPORT_UTF8
2480 /* UTF-8 mode */
2481 if (utf8)
2482 {
2483 register unsigned int d;
2484 for (i = 1; i <= min; i++)
2485 {
2486 GETCHARINC(d, eptr);
2487 if (d < 256) d = md->lcc[d];
2488 if (fc == d) RRETURN(MATCH_NOMATCH);
2489 }
2490 }
2491 else
2492 #endif
2493
2494 /* Not UTF-8 mode */
2495 {
2496 for (i = 1; i <= min; i++)
2497 if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2498 }
2499
2500 if (min == max) continue;
2501
2502 if (minimize)
2503 {
2504 #ifdef SUPPORT_UTF8
2505 /* UTF-8 mode */
2506 if (utf8)
2507 {
2508 register unsigned int d;
2509 for (fi = min;; fi++)
2510 {
2511 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2512 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2513 GETCHARINC(d, eptr);
2514 if (d < 256) d = md->lcc[d];
2515 if (fi >= max || eptr >= md->end_subject || fc == d)
2516 RRETURN(MATCH_NOMATCH);
2517 }
2518 }
2519 else
2520 #endif
2521 /* Not UTF-8 mode */
2522 {
2523 for (fi = min;; fi++)
2524 {
2525 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2526 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527 if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2528 RRETURN(MATCH_NOMATCH);
2529 }
2530 }
2531 /* Control never gets here */
2532 }
2533
2534 /* Maximize case */
2535
2536 else
2537 {
2538 pp = eptr;
2539
2540 #ifdef SUPPORT_UTF8
2541 /* UTF-8 mode */
2542 if (utf8)
2543 {
2544 register unsigned int d;
2545 for (i = min; i < max; i++)
2546 {
2547 int len = 1;
2548 if (eptr >= md->end_subject) break;
2549 GETCHARLEN(d, eptr, len);
2550 if (d < 256) d = md->lcc[d];
2551 if (fc == d) break;
2552 eptr += len;
2553 }
2554 if (possessive) continue;
2555 for(;;)
2556 {
2557 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2558 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2559 if (eptr-- == pp) break; /* Stop if tried at original pos */
2560 BACKCHAR(eptr);
2561 }
2562 }
2563 else
2564 #endif
2565 /* Not UTF-8 mode */
2566 {
2567 for (i = min; i < max; i++)
2568 {
2569 if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2570 eptr++;
2571 }
2572 if (possessive) continue;
2573 while (eptr >= pp)
2574 {
2575 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2576 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2577 eptr--;
2578 }
2579 }
2580
2581 RRETURN(MATCH_NOMATCH);
2582 }
2583 /* Control never gets here */
2584 }
2585
2586 /* Caseful comparisons */
2587
2588 else
2589 {
2590 #ifdef SUPPORT_UTF8
2591 /* UTF-8 mode */
2592 if (utf8)
2593 {
2594 register unsigned int d;
2595 for (i = 1; i <= min; i++)
2596 {
2597 GETCHARINC(d, eptr);
2598 if (fc == d) RRETURN(MATCH_NOMATCH);
2599 }
2600 }
2601 else
2602 #endif
2603 /* Not UTF-8 mode */
2604 {
2605 for (i = 1; i <= min; i++)
2606 if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2607 }
2608
2609 if (min == max) continue;
2610
2611 if (minimize)
2612 {
2613 #ifdef SUPPORT_UTF8
2614 /* UTF-8 mode */
2615 if (utf8)
2616 {
2617 register unsigned int d;
2618 for (fi = min;; fi++)
2619 {
2620 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2621 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2622 GETCHARINC(d, eptr);
2623 if (fi >= max || eptr >= md->end_subject || fc == d)
2624 RRETURN(MATCH_NOMATCH);
2625 }
2626 }
2627 else
2628 #endif
2629 /* Not UTF-8 mode */
2630 {
2631 for (fi = min;; fi++)
2632 {
2633 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2634 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2635 if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2636 RRETURN(MATCH_NOMATCH);
2637 }
2638 }
2639 /* Control never gets here */
2640 }
2641
2642 /* Maximize case */
2643
2644 else
2645 {
2646 pp = eptr;
2647
2648 #ifdef SUPPORT_UTF8
2649 /* UTF-8 mode */
2650 if (utf8)
2651 {
2652 register unsigned int d;
2653 for (i = min; i < max; i++)
2654 {
2655 int len = 1;
2656 if (eptr >= md->end_subject) break;
2657 GETCHARLEN(d, eptr, len);
2658 if (fc == d) break;
2659 eptr += len;
2660 }
2661 if (possessive) continue;
2662 for(;;)
2663 {
2664 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2665 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2666 if (eptr-- == pp) break; /* Stop if tried at original pos */
2667 BACKCHAR(eptr);
2668 }
2669 }
2670 else
2671 #endif
2672 /* Not UTF-8 mode */
2673 {
2674 for (i = min; i < max; i++)
2675 {
2676 if (eptr >= md->end_subject || fc == *eptr) break;
2677 eptr++;
2678 }
2679 if (possessive) continue;
2680 while (eptr >= pp)
2681 {
2682 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2683 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2684 eptr--;
2685 }
2686 }
2687
2688 RRETURN(MATCH_NOMATCH);
2689 }
2690 }
2691 /* Control never gets here */
2692
2693 /* Match a single character type repeatedly; several different opcodes
2694 share code. This is very similar to the code for single characters, but we
2695 repeat it in the interests of efficiency. */
2696
2697 case OP_TYPEEXACT:
2698 min = max = GET2(ecode, 1);
2699 minimize = TRUE;
2700 ecode += 3;
2701 goto REPEATTYPE;
2702
2703 case OP_TYPEUPTO:
2704 case OP_TYPEMINUPTO:
2705 min = 0;
2706 max = GET2(ecode, 1);
2707 minimize = *ecode == OP_TYPEMINUPTO;
2708 ecode += 3;
2709 goto REPEATTYPE;
2710
2711 case OP_TYPEPOSSTAR:
2712 possessive = TRUE;
2713 min = 0;
2714 max = INT_MAX;
2715 ecode++;
2716 goto REPEATTYPE;
2717
2718 case OP_TYPEPOSPLUS:
2719 possessive = TRUE;
2720 min = 1;
2721 max = INT_MAX;
2722 ecode++;
2723 goto REPEATTYPE;
2724
2725 case OP_TYPEPOSQUERY:
2726 possessive = TRUE;
2727 min = 0;
2728 max = 1;
2729 ecode++;
2730 goto REPEATTYPE;
2731
2732 case OP_TYPEPOSUPTO:
2733 possessive = TRUE;
2734 min = 0;
2735 max = GET2(ecode, 1);
2736 ecode += 3;
2737 goto REPEATTYPE;
2738
2739 case OP_TYPESTAR:
2740 case OP_TYPEMINSTAR:
2741 case OP_TYPEPLUS:
2742 case OP_TYPEMINPLUS:
2743 case OP_TYPEQUERY:
2744 case OP_TYPEMINQUERY:
2745 c = *ecode++ - OP_TYPESTAR;
2746 minimize = (c & 1) != 0;
2747 min = rep_min[c]; /* Pick up values from tables; */
2748 max = rep_max[c]; /* zero for max => infinity */
2749 if (max == 0) max = INT_MAX;
2750
2751 /* Common code for all repeated single character type matches. Note that
2752 in UTF-8 mode, '.' matches a character of any length, but for the other
2753 character types, the valid characters are all one-byte long. */
2754
2755 REPEATTYPE:
2756 ctype = *ecode++; /* Code for the character type */
2757
2758 #ifdef SUPPORT_UCP
2759 if (ctype == OP_PROP || ctype == OP_NOTPROP)
2760 {
2761 prop_fail_result = ctype == OP_NOTPROP;
2762 prop_type = *ecode++;
2763 prop_value = *ecode++;
2764 }
2765 else prop_type = -1;
2766 #endif
2767
2768 /* First, ensure the minimum number of matches are present. Use inline
2769 code for maximizing the speed, and do the type test once at the start
2770 (i.e. keep it out of the loop). Also we can test that there are at least
2771 the minimum number of bytes before we start. This isn't as effective in
2772 UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
2773 is tidier. Also separate the UCP code, which can be the same for both UTF-8
2774 and single-bytes. */
2775
2776 if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
2777 if (min > 0)
2778 {
2779 #ifdef SUPPORT_UCP
2780 if (prop_type >= 0)
2781 {
2782 switch(prop_type)
2783 {
2784 case PT_ANY:
2785 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2786 for (i = 1; i <= min; i++)
2787 {
2788 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2789 GETCHARINCTEST(c, eptr);
2790 }
2791 break;
2792
2793 case PT_LAMP:
2794 for (i = 1; i <= min; i++)
2795 {
2796 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2797 GETCHARINCTEST(c, eptr);
2798 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2799 if ((prop_chartype == ucp_Lu ||
2800 prop_chartype == ucp_Ll ||
2801 prop_chartype == ucp_Lt) == prop_fail_result)
2802 RRETURN(MATCH_NOMATCH);
2803 }
2804 break;
2805
2806 case PT_GC:
2807 for (i = 1; i <= min; i++)
2808 {
2809 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2810 GETCHARINCTEST(c, eptr);
2811 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2812 if ((prop_category == prop_value) == prop_fail_result)
2813 RRETURN(MATCH_NOMATCH);
2814 }
2815 break;
2816
2817 case PT_PC:
2818 for (i = 1; i <= min; i++)
2819 {
2820 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2821 GETCHARINCTEST(c, eptr);
2822 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2823 if ((prop_chartype == prop_value) == prop_fail_result)
2824 RRETURN(MATCH_NOMATCH);
2825 }
2826 break;
2827
2828 case PT_SC:
2829 for (i = 1; i <= min; i++)
2830 {
2831 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2832 GETCHARINCTEST(c, eptr);
2833 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2834 if ((prop_script == prop_value) == prop_fail_result)
2835 RRETURN(MATCH_NOMATCH);
2836 }
2837 break;
2838
2839 default:
2840 RRETURN(PCRE_ERROR_INTERNAL);
2841 }
2842 }
2843
2844 /* Match extended Unicode sequences. We will get here only if the
2845 support is in the binary; otherwise a compile-time error occurs. */
2846
2847 else if (ctype == OP_EXTUNI)
2848 {
2849 for (i = 1; i <= min; i++)
2850 {
2851 GETCHARINCTEST(c, eptr);
2852 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2853 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2854 while (eptr < md->end_subject)
2855 {
2856 int len = 1;
2857 if (!utf8) c = *eptr; else
2858 {
2859 GETCHARLEN(c, eptr, len);
2860 }
2861 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2862 if (prop_category != ucp_M) break;
2863 eptr += len;
2864 }
2865 }
2866 }
2867
2868 else
2869 #endif /* SUPPORT_UCP */
2870
2871 /* Handle all other cases when the coding is UTF-8 */
2872
2873 #ifdef SUPPORT_UTF8
2874 if (utf8) switch(ctype)
2875 {
2876 case OP_ANY:
2877 for (i = 1; i <= min; i++)
2878 {
2879 if (eptr >= md->end_subject ||
2880 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2881 RRETURN(MATCH_NOMATCH);
2882 eptr++;
2883 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2884 }
2885 break;
2886
2887 case OP_ANYBYTE:
2888 eptr += min;
2889 break;
2890
2891 case OP_ANYNL:
2892 for (i = 1; i <= min; i++)
2893 {
2894 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2895 GETCHARINC(c, eptr);
2896 switch(c)
2897 {
2898 default: RRETURN(MATCH_NOMATCH);
2899 case 0x000d:
2900 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2901 break;
2902 case 0x000a:
2903 case 0x000b:
2904 case 0x000c:
2905 case 0x0085:
2906 case 0x2028:
2907 case 0x2029:
2908 break;
2909 }
2910 }
2911 break;
2912
2913 case OP_NOT_HSPACE:
2914 for (i = 1; i <= min; i++)
2915 {
2916 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2917 GETCHARINC(c, eptr);
2918 switch(c)
2919 {
2920 default: break;
2921 case 0x09: /* HT */
2922 case 0x20: /* SPACE */
2923 case 0xa0: /* NBSP */
2924 case 0x1680: /* OGHAM SPACE MARK */
2925 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2926 case 0x2000: /* EN QUAD */
2927 case 0x2001: /* EM QUAD */
2928 case 0x2002: /* EN SPACE */
2929 case 0x2003: /* EM SPACE */
2930 case 0x2004: /* THREE-PER-EM SPACE */
2931 case 0x2005: /* FOUR-PER-EM SPACE */
2932 case 0x2006: /* SIX-PER-EM SPACE */
2933 case 0x2007: /* FIGURE SPACE */
2934 case 0x2008: /* PUNCTUATION SPACE */
2935 case 0x2009: /* THIN SPACE */
2936 case 0x200A: /* HAIR SPACE */
2937 case 0x202f: /* NARROW NO-BREAK SPACE */
2938 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2939 case 0x3000: /* IDEOGRAPHIC SPACE */
2940 RRETURN(MATCH_NOMATCH);
2941 }
2942 }
2943 break;
2944
2945 case OP_HSPACE:
2946 for (i = 1; i <= min; i++)
2947 {
2948 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2949 GETCHARINC(c, eptr);
2950 switch(c)
2951 {
2952 default: RRETURN(MATCH_NOMATCH);
2953 case 0x09: /* HT */
2954 case 0x20: /* SPACE */
2955 case 0xa0: /* NBSP */
2956 case 0x1680: /* OGHAM SPACE MARK */
2957 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2958 case 0x2000: /* EN QUAD */
2959 case 0x2001: /* EM QUAD */
2960 case 0x2002: /* EN SPACE */
2961 case 0x2003: /* EM SPACE */
2962 case 0x2004: /* THREE-PER-EM SPACE */
2963 case 0x2005: /* FOUR-PER-EM SPACE */
2964 case 0x2006: /* SIX-PER-EM SPACE */
2965 case 0x2007: /* FIGURE SPACE */
2966 case 0x2008: /* PUNCTUATION SPACE */
2967 case 0x2009: /* THIN SPACE */
2968 case 0x200A: /* HAIR SPACE */
2969 case 0x202f: /* NARROW NO-BREAK SPACE */
2970 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2971 case 0x3000: /* IDEOGRAPHIC SPACE */
2972 break;
2973 }
2974 }
2975 break;
2976
2977 case OP_NOT_VSPACE:
2978 for (i = 1; i <= min; i++)
2979 {
2980 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2981 GETCHARINC(c, eptr);
2982 switch(c)
2983 {
2984 default: break;
2985 case 0x0a: /* LF */
2986 case 0x0b: /* VT */
2987 case 0x0c: /* FF */
2988 case 0x0d: /* CR */
2989 case 0x85: /* NEL */
2990 case 0x2028: /* LINE SEPARATOR */
2991 case 0x2029: /* PARAGRAPH SEPARATOR */
2992 RRETURN(MATCH_NOMATCH);
2993 }
2994 }
2995 break;
2996
2997 case OP_VSPACE:
2998 for (i = 1; i <= min; i++)
2999 {
3000 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3001 GETCHARINC(c, eptr);
3002 switch(c)
3003 {
3004 default: RRETURN(MATCH_NOMATCH);
3005 case 0x0a: /* LF */
3006 case 0x0b: /* VT */
3007 case 0x0c: /* FF */
3008 case 0x0d: /* CR */
3009 case 0x85: /* NEL */
3010 case 0x2028: /* LINE SEPARATOR */
3011 case 0x2029: /* PARAGRAPH SEPARATOR */
3012 break;
3013 }
3014 }
3015 break;
3016
3017 case OP_NOT_DIGIT:
3018 for (i = 1; i <= min; i++)
3019 {
3020 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3021 GETCHARINC(c, eptr);
3022 if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3023 RRETURN(MATCH_NOMATCH);
3024 }
3025 break;
3026
3027 case OP_DIGIT:
3028 for (i = 1; i <= min; i++)
3029 {
3030 if (eptr >= md->end_subject ||
3031 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3032 RRETURN(MATCH_NOMATCH);
3033 /* No need to skip more bytes - we know it's a 1-byte character */
3034 }
3035 break;
3036
3037 case OP_NOT_WHITESPACE:
3038 for (i = 1; i <= min; i++)
3039 {
3040 if (eptr >= md->end_subject ||
3041 (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))
3042 RRETURN(MATCH_NOMATCH);
3043 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3044 }
3045 break;
3046
3047 case OP_WHITESPACE:
3048 for (i = 1; i <= min; i++)
3049 {
3050 if (eptr >= md->end_subject ||
3051 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3052 RRETURN(MATCH_NOMATCH);
3053 /* No need to skip more bytes - we know it's a 1-byte character */
3054 }
3055 break;
3056
3057 case OP_NOT_WORDCHAR:
3058 for (i = 1; i <= min; i++)
3059 {
3060 if (eptr >= md->end_subject ||
3061 (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))
3062 RRETURN(MATCH_NOMATCH);
3063 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3064 }
3065 break;
3066
3067 case OP_WORDCHAR:
3068 for (i = 1; i <= min; i++)
3069 {
3070 if (eptr >= md->end_subject ||
3071 *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3072 RRETURN(MATCH_NOMATCH);
3073 /* No need to skip more bytes - we know it's a 1-byte character */
3074 }
3075 break;
3076
3077 default:
3078 RRETURN(PCRE_ERROR_INTERNAL);
3079 } /* End switch(ctype) */
3080
3081 else
3082 #endif /* SUPPORT_UTF8 */
3083
3084 /* Code for the non-UTF-8 case for minimum matching of operators other
3085 than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
3086 number of bytes present, as this was tested above. */
3087
3088 switch(ctype)
3089 {
3090 case OP_ANY:
3091 if ((ims & PCRE_DOTALL) == 0)
3092 {
3093 for (i = 1; i <= min; i++)
3094 {
3095 if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3096 eptr++;
3097 }
3098 }
3099 else eptr += min;
3100 break;
3101
3102 case OP_ANYBYTE:
3103 eptr += min;
3104 break;
3105
3106 /* Because of the CRLF case, we can't assume the minimum number of
3107 bytes are present in this case. */
3108
3109 case OP_ANYNL:
3110 for (i = 1; i <= min; i++)
3111 {
3112 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3113 switch(*eptr++)
3114 {
3115 default: RRETURN(MATCH_NOMATCH);
3116 case 0x000d:
3117 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3118 break;
3119 case 0x000a:
3120 case 0x000b:
3121 case 0x000c:
3122 case 0x0085:
3123 break;
3124 }
3125 }
3126 break;
3127
3128 case OP_NOT_HSPACE:
3129 for (i = 1; i <= min; i++)
3130 {
3131 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3132 switch(*eptr++)
3133 {
3134 default: break;
3135 case 0x09: /* HT */
3136 case 0x20: /* SPACE */
3137 case 0xa0: /* NBSP */
3138 RRETURN(MATCH_NOMATCH);
3139 }
3140 }
3141 break;
3142
3143 case OP_HSPACE:
3144 for (i = 1; i <= min; i++)
3145 {
3146 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3147 switch(*eptr++)
3148 {
3149 default: RRETURN(MATCH_NOMATCH);
3150 case 0x09: /* HT */
3151 case 0x20: /* SPACE */
3152 case 0xa0: /* NBSP */
3153 break;
3154 }
3155 }
3156 break;
3157
3158 case OP_NOT_VSPACE:
3159 for (i = 1; i <= min; i++)
3160 {
3161 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3162 switch(*eptr++)
3163 {
3164 default: break;
3165 case 0x0a: /* LF */
3166 case 0x0b: /* VT */
3167 case 0x0c: /* FF */
3168 case 0x0d: /* CR */
3169 case 0x85: /* NEL */
3170 RRETURN(MATCH_NOMATCH);
3171 }
3172 }
3173 break;
3174
3175 case OP_VSPACE:
3176 for (i = 1; i <= min; i++)
3177 {
3178 if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3179 switch(*eptr++)
3180 {
3181 default: RRETURN(MATCH_NOMATCH);
3182 case 0x0a: /* LF */
3183 case 0x0b: /* VT */
3184 case 0x0c: /* FF */
3185 case 0x0d: /* CR */
3186 case 0x85: /* NEL */
3187 break;
3188 }
3189 }
3190 break;
3191
3192 case OP_NOT_DIGIT:
3193 for (i = 1; i <= min; i++)
3194 if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3195 break;
3196
3197 case OP_DIGIT:
3198 for (i = 1; i <= min; i++)
3199 if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3200 break;
3201
3202 case OP_NOT_WHITESPACE:
3203 for (i = 1; i <= min; i++)
3204 if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3205 break;
3206
3207 case OP_WHITESPACE:
3208 for (i = 1; i <= min; i++)
3209 if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3210 break;
3211
3212 case OP_NOT_WORDCHAR:
3213 for (i = 1; i <= min; i++)
3214 if ((md->ctypes[*eptr++] & ctype_word) != 0)
3215 RRETURN(MATCH_NOMATCH);
3216 break;
3217
3218 case OP_WORDCHAR:
3219 for (i = 1; i <= min; i++)
3220 if ((md->ctypes[*eptr++] & ctype_word) == 0)
3221 RRETURN(MATCH_NOMATCH);
3222 break;
3223
3224 default:
3225 RRETURN(PCRE_ERROR_INTERNAL);
3226 }
3227 }
3228
3229 /* If min = max, continue at the same level without recursing */
3230
3231 if (min == max) continue;
3232
3233 /* If minimizing, we have to test the rest of the pattern before each
3234 subsequent match. Again, separate the UTF-8 case for speed, and also
3235 separate the UCP cases. */
3236
3237 if (minimize)
3238 {
3239 #ifdef SUPPORT_UCP
3240 if (prop_type >= 0)
3241 {
3242 switch(prop_type)
3243 {
3244 case PT_ANY:
3245 for (fi = min;; fi++)
3246 {
3247 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3248 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3249 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3250 GETCHARINC(c, eptr);
3251 if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3252 }
3253 /* Control never gets here */
3254
3255 case PT_LAMP:
3256 for (fi = min;; fi++)
3257 {
3258 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3259 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3260 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3261 GETCHARINC(c, eptr);
3262 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3263 if ((prop_chartype == ucp_Lu ||
3264 prop_chartype == ucp_Ll ||
3265 prop_chartype == ucp_Lt) == prop_fail_result)
3266 RRETURN(MATCH_NOMATCH);
3267 }
3268 /* Control never gets here */
3269
3270 case PT_GC:
3271 for (fi = min;; fi++)
3272 {
3273 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3274 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3275 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3276 GETCHARINC(c, eptr);
3277 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3278 if ((prop_category == prop_value) == prop_fail_result)
3279 RRETURN(MATCH_NOMATCH);
3280 }
3281 /* Control never gets here */
3282
3283 case PT_PC:
3284 for (fi = min;; fi++)
3285 {
3286 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3287 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3288 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3289 GETCHARINC(c, eptr);
3290 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3291 if ((prop_chartype == prop_value) == prop_fail_result)
3292 RRETURN(MATCH_NOMATCH);
3293 }
3294 /* Control never gets here */
3295
3296 case PT_SC:
3297 for (fi = min;; fi++)
3298 {
3299 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3300 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3302 GETCHARINC(c, eptr);
3303 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3304 if ((prop_script == prop_value) == prop_fail_result)
3305 RRETURN(MATCH_NOMATCH);
3306 }
3307 /* Control never gets here */
3308
3309 default:
3310 RRETURN(PCRE_ERROR_INTERNAL);
3311 }
3312 }
3313
3314 /* Match extended Unicode sequences. We will get here only if the
3315 support is in the binary; otherwise a compile-time error occurs. */
3316
3317 else if (ctype == OP_EXTUNI)
3318 {
3319 for (fi = min;; fi++)
3320 {
3321 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3322 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3323 if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3324 GETCHARINCTEST(c, eptr);
3325 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3326 if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3327 while (eptr < md->end_subject)
3328 {
3329 int len = 1;
3330 if (!utf8) c = *eptr; else
3331 {
3332 GETCHARLEN(c, eptr, len);
3333 }
3334 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3335 if (prop_category != ucp_M) break;
3336 eptr += len;
3337 }
3338 }
3339 }
3340
3341 else
3342 #endif /* SUPPORT_UCP */
3343
3344 #ifdef SUPPORT_UTF8
3345 /* UTF-8 mode */
3346 if (utf8)
3347 {
3348 for (fi = min;; fi++)
3349 {
3350 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3351 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3352 if (fi >= max || eptr >= md->end_subject ||
3353 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3354 IS_NEWLINE(eptr)))
3355 RRETURN(MATCH_NOMATCH);
3356
3357 GETCHARINC(c, eptr);
3358 switch(ctype)
3359 {
3360 case OP_ANY: /* This is the DOTALL case */
3361 break;
3362
3363 case OP_ANYBYTE:
3364 break;
3365
3366 case OP_ANYNL:
3367 switch(c)
3368 {
3369 default: RRETURN(MATCH_NOMATCH);
3370 case 0x000d:
3371 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3372 break;
3373 case 0x000a:
3374 case 0x000b:
3375 case 0x000c:
3376 case 0x0085:
3377 case 0x2028:
3378 case 0x2029:
3379 break;
3380 }
3381 break;
3382
3383 case OP_NOT_HSPACE:
3384 switch(c)
3385 {
3386 default: break;
3387 case 0x09: /* HT */
3388 case 0x20: /* SPACE */
3389 case 0xa0: /* NBSP */
3390 case 0x1680: /* OGHAM SPACE MARK */
3391 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3392 case 0x2000: /* EN QUAD */
3393 case 0x2001: /* EM QUAD */
3394 case 0x2002: /* EN SPACE */
3395 case 0x2003: /* EM SPACE */
3396 case 0x2004: /* THREE-PER-EM SPACE */
3397 case 0x2005: /* FOUR-PER-EM SPACE */
3398 case 0x2006: /* SIX-PER-EM SPACE */
3399 case 0x2007: /* FIGURE SPACE */
3400 case 0x2008: /* PUNCTUATION SPACE */
3401 case 0x2009: /* THIN SPACE */
3402 case 0x200A: /* HAIR SPACE */
3403 case 0x202f: /* NARROW NO-BREAK SPACE */
3404 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3405 case 0x3000: /* IDEOGRAPHIC SPACE */
3406 RRETURN(MATCH_NOMATCH);
3407 }
3408 break;
3409
3410 case OP_HSPACE:
3411 switch(c)
3412 {
3413 default: RRETURN(MATCH_NOMATCH);
3414 case 0x09: /* HT */
3415 case 0x20: /* SPACE */
3416 case 0xa0: /* NBSP */
3417 case 0x1680: /* OGHAM SPACE MARK */
3418 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3419 case 0x2000: /* EN QUAD */
3420 case 0x2001: /* EM QUAD */
3421 case 0x2002: /* EN SPACE */
3422 case 0x2003: /* EM SPACE */
3423 case 0x2004: /* THREE-PER-EM SPACE */
3424 case 0x2005: /* FOUR-PER-EM SPACE */
3425 case 0x2006: /* SIX-PER-EM SPACE */
3426 case 0x2007: /* FIGURE SPACE */
3427 case 0x2008: /* PUNCTUATION SPACE */
3428 case 0x2009: /* THIN SPACE */
3429 case 0x200A: /* HAIR SPACE */
3430 case 0x202f: /* NARROW NO-BREAK SPACE */
3431 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3432 case 0x3000: /* IDEOGRAPHIC SPACE */
3433 break;
3434 }
3435 break;
3436
3437 case OP_NOT_VSPACE:
3438 switch(c)
3439 {
3440 default: break;
3441 case 0x0a: /* LF */
3442 case 0x0b: /* VT */
3443 case 0x0c: /* FF */
3444 case 0x0d: /* CR */
3445 case 0x85: /* NEL */
3446 case 0x2028: /* LINE SEPARATOR */
3447 case 0x2029: /* PARAGRAPH SEPARATOR */
3448 RRETURN(MATCH_NOMATCH);
3449 }
3450 break;
3451
3452 case OP_VSPACE:
3453 switch(c)
3454 {
3455 default: RRETURN(MATCH_NOMATCH);
3456 case 0x0a: /* LF */
3457 case 0x0b: /* VT */
3458 case 0x0c: /* FF */
3459 case 0x0d: /* CR */
3460 case 0x85: /* NEL */
3461 case 0x2028: /* LINE SEPARATOR */
3462 case 0x2029: /* PARAGRAPH SEPARATOR */
3463 break;
3464 }
3465 break;
3466
3467 case OP_NOT_DIGIT:
3468 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3469 RRETURN(MATCH_NOMATCH);
3470 break;
3471
3472 case OP_DIGIT:
3473 if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
3474 RRETURN(MATCH_NOMATCH);
3475 break;
3476
3477 case OP_NOT_WHITESPACE:
3478 if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
3479 RRETURN(MATCH_NOMATCH);
3480 break;
3481
3482 case OP_WHITESPACE:
3483 if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
3484 RRETURN(MATCH_NOMATCH);
3485 break;
3486
3487 case OP_NOT_WORDCHAR:
3488 if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
3489 RRETURN(MATCH_NOMATCH);
3490 break;
3491
3492 case OP_WORDCHAR:
3493 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
3494 RRETURN(MATCH_NOMATCH);
3495 break;
3496
3497 default:
3498 RRETURN(PCRE_ERROR_INTERNAL);
3499 }
3500 }
3501 }
3502 else
3503 #endif
3504 /* Not UTF-8 mode */
3505 {
3506 for (fi = min;; fi++)
3507 {
3508 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3509 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3510 if (fi >= max || eptr >= md->end_subject ||
3511 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3512 RRETURN(MATCH_NOMATCH);
3513
3514 c = *eptr++;
3515 switch(ctype)
3516 {
3517 case OP_ANY: /* This is the DOTALL case */
3518 break;
3519
3520 case OP_ANYBYTE:
3521 break;
3522
3523 case OP_ANYNL:
3524 switch(c)
3525 {
3526 default: RRETURN(MATCH_NOMATCH);
3527 case 0x000d:
3528 if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3529 break;
3530 case 0x000a:
3531 case 0x000b:
3532 case 0x000c:
3533 case 0x0085:
3534 break;
3535 }
3536 break;
3537
3538 case OP_NOT_HSPACE:
3539 switch(c)
3540 {
3541 default: break;
3542 case 0x09: /* HT */
3543 case 0x20: /* SPACE */
3544 case 0xa0: /* NBSP */
3545 RRETURN(MATCH_NOMATCH);
3546 }
3547 break;
3548
3549 case OP_HSPACE:
3550 switch(c)
3551 {
3552 default: RRETURN(MATCH_NOMATCH);
3553 case 0x09: /* HT */
3554 case 0x20: /* SPACE */
3555 case 0xa0: /* NBSP */
3556 break;
3557 }
3558 break;
3559
3560 case OP_NOT_VSPACE:
3561 switch(c)
3562 {
3563 default: break;
3564 case 0x0a: /* LF */
3565 case 0x0b: /* VT */
3566 case 0x0c: /* FF */
3567 case 0x0d: /* CR */
3568 case 0x85: /* NEL */
3569 RRETURN(MATCH_NOMATCH);
3570 }
3571 break;
3572
3573 case OP_VSPACE:
3574 switch(c)
3575 {
3576 default: RRETURN(MATCH_NOMATCH);
3577 case 0x0a: /* LF */
3578 case 0x0b: /* VT */
3579 case 0x0c: /* FF */
3580 case 0x0d: /* CR */
3581 case 0x85: /* NEL */
3582 break;
3583 }
3584 break;
3585
3586 case OP_NOT_DIGIT:
3587 if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3588 break;
3589
3590 case OP_DIGIT:
3591 if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3592 break;
3593
3594 case OP_NOT_WHITESPACE:
3595 if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3596 break;
3597
3598 case OP_WHITESPACE:
3599 if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3600 break;
3601
3602 case OP_NOT_WORDCHAR:
3603 if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
3604 break;
3605
3606 case OP_WORDCHAR:
3607 if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
3608 break;
3609
3610 default:
3611 RRETURN(PCRE_ERROR_INTERNAL);
3612 }
3613 }
3614 }
3615 /* Control never gets here */
3616 }
3617
3618 /* If maximizing, it is worth using inline code for speed, doing the type
3619 test once at the start (i.e. keep it out of the loop). Again, keep the
3620 UTF-8 and UCP stuff separate. */
3621
3622 else
3623 {
3624 pp = eptr; /* Remember where we started */
3625
3626 #ifdef SUPPORT_UCP
3627 if (prop_type >= 0)
3628 {
3629 switch(prop_type)
3630 {
3631 case PT_ANY:
3632 for (i = min; i < max; i++)
3633 {
3634 int len = 1;
3635 if (eptr >= md->end_subject) break;
3636 GETCHARLEN(c, eptr, len);
3637 if (prop_fail_result) break;
3638 eptr+= len;
3639 }
3640 break;
3641
3642 case PT_LAMP:
3643 for (i = min; i < max; i++)
3644 {
3645 int len = 1;
3646 if (eptr >= md->end_subject) break;
3647 GETCHARLEN(c, eptr, len);
3648 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3649 if ((prop_chartype == ucp_Lu ||
3650 prop_chartype == ucp_Ll ||
3651 prop_chartype == ucp_Lt) == prop_fail_result)
3652 break;
3653 eptr+= len;
3654 }
3655 break;
3656
3657 case PT_GC:
3658 for (i = min; i < max; i++)
3659 {
3660 int len = 1;
3661 if (eptr >= md->end_subject) break;
3662 GETCHARLEN(c, eptr, len);
3663 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3664 if ((prop_category == prop_value) == prop_fail_result)
3665 break;
3666 eptr+= len;
3667 }
3668 break;
3669
3670 case PT_PC:
3671 for (i = min; i < max; i++)
3672 {
3673 int len = 1;
3674 if (eptr >= md->end_subject) break;
3675 GETCHARLEN(c, eptr, len);
3676 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3677 if ((prop_chartype == prop_value) == prop_fail_result)
3678 break;
3679 eptr+= len;
3680 }
3681 break;
3682
3683 case PT_SC:
3684 for (i = min; i < max; i++)
3685 {
3686 int len = 1;
3687 if (eptr >= md->end_subject) break;
3688 GETCHARLEN(c, eptr, len);
3689 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3690 if ((prop_script == prop_value) == prop_fail_result)
3691 break;
3692 eptr+= len;
3693 }
3694 break;
3695 }
3696
3697 /* eptr is now past the end of the maximum run */
3698
3699 if (possessive) continue;
3700 for(;;)
3701 {
3702 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3703 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3704 if (eptr-- == pp) break; /* Stop if tried at original pos */
3705 BACKCHAR(eptr);
3706 }
3707 }
3708
3709 /* Match extended Unicode sequences. We will get here only if the
3710 support is in the binary; otherwise a compile-time error occurs. */
3711
3712 else if (ctype == OP_EXTUNI)
3713 {
3714 for (i = min; i < max; i++)
3715 {
3716 if (eptr >= md->end_subject) break;
3717 GETCHARINCTEST(c, eptr);
3718 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3719 if (prop_category == ucp_M) break;
3720 while (eptr < md->end_subject)
3721 {
3722 int len = 1;
3723 if (!utf8) c = *eptr; else
3724 {
3725 GETCHARLEN(c, eptr, len);
3726 }
3727 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3728 if (prop_category != ucp_M) break;
3729 eptr += len;
3730 }
3731 }
3732
3733 /* eptr is now past the end of the maximum run */
3734
3735 if (possessive) continue;
3736 for(;;)
3737 {
3738 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3739 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3740 if (eptr-- == pp) break; /* Stop if tried at original pos */
3741 for (;;) /* Move back over one extended */
3742 {
3743 int len = 1;
3744 BACKCHAR(eptr);
3745 if (!utf8) c = *eptr; else
3746 {
3747 GETCHARLEN(c, eptr, len);
3748 }
3749 prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3750 if (prop_category != ucp_M) break;
3751 eptr--;
3752 }
3753 }
3754 }
3755
3756 else
3757 #endif /* SUPPORT_UCP */
3758
3759 #ifdef SUPPORT_UTF8
3760 /* UTF-8 mode */
3761
3762 if (utf8)
3763 {
3764 switch(ctype)
3765 {
3766 case OP_ANY:
3767 if (max < INT_MAX)
3768 {
3769 if ((ims & PCRE_DOTALL) == 0)
3770 {
3771 for (i = min; i < max; i++)
3772 {
3773 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3774 eptr++;
3775 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3776 }
3777 }
3778 else
3779 {
3780 for (i = min; i < max; i++)
3781 {
3782 if (eptr >= md->end_subject) break;
3783 eptr++;
3784 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3785 }
3786 }
3787 }
3788
3789 /* Handle unlimited UTF-8 repeat */
3790
3791 else
3792 {
3793 if ((ims & PCRE_DOTALL) == 0)
3794 {
3795 for (i = min; i < max; i++)
3796 {
3797 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3798 eptr++;
3799 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3800 }
3801 }
3802 else
3803 {
3804 eptr = md->end_subject;
3805 }
3806 }
3807 break;
3808
3809 /* The byte case is the same as non-UTF8 */
3810
3811 case OP_ANYBYTE:
3812 c = max - min;
3813 if (c > (unsigned int)(md->end_subject - eptr))
3814 c = md->end_subject - eptr;
3815 eptr += c;
3816 break;
3817
3818 case OP_ANYNL:
3819 for (i = min; i < max; i++)
3820 {
3821 int len = 1;
3822 if (eptr >= md->end_subject) break;
3823 GETCHARLEN(c, eptr, len);
3824 if (c == 0x000d)
3825 {
3826 if (++eptr >= md->end_subject) break;
3827 if (*eptr == 0x000a) eptr++;
3828 }
3829 else
3830 {
3831 if (c != 0x000a && c != 0x000b && c != 0x000c &&
3832 c != 0x0085 && c != 0x2028 && c != 0x2029)
3833 break;
3834 eptr += len;
3835 }
3836 }
3837 break;
3838
3839 case OP_NOT_HSPACE:
3840 case OP_HSPACE:
3841 for (i = min; i < max; i++)
3842 {
3843 BOOL gotspace;
3844 int len = 1;
3845 if (eptr >= md->end_subject) break;
3846 GETCHARLEN(c, eptr, len);
3847 switch(c)
3848 {
3849 default: gotspace = FALSE; break;
3850 case 0x09: /* HT */
3851 case 0x20: /* SPACE */
3852 case 0xa0: /* NBSP */
3853 case 0x1680: /* OGHAM SPACE MARK */
3854 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
3855 case 0x2000: /* EN QUAD */
3856 case 0x2001: /* EM QUAD */
3857 case 0x2002: /* EN SPACE */
3858 case 0x2003: /* EM SPACE */
3859 case 0x2004: /* THREE-PER-EM SPACE */
3860 case 0x2005: /* FOUR-PER-EM SPACE */
3861 case 0x2006: /* SIX-PER-EM SPACE */
3862 case 0x2007: /* FIGURE SPACE */
3863 case 0x2008: /* PUNCTUATION SPACE */
3864 case 0x2009: /* THIN SPACE */
3865 case 0x200A: /* HAIR SPACE */
3866 case 0x202f: /* NARROW NO-BREAK SPACE */
3867 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
3868 case 0x3000: /* IDEOGRAPHIC SPACE */
3869 gotspace = TRUE;
3870 break;
3871 }
3872 if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3873 eptr += len;
3874 }
3875 break;
3876
3877 case OP_NOT_VSPACE:
3878 case OP_VSPACE:
3879 for (i = min; i < max; i++)
3880 {
3881 BOOL gotspace;
3882 int len = 1;
3883 if (eptr >= md->end_subject) break;
3884 GETCHARLEN(c, eptr, len);
3885 switch(c)
3886 {
3887 default: gotspace = FALSE; break;
3888 case 0x0a: /* LF */
3889 case 0x0b: /* VT */
3890 case 0x0c: /* FF */
3891 case 0x0d: /* CR */
3892 case 0x85: /* NEL */
3893 case 0x2028: /* LINE SEPARATOR */
3894 case 0x2029: /* PARAGRAPH SEPARATOR */
3895 gotspace = TRUE;
3896 break;
3897 }
3898 if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3899 eptr += len;
3900 }
3901 break;
3902
3903 case OP_NOT_DIGIT:
3904 for (i = min; i < max; i++)
3905 {
3906 int len = 1;
3907 if (eptr >= md->end_subject) break;
3908 GETCHARLEN(c, eptr, len);
3909 if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
3910 eptr+= len;
3911 }
3912 break;
3913
3914 case OP_DIGIT:
3915 for (i = min; i < max; i++)
3916 {
3917 int len = 1;
3918 if (eptr >= md->end_subject) break;
3919 GETCHARLEN(c, eptr, len);
3920 if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
3921 eptr+= len;
3922 }
3923 break;
3924
3925 case OP_NOT_WHITESPACE:
3926 for (i = min; i < max; i++)
3927 {
3928 int len = 1;
3929 if (eptr >= md->end_subject) break;
3930 GETCHARLEN(c, eptr, len);
3931 if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
3932 eptr+= len;
3933 }
3934 break;
3935
3936 case OP_WHITESPACE:
3937 for (i = min; i < max; i++)
3938 {
3939 int len = 1;
3940 if (eptr >= md->end_subject) break;
3941 GETCHARLEN(c, eptr, len);
3942 if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
3943 eptr+= len;
3944 }
3945 break;
3946
3947 case OP_NOT_WORDCHAR:
3948 for (i = min; i < max; i++)
3949 {
3950 int len = 1;
3951 if (eptr >= md->end_subject) break;
3952 GETCHARLEN(c, eptr, len);
3953 if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
3954 eptr+= len;
3955 }
3956 break;
3957
3958 case OP_WORDCHAR:
3959 for (i = min; i < max; i++)
3960 {
3961 int len = 1;
3962 if (eptr >= md->end_subject) break;
3963 GETCHARLEN(c, eptr, len);
3964 if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
3965 eptr+= len;
3966 }
3967 break;
3968
3969 default:
3970 RRETURN(PCRE_ERROR_INTERNAL);
3971 }
3972
3973 /* eptr is now past the end of the maximum run */
3974
3975 if (possessive) continue;
3976 for(;;)
3977 {
3978 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3979 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3980 if (eptr-- == pp) break; /* Stop if tried at original pos */
3981 BACKCHAR(eptr);
3982 }
3983 }
3984 else
3985 #endif
3986
3987 /* Not UTF-8 mode */
3988 {
3989 switch(ctype)
3990 {
3991 case OP_ANY:
3992 if ((ims & PCRE_DOTALL) == 0)
3993 {
3994 for (i = min; i < max; i++)
3995 {
3996 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3997 eptr++;
3998 }
3999 break;
4000 }
4001 /* For DOTALL case, fall through and treat as \C */
4002
4003 case OP_ANYBYTE:
4004 c = max - min;
4005 if (c > (unsigned int)(md->end_subject - eptr))
4006 c = md->end_subject - eptr;
4007 eptr += c;
4008 break;
4009
4010 case OP_ANYNL:
4011 for (i = min; i < max; i++)
4012 {
4013 if (eptr >= md->end_subject) break;
4014 c = *eptr;
4015 if (c == 0x000d)
4016 {
4017 if (++eptr >= md->end_subject) break;
4018 if (*eptr == 0x000a) eptr++;
4019 }
4020 else
4021 {
4022 if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
4023 break;
4024 eptr++;
4025 }
4026 }
4027 break;
4028
4029 case OP_NOT_HSPACE:
4030 for (i = min; i < max; i++)
4031 {
4032 if (eptr >= md->end_subject) break;
4033 c = *eptr;
4034 if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4035 eptr++;
4036 }
4037 break;
4038
4039 case OP_HSPACE:
4040 for (i = min; i < max; i++)
4041 {
4042 if (eptr >= md->end_subject) break;
4043 c = *eptr;
4044 if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4045 eptr++;
4046 }
4047 break;
4048
4049 case OP_NOT_VSPACE:
4050 for (i = min; i < max; i++)
4051 {
4052 if (eptr >= md->end_subject) break;
4053 c = *eptr;
4054 if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4055 break;
4056 eptr++;
4057 }
4058 break;
4059
4060 case OP_VSPACE:
4061 for (i = min; i < max; i++)
4062 {
4063 if (eptr >= md->end_subject) break;
4064 c = *eptr;
4065 if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4066 break;
4067 eptr++;
4068 }
4069 break;
4070
4071 case OP_NOT_DIGIT:
4072 for (i = min; i < max; i++)
4073 {
4074 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4075 break;
4076 eptr++;
4077 }
4078 break;
4079
4080 case OP_DIGIT:
4081 for (i = min; i < max; i++)
4082 {
4083 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4084 break;
4085 eptr++;
4086 }
4087 break;
4088
4089 case OP_NOT_WHITESPACE:
4090 for (i = min; i < max; i++)
4091 {
4092 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4093 break;
4094 eptr++;
4095 }
4096 break;
4097
4098 case OP_WHITESPACE:
4099 for (i = min; i < max; i++)
4100 {
4101 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4102 break;
4103 eptr++;
4104 }
4105 break;
4106
4107 case OP_NOT_WORDCHAR:
4108 for (i = min; i < max; i++)
4109 {
4110 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4111 break;
4112 eptr++;
4113 }
4114 break;
4115
4116 case OP_WORDCHAR:
4117 for (i = min; i < max; i++)
4118 {
4119 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4120 break;
4121 eptr++;
4122 }
4123 break;
4124
4125 default:
4126 RRETURN(PCRE_ERROR_INTERNAL);
4127 }
4128
4129 /* eptr is now past the end of the maximum run */
4130
4131 if (possessive) continue;
4132 while (eptr >= pp)
4133 {
4134 RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4135 eptr--;
4136 if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4137 }
4138 }
4139
4140 /* Get here if we can't make it match with any permitted repetitions */
4141
4142 RRETURN(MATCH_NOMATCH);
4143 }
4144 /* Control never gets here */
4145
4146 /* There's been some horrible disaster. Arrival here can only mean there is
4147 something seriously wrong in the code above or the OP_xxx definitions. */
4148
4149 default:
4150 DPRINTF(("Unknown opcode %d\n", *ecode));
4151 RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
4152 }
4153
4154 /* Do not stick any code in here without much thought; it is assumed
4155 that "continue" in the code above comes out to here to repeat the main
4156 loop. */
4157
4158 } /* End of main loop */
4159 /* Control never reaches here */
4160
4161
4162 /* When compiling to use the heap rather than the stack for recursive calls to
4163 match(), the RRETURN() macro jumps here. The number that is saved in
4164 frame->Xwhere indicates which label we actually want to return to. */
4165
4166 #ifdef NO_RECURSE
4167 #define LBL(val) case val: goto L_RM##val;
4168 HEAP_RETURN:
4169 switch (frame->Xwhere)
4170 {
4171 LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4172 LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4173 LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4174 LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4175 LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4176 LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4177 default:
4178 DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4179 return PCRE_ERROR_INTERNAL;
4180 }
4181 #undef LBL
4182 #endif /* NO_RECURSE */
4183 }
4184
4185
4186 /***************************************************************************
4187 ****************************************************************************
4188 RECURSION IN THE match() FUNCTION
4189
4190 Undefine all the macros that were defined above to handle this. */
4191
4192 #ifdef NO_RECURSE
4193 #undef eptr
4194 #undef ecode
4195 #undef mstart
4196 #undef offset_top
4197 #undef ims
4198 #undef eptrb
4199 #undef flags
4200
4201 #undef callpat
4202 #undef charptr
4203 #undef data
4204 #undef next
4205 #undef pp
4206 #undef prev
4207 #undef saved_eptr
4208
4209 #undef new_recursive
4210
4211 #undef cur_is_word
4212 #undef condition
4213 #undef prev_is_word
4214
4215 #undef original_ims
4216
4217 #undef ctype
4218 #undef length
4219 #undef max
4220 #undef min
4221 #undef number
4222 #undef offset
4223 #undef op
4224 #undef save_capture_last
4225 #undef save_offset1
4226 #undef save_offset2
4227 #undef save_offset3
4228 #undef stacksave
4229
4230 #undef newptrb
4231
4232 #endif
4233
4234 /* These two are defined as macros in both cases */
4235
4236 #undef fc
4237 #undef fi
4238
4239 /***************************************************************************
4240 ***************************************************************************/
4241
4242
4243
4244 /*************************************************
4245 * Execute a Regular Expression *
4246 *************************************************/
4247
4248 /* This function applies a compiled re to a subject string and picks out
4249 portions of the string if it matches. Two elements in the vector are set for
4250 each substring: the offsets to the start and end of the substring.
4251
4252 Arguments:
4253 argument_re points to the compiled expression
4254 extra_data points to extra data or is NULL
4255 subject points to the subject string
4256 length length of subject string (may contain binary zeros)
4257 start_offset where to start in the subject string
4258 options option bits
4259 offsets points to a vector of ints to be filled in with offsets
4260 offsetcount the number of elements in the vector
4261
4262 Returns: > 0 => success; value is the number of elements filled in
4263 = 0 => success, but offsets is not big enough
4264 -1 => failed to match
4265 < -1 => some kind of unexpected problem
4266 */
4267
4268 PCRE_EXP_DEFN int
4269 pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4270 PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4271 int offsetcount)
4272 {
4273 int rc, resetcount, ocount;
4274 int first_byte = -1;
4275 int req_byte = -1;
4276 int req_byte2 = -1;
4277 int newline;
4278 unsigned long int ims;
4279 BOOL using_temporary_offsets = FALSE;
4280 BOOL anchored;
4281 BOOL startline;
4282 BOOL firstline;
4283 BOOL first_byte_caseless = FALSE;
4284 BOOL req_byte_caseless = FALSE;
4285 BOOL utf8;
4286 match_data match_block;
4287 match_data *md = &match_block;
4288 const uschar *tables;
4289 const uschar *start_bits = NULL;
4290 USPTR start_match = (USPTR)subject + start_offset;
4291 USPTR end_subject;
4292 USPTR req_byte_ptr = start_match - 1;
4293 eptrblock eptrchain[EPTR_WORK_SIZE];
4294
4295 pcre_study_data internal_study;
4296 const pcre_study_data *study;
4297
4298 real_pcre internal_re;
4299 const real_pcre *external_re = (const real_pcre *)argument_re;
4300 const real_pcre *re = external_re;
4301
4302 /* Plausibility checks */
4303
4304 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4305 if (re == NULL || subject == NULL ||
4306 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4307 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
4308
4309 /* Fish out the optional data from the extra_data structure, first setting
4310 the default values. */
4311
4312 study = NULL;
4313 md->match_limit = MATCH_LIMIT;
4314 md->match_limit_recursion = MATCH_LIMIT_RECURSION;
4315 md->callout_data = NULL;
4316
4317 /* The table pointer is always in native byte order. */
4318
4319 tables = external_re->tables;
4320
4321 if (extra_data != NULL)
4322 {
4323 register unsigned int flags = extra_data->flags;
4324 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
4325 study = (const pcre_study_data *)extra_data->study_data;
4326 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
4327 md->match_limit = extra_data->match_limit;
4328 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
4329 md->match_limit_recursion = extra_data->match_limit_recursion;
4330 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
4331 md->callout_data = extra_data->callout_data;
4332 if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
4333 }
4334
4335 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
4336 is a feature that makes it possible to save compiled regexes and re-use them
4337 in other programs later. */
4338
4339 if (tables == NULL) tables = _pcre_default_tables;
4340
4341 /* Check that the first field in the block is the magic number. If it is not,
4342 test for a regex that was compiled on a host of opposite endianness. If this is
4343 the case, flipped values are put in internal_re and internal_study if there was
4344 study data too. */
4345
4346 if (re->magic_number != MAGIC_NUMBER)
4347 {
4348 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
4349 if (re == NULL) return PCRE_ERROR_BADMAGIC;
4350 if (study != NULL) study = &internal_study;
4351 }
4352
4353 /* Set up other data */
4354
4355 anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4356 startline = (re->options & PCRE_STARTLINE) != 0;
4357 firstline = (re->options & PCRE_FIRSTLINE) != 0;
4358
4359 /* The code starts after the real_pcre block and the capture name table. */
4360
4361 md->start_code = (const uschar *)external_re + re->name_table_offset +
4362 re->name_count * re->name_entry_size;
4363
4364 md->start_subject = (USPTR)subject;
4365 md->start_offset = start_offset;
4366 md->end_subject = md->start_subject + length;
4367 end_subject = md->end_subject;
4368
4369 md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4370 utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4371
4372 md->notbol = (options & PCRE_NOTBOL) != 0;
4373 md->noteol = (options & PCRE_NOTEOL) != 0;
4374 md->notempty = (options & PCRE_NOTEMPTY) != 0;
4375 md->partial = (options & PCRE_PARTIAL) != 0;
4376 md->hitend = FALSE;
4377
4378 md->recursive = NULL; /* No recursion at top level */
4379 md->eptrchain = eptrchain; /* Make workspace generally available */
4380
4381 md->lcc = tables + lcc_offset;
4382 md->ctypes = tables + ctypes_offset;
4383
4384 /* Handle different types of newline. The three bits give eight cases. If
4385 nothing is set at run time, whatever was used at compile time applies. */
4386
4387 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4388 PCRE_NEWLINE_BITS)
4389 {
4390 case 0: newline = NEWLINE; break; /* Compile-time default */
4391 case PCRE_NEWLINE_CR: newline = '\r'; break;
4392 case PCRE_NEWLINE_LF: newline = '\n'; break;
4393 case PCRE_NEWLINE_CR+
4394 PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4395 case PCRE_NEWLINE_ANY: newline = -1; break;
4396 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4397 default: return PCRE_ERROR_BADNEWLINE;
4398 }
4399
4400 if (newline == -2)
4401 {
4402 md->nltype = NLTYPE_ANYCRLF;
4403 }
4404 else if (newline < 0)
4405 {
4406 md->nltype = NLTYPE_ANY;
4407 }
4408 else
4409 {
4410 md->nltype = NLTYPE_FIXED;
4411 if (newline > 255)
4412 {
4413 md->nllen = 2;
4414 md->nl[0] = (newline >> 8) & 255;
4415 md->nl[1] = newline & 255;
4416 }
4417 else
4418 {
4419 md->nllen = 1;
4420 md->nl[0] = newline;
4421 }
4422 }
4423
4424 /* Partial matching is supported only for a restricted set of regexes at the
4425 moment. */
4426
4427 if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
4428 return PCRE_ERROR_BADPARTIAL;
4429
4430 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
4431 back the character offset. */
4432
4433 #ifdef SUPPORT_UTF8
4434 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4435 {
4436 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
4437 return PCRE_ERROR_BADUTF8;
4438 if (start_offset > 0 && start_offset < length)
4439 {
4440 int tb = ((uschar *)subject)[start_offset];
4441 if (tb > 127)
4442 {
4443 tb &= 0xc0;
4444 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
4445 }
4446 }
4447 }
4448 #endif
4449
4450 /* The ims options can vary during the matching as a result of the presence
4451 of (?ims) items in the pattern. They are kept in a local variable so that
4452 restoring at the exit of a group is easy. */
4453
4454 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4455
4456 /* If the expression has got more back references than the offsets supplied can
4457 hold, we get a temporary chunk of working store to use during the matching.
4458 Otherwise, we can use the vector supplied, rounding down its size to a multiple
4459 of 3. */
4460
4461 ocount = offsetcount - (offsetcount % 3);
4462
4463 if (re->top_backref > 0 && re->top_backref >= ocount/3)
4464 {
4465 ocount = re->top_backref * 3 + 3;
4466 md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4467 if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4468 using_temporary_offsets = TRUE;
4469 DPRINTF(("Got memory to hold back references\n"));
4470 }
4471 else md->offset_vector = offsets;
4472
4473 md->offset_end = ocount;
4474 md->offset_max = (2*ocount)/3;
4475 md->offset_overflow = FALSE;
4476 md->capture_last = -1;
4477
4478 /* Compute the minimum number of offsets that we need to reset each time. Doing
4479 this makes a huge difference to execution time when there aren't many brackets
4480 in the pattern. */
4481
4482 resetcount = 2 + re->top_bracket * 2;
4483 if (resetcount > offsetcount) resetcount = ocount;
4484
4485 /* Reset the working variables associated with each extraction. These should
4486 never be used unless previously set, but they get saved and restored, and so we
4487 initialize them to avoid reading uninitialized locations. */
4488
4489 if (md->offset_vector != NULL)
4490 {
4491 register int *iptr = md->offset_vector + ocount;
4492 register int *iend = iptr - resetcount/2 + 1;
4493 while (--iptr >= iend) *iptr = -1;
4494 }
4495
4496 /* Set up the first character to match, if available. The first_byte value is
4497 never set for an anchored regular expression, but the anchoring may be forced
4498 at run time, so we have to test for anchoring. The first char may be unset for
4499 an unanchored pattern, of course. If there's no first char and the pattern was
4500 studied, there may be a bitmap of possible first characters. */
4501
4502 if (!anchored)
4503 {
4504 if ((re->options & PCRE_FIRSTSET) != 0)
4505 {
4506 first_byte = re->first_byte & 255;
4507 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
4508 first_byte = md->lcc[first_byte];
4509 }
4510 else
4511 if (!startline && study != NULL &&
4512 (study->options & PCRE_STUDY_MAPPED) != 0)
4513 start_bits = study->start_bits;
4514 }
4515
4516 /* For anchored or unanchored matches, there may be a "last known required
4517 character" set. */
4518
4519 if ((re->options & PCRE_REQCHSET) != 0)
4520 {
4521 req_byte = re->req_byte & 255;
4522 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
4523 req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */
4524 }
4525
4526
4527 /* ==========================================================================*/
4528
4529 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4530 the loop runs just once. */
4531
4532 for(;;)
4533 {
4534 USPTR save_end_subject = end_subject;
4535
4536 /* Reset the maximum number of extractions we might see. */
4537
4538 if (md->offset_vector != NULL)
4539 {
4540 register int *iptr = md->offset_vector;
4541 register int *iend = iptr + resetcount;
4542 while (iptr < iend) *iptr++ = -1;
4543 }
4544
4545 /* Advance to a unique first char if possible. If firstline is TRUE, the
4546 start of the match is constrained to the first line of a multiline string.
4547 That is, the match must be before or at the first newline. Implement this by
4548 temporarily adjusting end_subject so that we stop scanning at a newline. If
4549 the match fails at the newline, later code breaks this loop. */
4550
4551 if (firstline)
4552 {
4553 USPTR t = start_match;
4554 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4555 end_subject = t;
4556 }
4557
4558 /* Now test for a unique first byte */
4559
4560 if (first_byte >= 0)
4561 {
4562 if (first_byte_caseless)
4563 while (start_match < end_subject &&
4564 md->lcc[*start_match] != first_byte)
4565 start_match++;
4566 else
4567 while (start_match < end_subject && *start_match != first_byte)
4568 start_match++;
4569 }
4570
4571 /* Or to just after a linebreak for a multiline match if possible */
4572
4573 else if (startline)
4574 {
4575 if (start_match > md->start_subject + start_offset)
4576 {
4577 while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4578 start_match++;
4579
4580 /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4581 and we are now at a LF, advance the match position by one more character.
4582 */
4583
4584 if (start_match[-1] == '\r' &&
4585 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4586 start_match < end_subject &&
4587 *start_match == '\n')
4588 start_match++;
4589 }
4590 }
4591
4592 /* Or to a non-unique first char after study */
4593
4594 else if (start_bits != NULL)
4595 {
4596 while (start_match < end_subject)
4597 {
4598 register unsigned int c = *start_match;
4599 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4600 }
4601 }
4602
4603 /* Restore fudged end_subject */
4604
4605 end_subject = save_end_subject;
4606
4607 #ifdef DEBUG /* Sigh. Some compilers never learn. */
4608 printf(">>>> Match against: ");
4609 pchars(start_match, end_subject - start_match, TRUE, md);
4610 printf("\n");
4611 #endif
4612
4613 /* If req_byte is set, we know that that character must appear in the subject
4614 for the match to succeed. If the first character is set, req_byte must be
4615 later in the subject; otherwise the test starts at the match point. This
4616 optimization can save a huge amount of backtracking in patterns with nested
4617 unlimited repeats that aren't going to match. Writing separate code for
4618 cased/caseless versions makes it go faster, as does using an autoincrement
4619 and backing off on a match.
4620
4621 HOWEVER: when the subject string is very, very long, searching to its end can
4622 take a long time, and give bad performance on quite ordinary patterns. This
4623 showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4624 string... so we don't do this when the string is sufficiently long.
4625
4626 ALSO: this processing is disabled when partial matching is requested.
4627 */
4628
4629 if (req_byte >= 0 &&
4630 end_subject - start_match < REQ_BYTE_MAX &&
4631 !md->partial)
4632 {
4633 register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4634
4635 /* We don't need to repeat the search if we haven't yet reached the
4636 place we found it at last time. */
4637
4638 if (p > req_byte_ptr)
4639 {
4640 if (req_byte_caseless)
4641 {
4642 while (p < end_subject)
4643 {
4644 register int pp = *p++;
4645 if (pp == req_byte || pp == req_byte2) { p--; break; }
4646 }
4647 }
4648 else
4649 {
4650 while (p < end_subject)
4651 {
4652 if (*p++ == req_byte) { p--; break; }
4653 }
4654 }
4655
4656 /* If we can't find the required character, break the matching loop,
4657 forcing a match failure. */
4658
4659 if (p >= end_subject)
4660 {
4661 rc = MATCH_NOMATCH;
4662 break;
4663 }
4664
4665 /* If we have found the required character, save the point where we
4666 found it, so that we don't search again next time round the loop if
4667 the start hasn't passed this character yet. */
4668
4669 req_byte_ptr = p;
4670 }
4671 }
4672
4673 /* OK, we can now run the match. */
4674
4675 md->start_match_ptr = start_match; /* Insurance */
4676 md->match_call_count = 0;
4677 md->eptrn = 0; /* Next free eptrchain slot */
4678 rc = match(start_match, md->start_code, start_match, 2, md,
4679 ims, NULL, 0, 0);
4680
4681 /* Any return other than MATCH_NOMATCH breaks the loop. */
4682
4683 if (rc != MATCH_NOMATCH) break;
4684
4685 /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4686 newline in the subject (though it may continue over the newline). Therefore,
4687 if we have just failed to match, starting at a newline, do not continue. */
4688
4689 if (firstline && IS_NEWLINE(start_match)) break;
4690
4691 /* Advance the match position by one character. */
4692
4693 start_match++;
4694 #ifdef SUPPORT_UTF8
4695 if (utf8)
4696 while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4697 start_match++;
4698 #endif
4699
4700 /* Break the loop if the pattern is anchored or if we have passed the end of
4701 the subject. */
4702
4703 if (anchored || start_match > end_subject) break;
4704
4705 /* If we have just passed a CR and the newline option is CRLF or ANY or
4706 ANYCRLF, and we are now at a LF, advance the match position by one more
4707 character. */
4708
4709 if (start_match[-1] == '\r' &&
4710 (md->nltype == NLTYPE_ANY ||
4711 md->nltype == NLTYPE_ANYCRLF ||
4712 md->nllen == 2) &&
4713 start_match < end_subject &&
4714 *start_match == '\n')
4715 start_match++;
4716
4717 } /* End of for(;;) "bumpalong" loop */
4718
4719 /* ==========================================================================*/
4720
4721 /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4722 conditions is true:
4723
4724 (1) The pattern is anchored;
4725
4726 (2) We are past the end of the subject;
4727
4728 (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4729 this option requests that a match occur at or before the first newline in
4730 the subject.
4731
4732 When we have a match and the offset vector is big enough to deal with any
4733 backreferences, captured substring offsets will already be set up. In the case
4734 where we had to get some local store to hold offsets for backreference
4735 processing, copy those that we can. In this case there need not be overflow if
4736 certain parts of the pattern were not used, even though there are more
4737 capturing parentheses than vector slots. */
4738
4739 if (rc == MATCH_MATCH)
4740 {
4741 if (using_temporary_offsets)
4742 {
4743 if (offsetcount >= 4)
4744 {
4745 memcpy(offsets + 2, md->offset_vector + 2,
4746 (offsetcount - 2) * sizeof(int));
4747 DPRINTF(("Copied offsets from temporary memory\n"));
4748 }
4749 if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
4750 DPRINTF(("Freeing temporary memory\n"));
4751 (pcre_free)(md->offset_vector);
4752 }
4753
4754 /* Set the return code to the number of captured strings, or 0 if there are
4755 too many to fit into the vector. */
4756
4757 rc = md->offset_overflow? 0 : md->end_offset_top/2;
4758
4759 /* If there is space, set up the whole thing as substring 0. The value of
4760 md->start_match_ptr might be modified if \K was encountered on the successful
4761 matching path. */
4762
4763 if (offsetcount < 2) rc = 0; else
4764 {
4765 offsets[0] = md->start_match_ptr - md->start_subject;
4766 offsets[1] = md->end_match_ptr - md->start_subject;
4767 }
4768
4769 DPRINTF((">>>> returning %d\n", rc));
4770 return rc;
4771 }
4772
4773 /* Control gets here if there has been an error, or if the overall match
4774 attempt has failed at all permitted starting positions. */
4775
4776 if (using_temporary_offsets)
4777 {
4778 DPRINTF(("Freeing temporary memory\n"));
4779 (pcre_free)(md->offset_vector);
4780 }
4781
4782 if (rc != MATCH_NOMATCH)
4783 {
4784 DPRINTF((">>>> error: returning %d\n", rc));
4785 return rc;
4786 }
4787 else if (md->partial && md->hitend)
4788 {
4789 DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4790 return PCRE_ERROR_PARTIAL;
4791 }
4792 else
4793 {
4794 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
4795 return PCRE_ERROR_NOMATCH;
4796 }
4797 }
4798
4799 /* End of pcre_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12