/[pcre]/code/tags/pcre-4.0/pcre.c
ViewVC logotype

Contents of /code/tags/pcre-4.0/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 39 - (show annotations) (download)
Sat Feb 24 21:39:13 2007 UTC (7 years, 1 month ago) by nigel
Original Path: code/trunk/pcre.c
File MIME type: text/plain
File size: 131999 byte(s)
Load pcre-2.08 into code/trunk.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /*
6 This is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language. See
8 the file Tech.Notes for some information on the internals.
9
10 Written by: Philip Hazel <ph10@cam.ac.uk>
11
12 Copyright (c) 1997-1999 University of Cambridge
13
14 -----------------------------------------------------------------------------
15 Permission is granted to anyone to use this software for any purpose on any
16 computer system, and to redistribute it freely, subject to the following
17 restrictions:
18
19 1. This software is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22
23 2. The origin of this software must not be misrepresented, either by
24 explicit claim or by omission.
25
26 3. Altered versions must be plainly marked as such, and must not be
27 misrepresented as being the original software.
28
29 4. If PCRE is embedded in any software that is released under the GNU
30 General Purpose Licence (GPL), then the terms of that licence shall
31 supersede any condition above with which it is incompatible.
32 -----------------------------------------------------------------------------
33 */
34
35
36 /* Define DEBUG to get debugging output on stdout. */
37
38 /* #define DEBUG */
39
/* Debug tracing macro. The argument is the complete printf() argument list,
including its own parentheses, e.g. DPRINTF(("count=%d\n", n)). When DEBUG is
defined the macro expands to "printf" followed by that parenthesized list;
otherwise it expands to nothing. Using a macro eliminates the use of #ifdef
inline at each call site, because there are still compilers about that don't
like indented pre-processor statements. */

#ifdef DEBUG
#define DPRINTF(p) printf p
#else
#define DPRINTF(p) /*nothing*/
#endif
49
50 /* Include the internals header, which itself includes Standard C headers plus
51 the external pcre header. */
52
53 #include "internal.h"
54
55
/* Allow compilation as C++ source code, should anybody want to do that. The
identifier "class" is used as a local variable name in this file (the bit map
built while compiling a character class), which is a reserved word in C++, so
it is renamed by the pre-processor. */

#ifdef __cplusplus
#define class pcre_class
#endif


/* Number of items on the nested bracket stacks at compile time. This should
not be set greater than 200. */

#define BRASTACK_SIZE 200


/* Min and max values for the common repeats; for the maxima, 0 => infinity.
NOTE(review): the six entries presumably correspond, in order, to the
quantifiers *, *?, +, +?, ? and ?? - confirm against the opcode list in
internal.h. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
73
/* Text forms of OP_ values and things, for debugging (not all used). The table
is compiled only when DEBUG is defined. NOTE(review): the entries are
presumably in the same order as the OP_ opcode values declared in internal.h,
so that an opcode can be used directly as an index - confirm before adding or
reordering opcodes. */

#ifdef DEBUG
static const char *OP_names[] = {
  "End", "\\A", "\\B", "\\b", "\\D", "\\d",
  "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
  "Opt", "^", "$", "Any", "chars", "not",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
  "*", "*?", "+", "+?", "?", "??", "{", "{",
  "class", "Ref",
  "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
  "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
  "Brazero", "Braminzero", "Bra"
};
#endif
91
/* Table for handling escaped characters in the range '0'-'z'. It is indexed
by (character - '0'). Positive entries are simple data values that the escape
stands for (for example 7 for \a and 27 for \e); negative entries are for
special things like \d and so on, stored as the negated ESC_ value. Zero means
further processing is needed (for things like \x), or the escape is invalid.
The trailing comment on each row names the characters that the row covers. */

static const short int escapes[] = {
    0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
    0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
  '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
    0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */
    0,      0,      0, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */
    0,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
  '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */
    0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */
    0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */
    0,      0, -ESC_z                                            /* x - z */
};
109
/* Forward declaration of compile_regex(), whose definition appears later in
the file. It is needed here so that code which appears before that definition
can call it, allowing mutual recursion. */

static BOOL
  compile_regex(int, int, int *, uschar **, const uschar **, const char **,
    BOOL, int, int *, int *, compile_data *);
115
116
117
118 /*************************************************
119 * Global variables *
120 *************************************************/
121
/* PCRE is thread-clean and doesn't use any global variables in the normal
sense. However, it calls memory allocation and free functions via the two
indirections below, which can be changed by the caller, but are shared
between all threads. They are initialized to the standard library malloc()
and free(). */

void *(*pcre_malloc)(size_t) = malloc;
void  (*pcre_free)(void *) = free;
129
130
131
132
133 /*************************************************
134 * Default character tables *
135 *************************************************/
136
137 /* A default set of character tables is included in the PCRE binary. Its source
138 is built by the maketables auxiliary program, which uses the default C ctypes
139 functions, and put in the file chartables.c. These tables are used by PCRE
140 whenever the caller of pcre_compile() does not provide an alternate set of
141 tables. */
142
143 #include "chartables.c"
144
145
146
147 /*************************************************
148 * Return version string *
149 *************************************************/
150
/* Two-level stringification: XSTRING() lets its argument be macro-expanded
before STRING() turns the result into a string literal. */

#define STRING(a)  # a
#define XSTRING(s) STRING(s)

/* Return the version text, which is assembled at compile time by string
literal concatenation as "<PCRE_MAJOR>.<PCRE_MINOR> <PCRE_DATE>". The returned
pointer refers to static storage and must not be freed or modified.

Arguments:  none
Returns:    pointer to the NUL-terminated version string
*/

const char *
pcre_version(void)
{
static const char version_text[] =
  XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);

return version_text;
}
159
160
161
162
163 /*************************************************
164 * Return info about a compiled pattern *
165 *************************************************/
166
167 /* This function picks potentially useful data out of the private
168 structure. The public options are passed back in an int - though the
169 re->options field has been expanded to a long int, all the public options
170 at the low end of it, and so even on 16-bit systems this will still be OK.
171 Therefore, I haven't changed the API for pcre_info().
172
173 Arguments:
174 external_re points to compiled code
175 optptr where to pass back the options
176 first_char where to pass back the first character,
177 or -1 if multiline and all branches start ^,
178 or -2 otherwise
179
180 Returns: number of identifying extraction brackets
181 or negative values on error
182 */
183
184 int
185 pcre_info(const pcre *external_re, int *optptr, int *first_char)
186 {
187 const real_pcre *re = (const real_pcre *)external_re;
188 if (re == NULL) return PCRE_ERROR_NULL;
189 if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
190 if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
191 if (first_char != NULL)
192 *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
193 ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
194 return re->top_bracket;
195 }
196
197
198
199
#ifdef DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Print a sequence of chars in printable format, stopping at the end of the
subject if requested. Characters for which isprint() is false are shown as a
two-digit hexadecimal escape of the form \xhh.

Arguments:
  p           points to characters
  length      number to print
  is_subject  TRUE if printing from within md->start_subject; the count is
                then limited so as not to run past md->end_subject
  md          pointer to matching data block, if is_subject is TRUE

Returns:     nothing
*/

static void
pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
{
int remaining = length;

if (is_subject && remaining > md->end_subject - p)
  remaining = md->end_subject - p;

for (; remaining > 0; remaining--)
  {
  int ch = *p++;
  if (isprint(ch))
    printf("%c", ch);
  else
    printf("\\x%02x", ch);
  }
}
#endif
226
227
228
229
230 /*************************************************
231 * Handle escapes *
232 *************************************************/
233
234 /* This function is called when a \ has been encountered. It either returns a
235 positive value for a simple escape such as \n, or a negative value which
236 encodes one of the more complicated things such as \d. On entry, ptr is
237 pointing at the \. On exit, it is on the final character of the escape
238 sequence.
239
240 Arguments:
241 ptrptr points to the pattern position pointer
242 errorptr points to the pointer to the error message
243 bracount number of previous extracting brackets
244 options the options bits
245 isclass TRUE if inside a character class
246 cd pointer to char tables block
247
248 Returns: zero or positive => a data character
249 negative => a special escape sequence
250 on error, errorptr is set
251 */
252
253 static int
254 check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
255 int options, BOOL isclass, compile_data *cd)
256 {
257 const uschar *ptr = *ptrptr;
258 int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */
259 int i;
260
261 if (c == 0) *errorptr = ERR1;
262
263 /* Digits or letters may have special meaning; all others are literals. */
264
265 else if (c < '0' || c > 'z') {}
266
267 /* Do an initial lookup in a table. A non-zero result is something that can be
268 returned immediately. Otherwise further processing may be required. */
269
270 else if ((i = escapes[c - '0']) != 0) c = i;
271
272 /* Escapes that need further processing, or are illegal. */
273
274 else
275 {
276 const uschar *oldptr;
277 switch (c)
278 {
279 /* The handling of escape sequences consisting of a string of digits
280 starting with one that is not zero is not straightforward. By experiment,
281 the way Perl works seems to be as follows:
282
283 Outside a character class, the digits are read as a decimal number. If the
284 number is less than 10, or if there are that many previous extracting
285 left brackets, then it is a back reference. Otherwise, up to three octal
286 digits are read to form an escaped byte. Thus \123 is likely to be octal
287 123 (cf \0123, which is octal 012 followed by the literal 3). If the octal
288 value is greater than 377, the least significant 8 bits are taken. Inside a
289 character class, \ followed by a digit is always an octal number. */
290
291 case '1': case '2': case '3': case '4': case '5':
292 case '6': case '7': case '8': case '9':
293
294 if (!isclass)
295 {
296 oldptr = ptr;
297 c -= '0';
298 while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
299 c = c * 10 + *(++ptr) - '0';
300 if (c < 10 || c <= bracount)
301 {
302 c = -(ESC_REF + c);
303 break;
304 }
305 ptr = oldptr; /* Put the pointer back and fall through */
306 }
307
308 /* Handle an octal number following \. If the first digit is 8 or 9, Perl
309 generates a binary zero byte and treats the digit as a following literal.
310 Thus we have to pull back the pointer by one. */
311
312 if ((c = *ptr) >= '8')
313 {
314 ptr--;
315 c = 0;
316 break;
317 }
318
319 /* \0 always starts an octal number, but we may drop through to here with a
320 larger first octal digit */
321
322 case '0':
323 c -= '0';
324 while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
325 ptr[1] != '8' && ptr[1] != '9')
326 c = c * 8 + *(++ptr) - '0';
327 break;
328
329 /* Special escapes not starting with a digit are straightforward */
330
331 case 'x':
332 c = 0;
333 while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
334 {
335 ptr++;
336 c = c * 16 + cd->lcc[*ptr] -
337 (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
338 }
339 break;
340
341 case 'c':
342 c = *(++ptr);
343 if (c == 0)
344 {
345 *errorptr = ERR2;
346 return 0;
347 }
348
349 /* A letter is upper-cased; then the 0x40 bit is flipped */
350
351 if (c >= 'a' && c <= 'z') c = cd->fcc[c];
352 c ^= 0x40;
353 break;
354
355 /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
356 other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
357 for Perl compatibility, it is a literal. This code looks a bit odd, but
358 there used to be some cases other than the default, and there may be again
359 in future, so I haven't "optimized" it. */
360
361 default:
362 if ((options & PCRE_EXTRA) != 0) switch(c)
363 {
364 default:
365 *errorptr = ERR3;
366 break;
367 }
368 break;
369 }
370 }
371
372 *ptrptr = ptr;
373 return c;
374 }
375
376
377
378 /*************************************************
379 * Check for counted repeat *
380 *************************************************/
381
382 /* This function is called when a '{' is encountered in a place where it might
383 start a quantifier. It looks ahead to see if it really is a quantifier or not.
384 It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
385 where the ddds are digits.
386
387 Arguments:
388 p pointer to the first char after '{'
389 cd pointer to char tables block
390
391 Returns: TRUE or FALSE
392 */
393
394 static BOOL
395 is_counted_repeat(const uschar *p, compile_data *cd)
396 {
397 if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
398 while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
399 if (*p == '}') return TRUE;
400
401 if (*p++ != ',') return FALSE;
402 if (*p == '}') return TRUE;
403
404 if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
405 while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
406 return (*p == '}');
407 }
408
409
410
411 /*************************************************
412 * Read repeat counts *
413 *************************************************/
414
415 /* Read an item of the form {n,m} and return the values. This is called only
416 after is_counted_repeat() has confirmed that a repeat-count quantifier exists,
417 so the syntax is guaranteed to be correct, but we need to check the values.
418
419 Arguments:
420 p pointer to first char after '{'
421 minp pointer to int for min
422 maxp pointer to int for max
423 returned as -1 if no max
424 errorptr points to pointer to error message
425 cd pointer to character tables clock
426
427 Returns: pointer to '}' on success;
428 current ptr on error, with errorptr set
429 */
430
431 static const uschar *
432 read_repeat_counts(const uschar *p, int *minp, int *maxp,
433 const char **errorptr, compile_data *cd)
434 {
435 int min = 0;
436 int max = -1;
437
438 while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
439
440 if (*p == '}') max = min; else
441 {
442 if (*(++p) != '}')
443 {
444 max = 0;
445 while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
446 if (max < min)
447 {
448 *errorptr = ERR4;
449 return p;
450 }
451 }
452 }
453
454 /* Do paranoid checks, then fill in the required variables, and pass back the
455 pointer to the terminating '}'. */
456
457 if (min > 65535 || max > 65535)
458 *errorptr = ERR5;
459 else
460 {
461 *minp = min;
462 *maxp = max;
463 }
464 return p;
465 }
466
467
468
469 /*************************************************
470 * Find the fixed length of a pattern *
471 *************************************************/
472
473 /* Scan a pattern and compute the fixed length of subject that will match it,
474 if the length is fixed. This is needed for dealing with backward assertions.
475
476 Arguments:
477 code points to the start of the pattern (the bracket)
478
479 Returns: the fixed length, or -1 if there is no fixed length
480 */
481
482 static int
483 find_fixedlength(uschar *code)
484 {
485 int length = -1;
486
487 register int branchlength = 0;
488 register uschar *cc = code + 3;
489
490 /* Scan along the opcodes for this branch. If we get to the end of the
491 branch, check the length against that of the other branches. */
492
493 for (;;)
494 {
495 int d;
496 register int op = *cc;
497 if (op >= OP_BRA) op = OP_BRA;
498
499 switch (op)
500 {
501 case OP_BRA:
502 case OP_ONCE:
503 case OP_COND:
504 d = find_fixedlength(cc);
505 if (d < 0) return -1;
506 branchlength += d;
507 do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
508 cc += 3;
509 break;
510
511 /* Reached end of a branch; if it's a ket it is the end of a nested
512 call. If it's ALT it is an alternation in a nested call. If it is
513 END it's the end of the outer call. All can be handled by the same code. */
514
515 case OP_ALT:
516 case OP_KET:
517 case OP_KETRMAX:
518 case OP_KETRMIN:
519 case OP_END:
520 if (length < 0) length = branchlength;
521 else if (length != branchlength) return -1;
522 if (*cc != OP_ALT) return length;
523 cc += 3;
524 branchlength = 0;
525 break;
526
527 /* Skip over assertive subpatterns */
528
529 case OP_ASSERT:
530 case OP_ASSERT_NOT:
531 case OP_ASSERTBACK:
532 case OP_ASSERTBACK_NOT:
533 do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
534 cc += 3;
535 break;
536
537 /* Skip over things that don't match chars */
538
539 case OP_REVERSE:
540 cc++;
541 /* Fall through */
542
543 case OP_CREF:
544 case OP_OPT:
545 cc++;
546 /* Fall through */
547
548 case OP_SOD:
549 case OP_EOD:
550 case OP_EODN:
551 case OP_CIRC:
552 case OP_DOLL:
553 case OP_NOT_WORD_BOUNDARY:
554 case OP_WORD_BOUNDARY:
555 cc++;
556 break;
557
558 /* Handle char strings */
559
560 case OP_CHARS:
561 branchlength += *(++cc);
562 cc += *cc + 1;
563 break;
564
565 /* Handle exact repetitions */
566
567 case OP_EXACT:
568 case OP_TYPEEXACT:
569 branchlength += (cc[1] << 8) + cc[2];
570 cc += 4;
571 break;
572
573 /* Handle single-char matchers */
574
575 case OP_NOT_DIGIT:
576 case OP_DIGIT:
577 case OP_NOT_WHITESPACE:
578 case OP_WHITESPACE:
579 case OP_NOT_WORDCHAR:
580 case OP_WORDCHAR:
581 case OP_ANY:
582 branchlength++;
583 cc++;
584 break;
585
586
587 /* Check a class for variable quantification */
588
589 case OP_CLASS:
590 cc += (*cc == OP_REF)? 2 : 33;
591
592 switch (*cc)
593 {
594 case OP_CRSTAR:
595 case OP_CRMINSTAR:
596 case OP_CRQUERY:
597 case OP_CRMINQUERY:
598 return -1;
599
600 case OP_CRRANGE:
601 case OP_CRMINRANGE:
602 if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;
603 branchlength += (cc[1] << 8) + cc[2];
604 cc += 5;
605 break;
606
607 default:
608 branchlength++;
609 }
610 break;
611
612 /* Anything else is variable length */
613
614 default:
615 return -1;
616 }
617 }
618 /* Control never gets here */
619 }
620
621
622
623
624 /*************************************************
625 * Compile one branch *
626 *************************************************/
627
628 /* Scan the pattern, compiling it into the code vector.
629
630 Arguments:
631 options the option bits
632 brackets points to number of brackets used
633 code points to the pointer to the current code point
634 ptrptr points to the current pattern pointer
635 errorptr points to pointer to error message
636 optchanged set to the value of the last OP_OPT item compiled
637 reqchar set to the last literal character required, else -1
638 countlits set to count of mandatory literal characters
639 cd contains pointers to tables
640
641 Returns: TRUE on success
642 FALSE, with *errorptr set on error
643 */
644
645 static BOOL
646 compile_branch(int options, int *brackets, uschar **codeptr,
647 const uschar **ptrptr, const char **errorptr, int *optchanged,
648 int *reqchar, int *countlits, compile_data *cd)
649 {
650 int repeat_type, op_type;
651 int repeat_min, repeat_max;
652 int bravalue, length;
653 int greedy_default, greedy_non_default;
654 int prevreqchar;
655 int condcount = 0;
656 int subcountlits = 0;
657 register int c;
658 register uschar *code = *codeptr;
659 uschar *tempcode;
660 const uschar *ptr = *ptrptr;
661 const uschar *tempptr;
662 uschar *previous = NULL;
663 uschar class[32];
664
665 /* Set up the default and non-default settings for greediness */
666
667 greedy_default = ((options & PCRE_UNGREEDY) != 0);
668 greedy_non_default = greedy_default ^ 1;
669
670 /* Initialize no required char, and count of literals */
671
672 *reqchar = prevreqchar = -1;
673 *countlits = 0;
674
675 /* Switch on next character until the end of the branch */
676
677 for (;; ptr++)
678 {
679 BOOL negate_class;
680 int class_charcount;
681 int class_lastchar;
682 int newoptions;
683 int condref;
684 int subreqchar;
685
686 c = *ptr;
687 if ((options & PCRE_EXTENDED) != 0)
688 {
689 if ((cd->ctypes[c] & ctype_space) != 0) continue;
690 if (c == '#')
691 {
692 while ((c = *(++ptr)) != 0 && c != '\n');
693 continue;
694 }
695 }
696
697 switch(c)
698 {
699 /* The branch terminates at end of string, |, or ). */
700
701 case 0:
702 case '|':
703 case ')':
704 *codeptr = code;
705 *ptrptr = ptr;
706 return TRUE;
707
708 /* Handle single-character metacharacters */
709
710 case '^':
711 previous = NULL;
712 *code++ = OP_CIRC;
713 break;
714
715 case '$':
716 previous = NULL;
717 *code++ = OP_DOLL;
718 break;
719
720 case '.':
721 previous = code;
722 *code++ = OP_ANY;
723 break;
724
725 /* Character classes. These always build a 32-byte bitmap of the permitted
726 characters, except in the special case where there is only one character.
727 For negated classes, we build the map as usual, then invert it at the end.
728 */
729
730 case '[':
731 previous = code;
732 *code++ = OP_CLASS;
733
734 /* If the first character is '^', set the negation flag and skip it. */
735
736 if ((c = *(++ptr)) == '^')
737 {
738 negate_class = TRUE;
739 c = *(++ptr);
740 }
741 else negate_class = FALSE;
742
743 /* Keep a count of chars so that we can optimize the case of just a single
744 character. */
745
746 class_charcount = 0;
747 class_lastchar = -1;
748
749 /* Initialize the 32-char bit map to all zeros. We have to build the
750 map in a temporary bit of store, in case the class contains only 1
751 character, because in that case the compiled code doesn't use the
752 bit map. */
753
754 memset(class, 0, 32 * sizeof(uschar));
755
756 /* Process characters until ] is reached. By writing this as a "do" it
757 means that an initial ] is taken as a data character. */
758
759 do
760 {
761 if (c == 0)
762 {
763 *errorptr = ERR6;
764 goto FAILED;
765 }
766
767 /* Backslash may introduce a single character, or it may introduce one
768 of the specials, which just set a flag. Escaped items are checked for
769 validity in the pre-compiling pass. The sequence \b is a special case.
770 Inside a class (and only there) it is treated as backspace. Elsewhere
771 it marks a word boundary. Other escapes have preset maps ready to
772 or into the one we are building. We assume they have more than one
773 character in them, so set class_count bigger than one. */
774
775 if (c == '\\')
776 {
777 c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
778 if (-c == ESC_b) c = '\b';
779 else if (c < 0)
780 {
781 register const uschar *cbits = cd->cbits;
782 class_charcount = 10;
783 switch (-c)
784 {
785 case ESC_d:
786 for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
787 continue;
788
789 case ESC_D:
790 for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
791 continue;
792
793 case ESC_w:
794 for (c = 0; c < 32; c++)
795 class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]);
796 continue;
797
798 case ESC_W:
799 for (c = 0; c < 32; c++)
800 class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]);
801 continue;
802
803 case ESC_s:
804 for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
805 continue;
806
807 case ESC_S:
808 for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
809 continue;
810
811 default:
812 *errorptr = ERR7;
813 goto FAILED;
814 }
815 }
816 /* Fall through if single character */
817 }
818
819 /* A single character may be followed by '-' to form a range. However,
820 Perl does not permit ']' to be the end of the range. A '-' character
821 here is treated as a literal. */
822
823 if (ptr[1] == '-' && ptr[2] != ']')
824 {
825 int d;
826 ptr += 2;
827 d = *ptr;
828
829 if (d == 0)
830 {
831 *errorptr = ERR6;
832 goto FAILED;
833 }
834
835 /* The second part of a range can be a single-character escape, but
836 not any of the other escapes. */
837
838 if (d == '\\')
839 {
840 d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
841 if (d < 0)
842 {
843 if (d == -ESC_b) d = '\b'; else
844 {
845 *errorptr = ERR7;
846 goto FAILED;
847 }
848 }
849 }
850
851 if (d < c)
852 {
853 *errorptr = ERR8;
854 goto FAILED;
855 }
856
857 for (; c <= d; c++)
858 {
859 class[c/8] |= (1 << (c&7));
860 if ((options & PCRE_CASELESS) != 0)
861 {
862 int uc = cd->fcc[c]; /* flip case */
863 class[uc/8] |= (1 << (uc&7));
864 }
865 class_charcount++; /* in case a one-char range */
866 class_lastchar = c;
867 }
868 continue; /* Go get the next char in the class */
869 }
870
871 /* Handle a lone single character - we can get here for a normal
872 non-escape char, or after \ that introduces a single character. */
873
874 class [c/8] |= (1 << (c&7));
875 if ((options & PCRE_CASELESS) != 0)
876 {
877 c = cd->fcc[c]; /* flip case */
878 class[c/8] |= (1 << (c&7));
879 }
880 class_charcount++;
881 class_lastchar = c;
882 }
883
884 /* Loop until ']' reached; the check for end of string happens inside the
885 loop. This "while" is the end of the "do" above. */
886
887 while ((c = *(++ptr)) != ']');
888
889 /* If class_charcount is 1 and class_lastchar is not negative, we saw
890 precisely one character. This doesn't need the whole 32-byte bit map.
891 We turn it into a 1-character OP_CHAR if it's positive, or OP_NOT if
892 it's negative. */
893
894 if (class_charcount == 1 && class_lastchar >= 0)
895 {
896 if (negate_class)
897 {
898 code[-1] = OP_NOT;
899 }
900 else
901 {
902 code[-1] = OP_CHARS;
903 *code++ = 1;
904 }
905 *code++ = class_lastchar;
906 }
907
908 /* Otherwise, negate the 32-byte map if necessary, and copy it into
909 the code vector. */
910
911 else
912 {
913 if (negate_class)
914 for (c = 0; c < 32; c++) code[c] = ~class[c];
915 else
916 memcpy(code, class, 32);
917 code += 32;
918 }
919 break;
920
921 /* Various kinds of repeat */
922
923 case '{':
924 if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
925 ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
926 if (*errorptr != NULL) goto FAILED;
927 goto REPEAT;
928
929 case '*':
930 repeat_min = 0;
931 repeat_max = -1;
932 goto REPEAT;
933
934 case '+':
935 repeat_min = 1;
936 repeat_max = -1;
937 goto REPEAT;
938
939 case '?':
940 repeat_min = 0;
941 repeat_max = 1;
942
943 REPEAT:
944 if (previous == NULL)
945 {
946 *errorptr = ERR9;
947 goto FAILED;
948 }
949
950 /* If the next character is '?' this is a minimizing repeat, by default,
951 but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
952 next character. */
953
954 if (ptr[1] == '?')
955 { repeat_type = greedy_non_default; ptr++; }
956 else repeat_type = greedy_default;
957
958 /* If previous was a string of characters, chop off the last one and use it
959 as the subject of the repeat. If there was only one character, we can
960 abolish the previous item altogether. A repeat with a zero minimum wipes
961 out any reqchar setting, backing up to the previous value. We must also
962 adjust the countlits value. */
963
964 if (*previous == OP_CHARS)
965 {
966 int len = previous[1];
967
968 if (repeat_min == 0) *reqchar = prevreqchar;
969 *countlits += repeat_min - 1;
970
971 if (len == 1)
972 {
973 c = previous[2];
974 code = previous;
975 }
976 else
977 {
978 c = previous[len+1];
979 previous[1]--;
980 code--;
981 }
982 op_type = 0; /* Use single-char op codes */
983 goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
984 }
985
986 /* If previous was a single negated character ([^a] or similar), we use
987 one of the special opcodes, replacing it. The code is shared with single-
988 character repeats by adding a suitable offset into repeat_type. */
989
990 else if ((int)*previous == OP_NOT)
991 {
992 op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */
993 c = previous[1];
994 code = previous;
995 goto OUTPUT_SINGLE_REPEAT;
996 }
997
998 /* If previous was a character type match (\d or similar), abolish it and
999 create a suitable repeat item. The code is shared with single-character
1000 repeats by adding a suitable offset into repeat_type. */
1001
1002 else if ((int)*previous < OP_EODN || *previous == OP_ANY)
1003 {
1004 op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
1005 c = *previous;
1006 code = previous;
1007
1008 OUTPUT_SINGLE_REPEAT:
1009
1010 /* If the maximum is zero then the minimum must also be zero; Perl allows
1011 this case, so we do too - by simply omitting the item altogether. */
1012
1013 if (repeat_max == 0) goto END_REPEAT;
1014
1015 /* Combine the op_type with the repeat_type */
1016
1017 repeat_type += op_type;
1018
1019 /* A minimum of zero is handled either as the special case * or ?, or as
1020 an UPTO, with the maximum given. */
1021
1022 if (repeat_min == 0)
1023 {
1024 if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
1025 else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
1026 else
1027 {
1028 *code++ = OP_UPTO + repeat_type;
1029 *code++ = repeat_max >> 8;
1030 *code++ = (repeat_max & 255);
1031 }
1032 }
1033
1034 /* The case {1,} is handled as the special case + */
1035
1036 else if (repeat_min == 1 && repeat_max == -1)
1037 *code++ = OP_PLUS + repeat_type;
1038
1039 /* The case {n,n} is just an EXACT, while the general case {n,m} is
1040 handled as an EXACT followed by an UPTO. An EXACT of 1 is optimized. */
1041
1042 else
1043 {
1044 if (repeat_min != 1)
1045 {
1046 *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
1047 *code++ = repeat_min >> 8;
1048 *code++ = (repeat_min & 255);
1049 }
1050
1051 /* If the mininum is 1 and the previous item was a character string,
1052 we either have to put back the item that got cancelled if the string
1053 length was 1, or add the character back onto the end of a longer
1054 string. For a character type nothing need be done; it will just get
1055 put back naturally. Note that the final character is always going to
1056 get added below. */
1057
1058 else if (*previous == OP_CHARS)
1059 {
1060 if (code == previous) code += 2; else previous[1]++;
1061 }
1062
1063 /* For a single negated character we also have to put back the
1064 item that got cancelled. */
1065
1066 else if (*previous == OP_NOT) code++;
1067
1068 /* If the maximum is unlimited, insert an OP_STAR. */
1069
1070 if (repeat_max < 0)
1071 {
1072 *code++ = c;
1073 *code++ = OP_STAR + repeat_type;
1074 }
1075
1076 /* Else insert an UPTO if the max is greater than the min. */
1077
1078 else if (repeat_max != repeat_min)
1079 {
1080 *code++ = c;
1081 repeat_max -= repeat_min;
1082 *code++ = OP_UPTO + repeat_type;
1083 *code++ = repeat_max >> 8;
1084 *code++ = (repeat_max & 255);
1085 }
1086 }
1087
1088 /* The character or character type itself comes last in all cases. */
1089
1090 *code++ = c;
1091 }
1092
1093 /* If previous was a character class or a back reference, we put the repeat
1094 stuff after it, but just skip the item if the repeat was {0,0}. */
1095
1096 else if (*previous == OP_CLASS || *previous == OP_REF)
1097 {
1098 if (repeat_max == 0)
1099 {
1100 code = previous;
1101 goto END_REPEAT;
1102 }
1103 if (repeat_min == 0 && repeat_max == -1)
1104 *code++ = OP_CRSTAR + repeat_type;
1105 else if (repeat_min == 1 && repeat_max == -1)
1106 *code++ = OP_CRPLUS + repeat_type;
1107 else if (repeat_min == 0 && repeat_max == 1)
1108 *code++ = OP_CRQUERY + repeat_type;
1109 else
1110 {
1111 *code++ = OP_CRRANGE + repeat_type;
1112 *code++ = repeat_min >> 8;
1113 *code++ = repeat_min & 255;
1114 if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */
1115 *code++ = repeat_max >> 8;
1116 *code++ = repeat_max & 255;
1117 }
1118 }
1119
1120 /* If previous was a bracket group, we may have to replicate it in certain
1121 cases. */
1122
1123 else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1124 (int)*previous == OP_COND)
1125 {
1126 register int i;
1127 int ketoffset = 0;
1128 int len = code - previous;
1129 uschar *bralink = NULL;
1130
1131 /* If the maximum repeat count is unlimited, find the end of the bracket
1132 by scanning through from the start, and compute the offset back to it
1133 from the current code pointer. There may be an OP_OPT setting following
1134 the final KET, so we can't find the end just by going back from the code
1135 pointer. */
1136
1137 if (repeat_max == -1)
1138 {
1139 register uschar *ket = previous;
1140 do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
1141 ketoffset = code - ket;
1142 }
1143
1144 /* The case of a zero minimum is special because of the need to stick
1145 OP_BRAZERO in front of it, and because the group appears once in the
1146 data, whereas in other cases it appears the minimum number of times. For
1147 this reason, it is simplest to treat this case separately, as otherwise
1148 the code gets far too messy. There are several special subcases when the
1149 minimum is zero. */
1150
1151 if (repeat_min == 0)
1152 {
1153 /* If we set up a required char from the bracket, we must back off
1154 to the previous value and reset the countlits value too. */
1155
1156 if (subcountlits > 0)
1157 {
1158 *reqchar = prevreqchar;
1159 *countlits -= subcountlits;
1160 }
1161
1162 /* If the maximum is also zero, we just omit the group from the output
1163 altogether. */
1164
1165 if (repeat_max == 0)
1166 {
1167 code = previous;
1168 goto END_REPEAT;
1169 }
1170
1171 /* If the maximum is 1 or unlimited, we just have to stick in the
1172 BRAZERO and do no more at this point. */
1173
1174 if (repeat_max <= 1)
1175 {
1176 memmove(previous+1, previous, len);
1177 code++;
1178 *previous++ = OP_BRAZERO + repeat_type;
1179 }
1180
1181 /* If the maximum is greater than 1 and limited, we have to replicate
1182 in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1183 The first one has to be handled carefully because it's the original
1184 copy, which has to be moved up. The remainder can be handled by code
1185 that is common with the non-zero minimum case below. We just have to
1186 adjust the value of repeat_max, since one less copy is required. */
1187
1188 else
1189 {
1190 int offset;
1191 memmove(previous+4, previous, len);
1192 code += 4;
1193 *previous++ = OP_BRAZERO + repeat_type;
1194 *previous++ = OP_BRA;
1195
1196 /* We chain together the bracket offset fields that have to be
1197 filled in later when the ends of the brackets are reached. */
1198
1199 offset = (bralink == NULL)? 0 : previous - bralink;
1200 bralink = previous;
1201 *previous++ = offset >> 8;
1202 *previous++ = offset & 255;
1203 }
1204
1205 repeat_max--;
1206 }
1207
1208 /* If the minimum is greater than zero, replicate the group as many
1209 times as necessary, and adjust the maximum to the number of subsequent
1210 copies that we need. */
1211
1212 else
1213 {
1214 for (i = 1; i < repeat_min; i++)
1215 {
1216 memcpy(code, previous, len);
1217 code += len;
1218 }
1219 if (repeat_max > 0) repeat_max -= repeat_min;
1220 }
1221
1222 /* This code is common to both the zero and non-zero minimum cases. If
1223 the maximum is limited, it replicates the group in a nested fashion,
1224 remembering the bracket starts on a stack. In the case of a zero minimum,
1225 the first one was set up above. In all cases the repeat_max now specifies
1226 the number of additional copies needed. */
1227
1228 if (repeat_max >= 0)
1229 {
1230 for (i = repeat_max - 1; i >= 0; i--)
1231 {
1232 *code++ = OP_BRAZERO + repeat_type;
1233
1234 /* All but the final copy start a new nesting, maintaining the
1235 chain of brackets outstanding. */
1236
1237 if (i != 0)
1238 {
1239 int offset;
1240 *code++ = OP_BRA;
1241 offset = (bralink == NULL)? 0 : code - bralink;
1242 bralink = code;
1243 *code++ = offset >> 8;
1244 *code++ = offset & 255;
1245 }
1246
1247 memcpy(code, previous, len);
1248 code += len;
1249 }
1250
1251 /* Now chain through the pending brackets, and fill in their length
1252 fields (which are holding the chain links pro tem). */
1253
1254 while (bralink != NULL)
1255 {
1256 int oldlinkoffset;
1257 int offset = code - bralink + 1;
1258 uschar *bra = code - offset;
1259 oldlinkoffset = (bra[1] << 8) + bra[2];
1260 bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
1261 *code++ = OP_KET;
1262 *code++ = bra[1] = offset >> 8;
1263 *code++ = bra[2] = (offset & 255);
1264 }
1265 }
1266
1267 /* If the maximum is unlimited, set a repeater in the final copy. We
1268 can't just offset backwards from the current code point, because we
1269 don't know if there's been an options resetting after the ket. The
1270 correct offset was computed above. */
1271
1272 else code[-ketoffset] = OP_KETRMAX + repeat_type;
1273 }
1274
1275 /* Else there's some kind of shambles */
1276
1277 else
1278 {
1279 *errorptr = ERR11;
1280 goto FAILED;
1281 }
1282
1283 /* In all cases we no longer have a previous item. */
1284
1285 END_REPEAT:
1286 previous = NULL;
1287 break;
1288
1289
1290 /* Start of nested bracket sub-expression, or comment or lookahead or
1291 lookbehind or option setting or condition. First deal with special things
1292 that can come after a bracket; all are introduced by ?, and the appearance
1293 of any of them means that this is not a referencing group. They were
1294 checked for validity in the first pass over the string, so we don't have to
1295 check for syntax errors here. */
1296
1297 case '(':
1298 newoptions = options;
1299 condref = -1;
1300
1301 if (*(++ptr) == '?')
1302 {
1303 int set, unset;
1304 int *optset;
1305
1306 switch (*(++ptr))
1307 {
1308 case '#': /* Comment; skip to ket */
1309 ptr++;
1310 while (*ptr != ')') ptr++;
1311 continue;
1312
1313 case ':': /* Non-extracting bracket */
1314 bravalue = OP_BRA;
1315 ptr++;
1316 break;
1317
1318 case '(':
1319 bravalue = OP_COND; /* Conditional group */
1320 if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1321 {
1322 condref = *ptr - '0';
1323 while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1324 ptr++;
1325 }
1326 else ptr--;
1327 break;
1328
1329 case '=': /* Positive lookahead */
1330 bravalue = OP_ASSERT;
1331 ptr++;
1332 break;
1333
1334 case '!': /* Negative lookahead */
1335 bravalue = OP_ASSERT_NOT;
1336 ptr++;
1337 break;
1338
1339 case '<': /* Lookbehinds */
1340 switch (*(++ptr))
1341 {
1342 case '=': /* Positive lookbehind */
1343 bravalue = OP_ASSERTBACK;
1344 ptr++;
1345 break;
1346
1347 case '!': /* Negative lookbehind */
1348 bravalue = OP_ASSERTBACK_NOT;
1349 ptr++;
1350 break;
1351
1352 default: /* Syntax error */
1353 *errorptr = ERR24;
1354 goto FAILED;
1355 }
1356 break;
1357
1358 case '>': /* One-time brackets */
1359 bravalue = OP_ONCE;
1360 ptr++;
1361 break;
1362
1363 default: /* Option setting */
1364 set = unset = 0;
1365 optset = &set;
1366
1367 while (*ptr != ')' && *ptr != ':')
1368 {
1369 switch (*ptr++)
1370 {
1371 case '-': optset = &unset; break;
1372
1373 case 'i': *optset |= PCRE_CASELESS; break;
1374 case 'm': *optset |= PCRE_MULTILINE; break;
1375 case 's': *optset |= PCRE_DOTALL; break;
1376 case 'x': *optset |= PCRE_EXTENDED; break;
1377 case 'U': *optset |= PCRE_UNGREEDY; break;
1378 case 'X': *optset |= PCRE_EXTRA; break;
1379
1380 default:
1381 *errorptr = ERR12;
1382 goto FAILED;
1383 }
1384 }
1385
1386 /* Set up the changed option bits, but don't change anything yet. */
1387
1388 newoptions = (options | set) & (~unset);
1389
1390 /* If the options ended with ')' this is not the start of a nested
1391 group with option changes, so the options change at this level. At top
1392 level there is nothing else to be done (the options will in fact have
1393 been set from the start of compiling as a result of the first pass) but
1394 at an inner level we must compile code to change the ims options if
1395 necessary, and pass the new setting back so that it can be put at the
1396 start of any following branches, and when this group ends, a resetting
1397 item can be compiled. */
1398
1399 if (*ptr == ')')
1400 {
1401 if ((options & PCRE_INGROUP) != 0 &&
1402 (options & PCRE_IMS) != (newoptions & PCRE_IMS))
1403 {
1404 *code++ = OP_OPT;
1405 *code++ = *optchanged = newoptions & PCRE_IMS;
1406 }
1407 options = newoptions; /* Change options at this level */
1408 previous = NULL; /* This item can't be repeated */
1409 continue; /* It is complete */
1410 }
1411
1412 /* If the options ended with ':' we are heading into a nested group
1413 with possible change of options. Such groups are non-capturing and are
1414 not assertions of any kind. All we need to do is skip over the ':';
1415 the newoptions value is handled below. */
1416
1417 bravalue = OP_BRA;
1418 ptr++;
1419 }
1420 }
1421
1422 /* Else we have a referencing group; adjust the opcode. */
1423
1424 else
1425 {
1426 if (++(*brackets) > EXTRACT_MAX)
1427 {
1428 *errorptr = ERR13;
1429 goto FAILED;
1430 }
1431 bravalue = OP_BRA + *brackets;
1432 }
1433
1434 /* Process nested bracketed re. Assertions may not be repeated, but other
1435 kinds can be. We copy code into a non-register variable in order to be able
1436 to pass its address because some compilers complain otherwise. Pass in a
1437 new setting for the ims options if they have changed. */
1438
1439 previous = (bravalue >= OP_ONCE)? code : NULL;
1440 *code = bravalue;
1441 tempcode = code;
1442
1443 if (!compile_regex(
1444 options | PCRE_INGROUP, /* Set for all nested groups */
1445 ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1446 newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1447 brackets, /* Bracket level */
1448 &tempcode, /* Where to put code (updated) */
1449 &ptr, /* Input pointer (updated) */
1450 errorptr, /* Where to put an error message */
1451 (bravalue == OP_ASSERTBACK ||
1452 bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1453 condref, /* Condition reference number */
1454 &subreqchar, /* For possible last char */
1455 &subcountlits, /* For literal count */
1456 cd)) /* Tables block */
1457 goto FAILED;
1458
1459 /* At the end of compiling, code is still pointing to the start of the
1460 group, while tempcode has been updated to point past the end of the group
1461 and any option resetting that may follow it. The pattern pointer (ptr)
1462 is on the bracket. */
1463
1464 /* If this is a conditional bracket, check that there are no more than
1465 two branches in the group. */
1466
1467 if (bravalue == OP_COND)
1468 {
1469 uschar *tc = code;
1470 condcount = 0;
1471
1472 do {
1473 condcount++;
1474 tc += (tc[1] << 8) | tc[2];
1475 }
1476 while (*tc != OP_KET);
1477
1478 if (condcount > 2)
1479 {
1480 *errorptr = ERR27;
1481 goto FAILED;
1482 }
1483 }
1484
1485 /* Handle updating of the required character. If the subpattern didn't
1486 set one, leave it as it was. Otherwise, update it for normal brackets of
1487 all kinds, forward assertions, and conditions with two branches. Don't
1488 update the literal count for forward assertions, however. If the bracket
1489 is followed by a quantifier with zero repeat, we have to back off. Hence
1490 the definition of prevreqchar and subcountlits outside the main loop so
1491 that they can be accessed for the back off. */
1492
1493 if (subreqchar > 0 &&
1494 (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT ||
1495 (bravalue == OP_COND && condcount == 2)))
1496 {
1497 prevreqchar = *reqchar;
1498 *reqchar = subreqchar;
1499 if (bravalue != OP_ASSERT) *countlits += subcountlits;
1500 }
1501
1502 /* Now update the main code pointer to the end of the group. */
1503
1504 code = tempcode;
1505
1506 /* Error if hit end of pattern */
1507
1508 if (*ptr != ')')
1509 {
1510 *errorptr = ERR14;
1511 goto FAILED;
1512 }
1513 break;
1514
1515 /* Check \ for being a real metacharacter; if not, fall through and handle
1516 it as a data character at the start of a string. Escape items are checked
1517 for validity in the pre-compiling pass. */
1518
1519 case '\\':
1520 tempptr = ptr;
1521 c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1522
1523 /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1524 are arranged to be the negation of the corresponding OP_values. For the
1525 back references, the values are ESC_REF plus the reference number. Only
1526 back references and those types that consume a character may be repeated.
1527 We can test for values between ESC_b and ESC_Z for the latter; this may
1528 have to change if any new ones are ever created. */
1529
1530 if (c < 0)
1531 {
1532 if (-c >= ESC_REF)
1533 {
1534 previous = code;
1535 *code++ = OP_REF;
1536 *code++ = -c - ESC_REF;
1537 }
1538 else
1539 {
1540 previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
1541 *code++ = -c;
1542 }
1543 continue;
1544 }
1545
1546 /* Data character: reset and fall through */
1547
1548 ptr = tempptr;
1549 c = '\\';
1550
1551 /* Handle a run of data characters until a metacharacter is encountered.
1552 The first character is guaranteed not to be whitespace or # when the
1553 extended flag is set. */
1554
1555 NORMAL_CHAR:
1556 default:
1557 previous = code;
1558 *code = OP_CHARS;
1559 code += 2;
1560 length = 0;
1561
1562 do
1563 {
1564 if ((options & PCRE_EXTENDED) != 0)
1565 {
1566 if ((cd->ctypes[c] & ctype_space) != 0) continue;
1567 if (c == '#')
1568 {
1569 while ((c = *(++ptr)) != 0 && c != '\n');
1570 if (c == 0) break;
1571 continue;
1572 }
1573 }
1574
1575 /* Backslash may introduce a data char or a metacharacter. Escaped items
1576 are checked for validity in the pre-compiling pass. Stop the string
1577 before a metaitem. */
1578
1579 if (c == '\\')
1580 {
1581 tempptr = ptr;
1582 c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1583 if (c < 0) { ptr = tempptr; break; }
1584 }
1585
1586 /* Ordinary character or single-char escape */
1587
1588 *code++ = c;
1589 length++;
1590 }
1591
1592 /* This "while" is the end of the "do" above. */
1593
1594 while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1595
1596 /* Update the last character and the count of literals */
1597
1598 prevreqchar = (length > 1)? code[-2] : *reqchar;
1599 *reqchar = code[-1];
1600 *countlits += length;
1601
1602 /* Compute the length and set it in the data vector, and advance to
1603 the next state. */
1604
1605 previous[1] = length;
1606 if (length < 255) ptr--;
1607 break;
1608 }
1609 } /* end of big loop */
1610
1611 /* Control never reaches here by falling through, only by a goto for all the
1612 error states. Pass back the position in the pattern so that it can be displayed
1613 to the user for diagnosing the error. */
1614
1615 FAILED:
1616 *ptrptr = ptr;
1617 return FALSE;
1618 }
1619
1620
1621
1622
1623 /*************************************************
1624 * Compile sequence of alternatives *
1625 *************************************************/
1626
1627 /* On entry, ptr is pointing past the bracket character, but on return
1628 it points to the closing bracket, or vertical bar, or end of string.
1629 The code variable is pointing at the byte into which the BRA operator has been
1630 stored. If the ims options are changed at the start (for a (?ims: group) or
1631 during any branch, we need to insert an OP_OPT item at the start of every
1632 following branch to ensure they get set correctly at run time, and also pass
1633 the new options into every subsequent branch compile.
1634
1635 Argument:
1636 options the option bits
1637 optchanged new ims options to set as if (?ims) were at the start, or -1
1638 for no change
1639 brackets -> int containing the number of extracting brackets used
1640 codeptr -> the address of the current code pointer
1641 ptrptr -> the address of the current pattern pointer
1642 errorptr -> pointer to error message
1643 lookbehind TRUE if this is a lookbehind assertion
1644 condref > 0 for OPT_CREF setting at start of conditional group
1645 reqchar -> place to put the last required character, or a negative number
1646 countlits -> place to put the shortest literal count of any branch
1647 cd points to the data block with tables pointers
1648
1649 Returns: TRUE on success
1650 */
1651
1652 static BOOL
1653 compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1654 const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1655 int *reqchar, int *countlits, compile_data *cd)
1656 {
1657 const uschar *ptr = *ptrptr;
1658 uschar *code = *codeptr;
1659 uschar *last_branch = code;
1660 uschar *start_bracket = code;
1661 uschar *reverse_count = NULL;
1662 int oldoptions = options & PCRE_IMS;
1663 int branchreqchar, branchcountlits;
1664
1665 *reqchar = -1;
1666 *countlits = INT_MAX;
1667 code += 3;
1668
1669 /* At the start of a reference-based conditional group, insert the reference
1670 number as an OP_CREF item. */
1671
1672 if (condref > 0)
1673 {
1674 *code++ = OP_CREF;
1675 *code++ = condref;
1676 }
1677
1678 /* Loop for each alternative branch */
1679
1680 for (;;)
1681 {
1682 int length;
1683
1684 /* Handle change of options */
1685
1686 if (optchanged >= 0)
1687 {
1688 *code++ = OP_OPT;
1689 *code++ = optchanged;
1690 options = (options & ~PCRE_IMS) | optchanged;
1691 }
1692
1693 /* Set up dummy OP_REVERSE if lookbehind assertion */
1694
1695 if (lookbehind)
1696 {
1697 *code++ = OP_REVERSE;
1698 reverse_count = code;
1699 *code++ = 0;
1700 *code++ = 0;
1701 }
1702
1703 /* Now compile the branch */
1704
1705 if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged,
1706 &branchreqchar, &branchcountlits, cd))
1707 {
1708 *ptrptr = ptr;
1709 return FALSE;
1710 }
1711
1712 /* Fill in the length of the last branch */
1713
1714 length = code - last_branch;
1715 last_branch[1] = length >> 8;
1716 last_branch[2] = length & 255;
1717
1718 /* Save the last required character if all branches have the same; a current
1719 value of -1 means unset, while -2 means "previous branch had no last required
1720 char". */
1721
1722 if (*reqchar != -2)
1723 {
1724 if (branchreqchar >= 0)
1725 {
1726 if (*reqchar == -1) *reqchar = branchreqchar;
1727 else if (*reqchar != branchreqchar) *reqchar = -2;
1728 }
1729 else *reqchar = -2;
1730 }
1731
1732 /* Keep the shortest literal count */
1733
1734 if (branchcountlits < *countlits) *countlits = branchcountlits;
1735 DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits));
1736
1737 /* If lookbehind, check that this branch matches a fixed-length string,
1738 and put the length into the OP_REVERSE item. Temporarily mark the end of
1739 the branch with OP_END. */
1740
1741 if (lookbehind)
1742 {
1743 *code = OP_END;
1744 length = find_fixedlength(last_branch);
1745 DPRINTF(("fixed length = %d\n", length));
1746 if (length < 0)
1747 {
1748 *errorptr = ERR25;
1749 *ptrptr = ptr;
1750 return FALSE;
1751 }
1752 reverse_count[0] = (length >> 8);
1753 reverse_count[1] = length & 255;
1754 }
1755
1756 /* Reached end of expression, either ')' or end of pattern. Insert a
1757 terminating ket and the length of the whole bracketed item, and return,
1758 leaving the pointer at the terminating char. If any of the ims options
1759 were changed inside the group, compile a resetting op-code following. */
1760
1761 if (*ptr != '|')
1762 {
1763 length = code - start_bracket;
1764 *code++ = OP_KET;
1765 *code++ = length >> 8;
1766 *code++ = length & 255;
1767 if (optchanged >= 0)
1768 {
1769 *code++ = OP_OPT;
1770 *code++ = oldoptions;
1771 }
1772 *codeptr = code;
1773 *ptrptr = ptr;
1774 return TRUE;
1775 }
1776
1777 /* Another branch follows; insert an "or" node and advance the pointer. */
1778
1779 *code = OP_ALT;
1780 last_branch = code;
1781 code += 3;
1782 ptr++;
1783 }
1784 /* Control never reaches here */
1785 }
1786
1787
1788
1789
1790 /*************************************************
1791 * Find first significant op code *
1792 *************************************************/
1793
1794 /* This is called by several functions that scan a compiled expression looking
1795 for a fixed first character, or an anchoring op code etc. It skips over things
1796 that do not influence this. For one application, a change of caseless option is
1797 important.
1798
1799 Arguments:
1800 code pointer to the start of the group
1801 options pointer to external options
1802 optbit the option bit whose changing is significant, or
1803 zero if none are
1804 optstop TRUE to return on option change, otherwise change the options
1805 value and continue
1806
1807 Returns: pointer to the first significant opcode
1808 */
1809
1810 static const uschar*
1811 first_significant_code(const uschar *code, int *options, int optbit,
1812 BOOL optstop)
1813 {
1814 for (;;)
1815 {
1816 switch ((int)*code)
1817 {
1818 case OP_OPT:
1819 if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
1820 {
1821 if (optstop) return code;
1822 *options = (int)code[1];
1823 }
1824 code += 2;
1825 break;
1826
1827 case OP_CREF:
1828 code += 2;
1829 break;
1830
1831 case OP_WORD_BOUNDARY:
1832 case OP_NOT_WORD_BOUNDARY:
1833 code++;
1834 break;
1835
1836 case OP_ASSERT_NOT:
1837 case OP_ASSERTBACK:
1838 case OP_ASSERTBACK_NOT:
1839 do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
1840 code += 3;
1841 break;
1842
1843 default:
1844 return code;
1845 }
1846 }
1847 /* Control never reaches here */
1848 }
1849
1850
1851
1852
1853 /*************************************************
1854 * Check for anchored expression *
1855 *************************************************/
1856
1857 /* Try to find out if this is an anchored regular expression. Consider each
1858 alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
1859 all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
1860 it's anchored. However, if this is a multiline pattern, then only OP_SOD
1861 counts, since OP_CIRC can match in the middle.
1862
1863 A branch is also implicitly anchored if it starts with .* and DOTALL is set,
1864 because that will try the rest of the pattern at all possible matching points,
1865 so there is no point trying them again.
1866
1867 Arguments:
1868 code points to start of expression (the bracket)
1869 options points to the options setting
1870
1871 Returns: TRUE or FALSE
1872 */
1873
1874 static BOOL
1875 is_anchored(register const uschar *code, int *options)
1876 {
1877 do {
1878 const uschar *scode = first_significant_code(code + 3, options,
1879 PCRE_MULTILINE, FALSE);
1880 register int op = *scode;
1881 if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1882 { if (!is_anchored(scode, options)) return FALSE; }
1883 else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
1884 (*options & PCRE_DOTALL) != 0)
1885 { if (scode[1] != OP_ANY) return FALSE; }
1886 else if (op != OP_SOD &&
1887 ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
1888 return FALSE;
1889 code += (code[1] << 8) + code[2];
1890 }
1891 while (*code == OP_ALT);
1892 return TRUE;
1893 }
1894
1895
1896
1897 /*************************************************
1898 * Check for starting with ^ or .* *
1899 *************************************************/
1900
1901 /* This is called to find out if every branch starts with ^ or .* so that
1902 "first char" processing can be done to speed things up in multiline
1903 matching and for non-DOTALL patterns that start with .* (which must start at
1904 the beginning or after \n).
1905
1906 Argument: points to start of expression (the bracket)
1907 Returns: TRUE or FALSE
1908 */
1909
1910 static BOOL
1911 is_startline(const uschar *code)
1912 {
1913 do {
1914 const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
1915 register int op = *scode;
1916 if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1917 { if (!is_startline(scode)) return FALSE; }
1918 else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1919 { if (scode[1] != OP_ANY) return FALSE; }
1920 else if (op != OP_CIRC) return FALSE;
1921 code += (code[1] << 8) + code[2];
1922 }
1923 while (*code == OP_ALT);
1924 return TRUE;
1925 }
1926
1927
1928
1929 /*************************************************
1930 * Check for fixed first char *
1931 *************************************************/
1932
1933 /* Try to find out if there is a fixed first character. This is called for
1934 unanchored expressions, as it speeds up their processing quite considerably.
1935 Consider each alternative branch. If they all start with the same char, or with
1936 a bracket all of whose alternatives start with the same char (recurse ad lib),
1937 then we return that char, otherwise -1.
1938
1939 Arguments:
1940 code points to start of expression (the bracket)
1941 options pointer to the options (used to check casing changes)
1942
1943 Returns: -1 or the fixed first char
1944 */
1945
1946 static int
1947 find_firstchar(const uschar *code, int *options)
1948 {
1949 register int c = -1;
1950 do {
1951 int d;
1952 const uschar *scode = first_significant_code(code + 3, options,
1953 PCRE_CASELESS, TRUE);
1954 register int op = *scode;
1955
1956 if (op >= OP_BRA) op = OP_BRA;
1957
1958 switch(op)
1959 {
1960 default:
1961 return -1;
1962
1963 case OP_BRA:
1964 case OP_ASSERT:
1965 case OP_ONCE:
1966 case OP_COND:
1967 if ((d = find_firstchar(scode, options)) < 0) return -1;
1968 if (c < 0) c = d; else if (c != d) return -1;
1969 break;
1970
1971 case OP_EXACT: /* Fall through */
1972 scode++;
1973
1974 case OP_CHARS: /* Fall through */
1975 scode++;
1976
1977 case OP_PLUS:
1978 case OP_MINPLUS:
1979 if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
1980 break;
1981 }
1982
1983 code += (code[1] << 8) + code[2];
1984 }
1985 while (*code == OP_ALT);
1986 return c;
1987 }
1988
1989
1990
1991
1992
1993 /*************************************************
1994 * Compile a Regular Expression *
1995 *************************************************/
1996
1997 /* This function takes a string and returns a pointer to a block of store
1998 holding a compiled version of the expression.
1999
2000 Arguments:
2001 pattern the regular expression
2002 options various option bits
2003 errorptr pointer to pointer to error text
2004 erroroffset ptr offset in pattern where error was detected
2005 tables pointer to character tables or NULL
2006
2007 Returns: pointer to compiled data block, or NULL on error,
2008 with errorptr and erroroffset set
2009 */
2010
2011 pcre *
2012 pcre_compile(const char *pattern, int options, const char **errorptr,
2013 int *erroroffset, const unsigned char *tables)
2014 {
2015 real_pcre *re;
2016 int length = 3; /* For initial BRA plus length */
2017 int runlength;
2018 int c, size, reqchar, countlits;
2019 int bracount = 0;
2020 int top_backref = 0;
2021 int branch_extra = 0;
2022 int branch_newextra;
2023 unsigned int brastackptr = 0;
2024 uschar *code;
2025 const uschar *ptr;
2026 compile_data compile_block;
2027 int brastack[BRASTACK_SIZE];
2028 uschar bralenstack[BRASTACK_SIZE];
2029
2030 #ifdef DEBUG
2031 uschar *code_base, *code_end;
2032 #endif
2033
2034 /* We can't pass back an error message if errorptr is NULL; I guess the best we
2035 can do is just return NULL. */
2036
2037 if (errorptr == NULL) return NULL;
2038 *errorptr = NULL;
2039
2040 /* However, we can give a message for this error */
2041
2042 if (erroroffset == NULL)
2043 {
2044 *errorptr = ERR16;
2045 return NULL;
2046 }
2047 *erroroffset = 0;
2048
2049 if ((options & ~PUBLIC_OPTIONS) != 0)
2050 {
2051 *errorptr = ERR17;
2052 return NULL;
2053 }
2054
2055 /* Set up pointers to the individual character tables */
2056
2057 if (tables == NULL) tables = pcre_default_tables;
2058 compile_block.lcc = tables + lcc_offset;
2059 compile_block.fcc = tables + fcc_offset;
2060 compile_block.cbits = tables + cbits_offset;
2061 compile_block.ctypes = tables + ctypes_offset;
2062
2063 /* Reflect pattern for debugging output */
2064
2065 DPRINTF(("------------------------------------------------------------------\n"));
2066 DPRINTF(("%s\n", pattern));
2067
2068 /* The first thing to do is to make a pass over the pattern to compute the
2069 amount of store required to hold the compiled code. This does not have to be
2070 perfect as long as errors are overestimates. At the same time we can detect any
2071 internal flag settings. Make an attempt to correct for any counted white space
2072 if an "extended" flag setting appears late in the pattern. We can't be so
2073 clever for #-comments. */
2074
2075 ptr = (const uschar *)(pattern - 1);
2076 while ((c = *(++ptr)) != 0)
2077 {
2078 int min, max;
2079 int class_charcount;
2080
2081 if ((options & PCRE_EXTENDED) != 0)
2082 {
2083 if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2084 if (c == '#')
2085 {
2086 while ((c = *(++ptr)) != 0 && c != '\n');
2087 continue;
2088 }
2089 }
2090
2091 switch(c)
2092 {
2093 /* A backslashed item may be an escaped "normal" character or a
2094 character type. For a "normal" character, put the pointers and
2095 character back so that tests for whitespace etc. in the input
2096 are done correctly. */
2097
2098 case '\\':
2099 {
2100 const uschar *save_ptr = ptr;
2101 c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
2102 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2103 if (c >= 0)
2104 {
2105 ptr = save_ptr;
2106 c = '\\';
2107 goto NORMAL_CHAR;
2108 }
2109 }
2110 length++;
2111
2112 /* A back reference needs an additional char, plus either one or 5
2113 bytes for a repeat. We also need to keep the value of the highest
2114 back reference. */
2115
2116 if (c <= -ESC_REF)
2117 {
2118 int refnum = -c - ESC_REF;
2119 if (refnum > top_backref) top_backref = refnum;
2120 length++; /* For single back reference */
2121 if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2122 {
2123 ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2124 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2125 if ((min == 0 && (max == 1 || max == -1)) ||
2126 (min == 1 && max == -1))
2127 length++;
2128 else length += 5;
2129 if (ptr[1] == '?') ptr++;
2130 }
2131 }
2132 continue;
2133
2134 case '^':
2135 case '.':
2136 case '$':
2137 case '*': /* These repeats won't be after brackets; */
2138 case '+': /* those are handled separately */
2139 case '?':
2140 length++;
2141 continue;
2142
2143 /* This covers the cases of repeats after a single char, metachar, class,
2144 or back reference. */
2145
2146 case '{':
2147 if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
2148 ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
2149 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2150 if ((min == 0 && (max == 1 || max == -1)) ||
2151 (min == 1 && max == -1))
2152 length++;
2153 else
2154 {
2155 length--; /* Uncount the original char or metachar */
2156 if (min == 1) length++; else if (min > 0) length += 4;
2157 if (max > 0) length += 4; else length += 2;
2158 }
2159 if (ptr[1] == '?') ptr++;
2160 continue;
2161
2162 /* An alternation contains an offset to the next branch or ket. If any ims
2163 options changed in the previous branch(es), and/or if we are in a
2164 lookbehind assertion, extra space will be needed at the start of the
2165 branch. This is handled by branch_extra. */
2166
2167 case '|':
2168 length += 3 + branch_extra;
2169 continue;
2170
2171 /* A character class uses 33 characters. Don't worry about character types
2172 that aren't allowed in classes - they'll get picked up during the compile.
2173 A character class that contains only one character uses 2 or 3 bytes,
2174 depending on whether it is negated or not. Notice this where we can. */
2175
2176 case '[':
2177 class_charcount = 0;
2178 if (*(++ptr) == '^') ptr++;
2179 do
2180 {
2181 if (*ptr == '\\')
2182 {
2183 int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2184 &compile_block);
2185 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2186 if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2187 }
2188 else class_charcount++;
2189 ptr++;
2190 }
2191 while (*ptr != 0 && *ptr != ']');
2192
2193 /* Repeats for negated single chars are handled by the general code */
2194
2195 if (class_charcount == 1) length += 3; else
2196 {
2197 length += 33;
2198
2199 /* A repeat needs either 1 or 5 bytes. */
2200
2201 if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2202 {
2203 ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2204 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2205 if ((min == 0 && (max == 1 || max == -1)) ||
2206 (min == 1 && max == -1))
2207 length++;
2208 else length += 5;
2209 if (ptr[1] == '?') ptr++;
2210 }
2211 }
2212 continue;
2213
2214 /* Brackets may be genuine groups or special things */
2215
2216 case '(':
2217 branch_newextra = 0;
2218
2219 /* Handle special forms of bracket, which all start (? */
2220
2221 if (ptr[1] == '?')
2222 {
2223 int set, unset;
2224 int *optset;
2225
2226 switch (c = ptr[2])
2227 {
2228 /* Skip over comments entirely */
2229 case '#':
2230 ptr += 3;
2231 while (*ptr != 0 && *ptr != ')') ptr++;
2232 if (*ptr == 0)
2233 {
2234 *errorptr = ERR18;
2235 goto PCRE_ERROR_RETURN;
2236 }
2237 continue;
2238
2239 /* Non-referencing groups and lookaheads just move the pointer on, and
2240 then behave like a non-special bracket, except that they don't increment
2241 the count of extracting brackets. Ditto for the "once only" bracket,
2242 which is in Perl from version 5.005. */
2243
2244 case ':':
2245 case '=':
2246 case '!':
2247 case '>':
2248 ptr += 2;
2249 break;
2250
2251 /* Lookbehinds are in Perl from version 5.005 */
2252
2253 case '<':
2254 if (ptr[3] == '=' || ptr[3] == '!')
2255 {
2256 ptr += 3;
2257 branch_newextra = 3;
2258 length += 3; /* For the first branch */
2259 break;
2260 }
2261 *errorptr = ERR24;
2262 goto PCRE_ERROR_RETURN;
2263
2264 /* Conditionals are in Perl from version 5.005. The bracket must either
2265 be followed by a number (for bracket reference) or by an assertion
2266 group. */
2267
2268 case '(':
2269 if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2270 {
2271 ptr += 4;
2272 length += 2;
2273 while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2274 if (*ptr != ')')
2275 {
2276 *errorptr = ERR26;
2277 goto PCRE_ERROR_RETURN;
2278 }
2279 }
2280 else /* An assertion must follow */
2281 {
2282 ptr++; /* Can treat like ':' as far as spacing is concerned */
2283
2284 if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL)
2285 {
2286 ptr += 2; /* To get right offset in message */
2287 *errorptr = ERR28;
2288 goto PCRE_ERROR_RETURN;
2289 }
2290 }
2291 break;
2292
2293 /* Else loop checking valid options until ) is met. Anything else is an
2294 error. If we are without any brackets, i.e. at top level, the settings
2295 act as if specified in the options, so massage the options immediately.
2296 This is for backward compatibility with Perl 5.004. */
2297
2298 default:
2299 set = unset = 0;
2300 optset = &set;
2301 ptr += 2;
2302
2303 for (;; ptr++)
2304 {
2305 c = *ptr;
2306 switch (c)
2307 {
2308 case 'i':
2309 *optset |= PCRE_CASELESS;
2310 continue;
2311
2312 case 'm':
2313 *optset |= PCRE_MULTILINE;
2314 continue;
2315
2316 case 's':
2317 *optset |= PCRE_DOTALL;
2318 continue;
2319
2320 case 'x':
2321 *optset |= PCRE_EXTENDED;
2322 continue;
2323
2324 case 'X':
2325 *optset |= PCRE_EXTRA;
2326 continue;
2327
2328 case 'U':
2329 *optset |= PCRE_UNGREEDY;
2330 continue;
2331
2332 case '-':
2333 optset = &unset;
2334 continue;
2335
2336 /* A termination by ')' indicates an options-setting-only item;
2337 this is global at top level; otherwise nothing is done here and
2338 it is handled during the compiling process on a per-bracket-group
2339 basis. */
2340
2341 case ')':
2342 if (brastackptr == 0)
2343 {
2344 options = (options | set) & (~unset);
2345 set = unset = 0; /* To save length */
2346 }
2347 /* Fall through */
2348
2349 /* A termination by ':' indicates the start of a nested group with
2350 the given options set. This is again handled at compile time, but
2351 we must allow for compiled space if any of the ims options are
2352 set. We also have to allow for resetting space at the end of
2353 the group, which is why 4 is added to the length and not just 2.
2354 If there are several changes of options within the same group, this
2355 will lead to an over-estimate on the length, but this shouldn't
2356 matter very much. We also have to allow for resetting options at
2357 the start of any alternations, which we do by setting
2358 branch_newextra to 2. Finally, we record whether the case-dependent
2359 flag ever changes within the regex. This is used by the "required
2360 character" code. */
2361
2362 case ':':
2363 if (((set|unset) & PCRE_IMS) != 0)
2364 {
2365 length += 4;
2366 branch_newextra = 2;
2367 if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
2368 }
2369 goto END_OPTIONS;
2370
2371 /* Unrecognized option character */
2372
2373 default:
2374 *errorptr = ERR12;
2375 goto PCRE_ERROR_RETURN;
2376 }
2377 }
2378
2379 /* If we hit a closing bracket, that's it - this is a freestanding
2380 option-setting. We need to ensure that branch_extra is updated if
2381 necessary. The only values branch_newextra can have here are 0 or 2.
2382 If the value is 2, then branch_extra must either be 2 or 5, depending
2383 on whether this is a lookbehind group or not. */
2384
2385 END_OPTIONS:
2386 if (c == ')')
2387 {
2388 if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
2389 branch_extra += branch_newextra;
2390 continue;
2391 }
2392
2393 /* If options were terminated by ':' control comes here. Fall through
2394 to handle the group below. */
2395 }
2396 }
2397
2398 /* Extracting brackets must be counted so we can process escapes in a
2399 Perlish way. */
2400
2401 else bracount++;
2402
2403 /* Non-special forms of bracket. Save length for computing whole length
2404 at end if there's a repeat that requires duplication of the group. Also
2405 save the current value of branch_extra, and start the new group with
2406 the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
2407 for a lookbehind assertion. */
2408
2409 if (brastackptr >= sizeof(brastack)/sizeof(int))
2410 {
2411 *errorptr = ERR19;
2412 goto PCRE_ERROR_RETURN;
2413 }
2414
2415 bralenstack[brastackptr] = branch_extra;
2416 branch_extra = branch_newextra;
2417
2418 brastack[brastackptr++] = length;
2419 length += 3;
2420 continue;
2421
2422 /* Handle ket. Look for subsequent max/min; for certain sets of values we
2423 have to replicate this bracket up to that many times. If brastackptr is
2424 0 this is an unmatched bracket which will generate an error, but take care
2425 not to try to access brastack[-1] when computing the length and restoring
2426 the branch_extra value. */
2427
2428 case ')':
2429 length += 3;
2430 {
2431 int minval = 1;
2432 int maxval = 1;
2433 int duplength;
2434
2435 if (brastackptr > 0)
2436 {
2437 duplength = length - brastack[--brastackptr];
2438 branch_extra = bralenstack[brastackptr];
2439 }
2440 else duplength = 0;
2441
2442 /* Leave ptr at the final char; for read_repeat_counts this happens
2443 automatically; for the others we need an increment. */
2444
2445 if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2446 {
2447 ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2448 &compile_block);
2449 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2450 }
2451 else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2452 else if (c == '+') { maxval = -1; ptr++; }
2453 else if (c == '?') { minval = 0; ptr++; }
2454
2455 /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2456 group, and if the maximum is greater than zero, we have to replicate
2457 maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2458 bracket set - hence the 7. */
2459
2460 if (minval == 0)
2461 {
2462 length++;
2463 if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2464 }
2465
2466 /* When the minimum is greater than zero, we have to replicate up to
2467 minval-1 times, with no additions required in the copies. Then, if
2468 there is a limited maximum we have to replicate up to maxval-1 times
2469 allowing for a BRAZERO item before each optional copy and nesting
2470 brackets for all but one of the optional copies. */
2471
2472 else
2473 {
2474 length += (minval - 1) * duplength;
2475 if (maxval > minval) /* Need this test as maxval=-1 means no limit */
2476 length += (maxval - minval) * (duplength + 7) - 6;
2477 }
2478 }
2479 continue;
2480
2481 /* Non-special character. For a run of such characters the length required
2482 is the number of characters + 2, except that the maximum run length is 255.
2483 We won't get a skipped space or a non-data escape or the start of a #
2484 comment as the first character, so the length can't be zero. */
2485
2486 NORMAL_CHAR:
2487 default:
2488 length += 2;
2489 runlength = 0;
2490 do
2491 {
2492 if ((options & PCRE_EXTENDED) != 0)
2493 {
2494 if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2495 if (c == '#')
2496 {
2497 while ((c = *(++ptr)) != 0 && c != '\n');
2498 continue;
2499 }
2500 }
2501
2502 /* Backslash may introduce a data char or a metacharacter; stop the
2503 string before the latter. */
2504
2505 if (c == '\\')
2506 {
2507 const uschar *saveptr = ptr;
2508 c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2509 &compile_block);
2510 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2511 if (c < 0) { ptr = saveptr; break; }
2512 }
2513
2514 /* Ordinary character or single-char escape */
2515
2516 runlength++;
2517 }
2518
2519 /* This "while" is the end of the "do" above. */
2520
2521 while (runlength < 255 &&
2522 (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2523
2524 ptr--;
2525 length += runlength;
2526 continue;
2527 }
2528 }
2529
2530 length += 4; /* For final KET and END */
2531
2532 if (length > 65539)
2533 {
2534 *errorptr = ERR20;
2535 return NULL;
2536 }
2537
2538 /* Compute the size of data block needed and get it, either from malloc or
2539 externally provided function. We specify "code[0]" in the offsetof() expression
2540 rather than just "code", because it has been reported that one broken compiler
2541 fails on "code" because it is also an independent variable. It should make no
2542 difference to the value of the offsetof(). */
2543
2544 size = length + offsetof(real_pcre, code[0]);
2545 re = (real_pcre *)(pcre_malloc)(size);
2546
2547 if (re == NULL)
2548 {
2549 *errorptr = ERR21;
2550 return NULL;
2551 }
2552
2553 /* Put in the magic number and the options. */
2554
2555 re->magic_number = MAGIC_NUMBER;
2556 re->options = options;
2557 re->tables = tables;
2558
2559 /* Set up a starting, non-extracting bracket, then compile the expression. On
2560 error, *errorptr will be set non-NULL, so we don't need to look at the result
2561 of the function here. */
2562
2563 ptr = (const uschar *)pattern;
2564 code = re->code;
2565 *code = OP_BRA;
2566 bracount = 0;
2567 (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2568 &reqchar, &countlits, &compile_block);
2569 re->top_bracket = bracount;
2570 re->top_backref = top_backref;
2571
2572 /* If not reached end of pattern on success, there's an excess bracket. */
2573
2574 if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22;
2575
2576 /* Fill in the terminating state and check for disastrous overflow, but
2577 if debugging, leave the test till after things are printed out. */
2578
2579 *code++ = OP_END;
2580
2581 #ifndef DEBUG
2582 if (code - re->code > length) *errorptr = ERR23;
2583 #endif
2584
2585 /* Give an error if there's back reference to a non-existent capturing
2586 subpattern. */
2587
2588 if (top_backref > re->top_bracket) *errorptr = ERR15;
2589
2590 /* Failed to compile */
2591
2592 if (*errorptr != NULL)
2593 {
2594 (pcre_free)(re);
2595 PCRE_ERROR_RETURN:
2596 *erroroffset = ptr - (const uschar *)pattern;
2597 return NULL;
2598 }
2599
2600 /* If the anchored option was not passed, set flag if we can determine that the
2601 pattern is anchored by virtue of ^ characters or \A or anything else (such as
2602 starting with .* when DOTALL is set).
2603
2604 Otherwise, see if we can determine what the first character has to be, because
2605 that speeds up unanchored matches no end. If not, see if we can set the
2606 PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2607 start with ^, and also when all branches start with .* for non-DOTALL matches.
2608 */
2609
2610 if ((options & PCRE_ANCHORED) == 0)
2611 {
2612 int temp_options = options;
2613 if (is_anchored(re->code, &temp_options))
2614 re->options |= PCRE_ANCHORED;
2615 else
2616 {
2617 int ch = find_firstchar(re->code, &temp_options);
2618 if (ch >= 0)
2619 {
2620 re->first_char = ch;
2621 re->options |= PCRE_FIRSTSET;
2622 }
2623 else if (is_startline(re->code))
2624 re->options |= PCRE_STARTLINE;
2625 }
2626 }
2627
2628 /* Save the last required character if there are at least two literal
2629 characters on all paths, or if there is no first character setting. */
2630
2631 if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0))
2632 {
2633 re->req_char = reqchar;
2634 re->options |= PCRE_REQCHSET;
2635 }
2636
2637 /* Print out the compiled data for debugging */
2638
2639 #ifdef DEBUG
2640
2641 printf("Length = %d top_bracket = %d top_backref = %d\n",
2642 length, re->top_bracket, re->top_backref);
2643
2644 if (re->options != 0)
2645 {
2646 printf("%s%s%s%s%s%s%s%s%s\n",
2647 ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2648 ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2649 ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
2650 ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2651 ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2652 ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2653 ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2654 ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2655 ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2656 }
2657
2658 if ((re->options & PCRE_FIRSTSET) != 0)
2659 {
2660 if (isprint(re->first_char)) printf("First char = %c\n", re->first_char);
2661 else printf("First char = \\x%02x\n", re->first_char);
2662 }
2663
2664 if ((re->options & PCRE_REQCHSET) != 0)
2665 {
2666 if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char);
2667 else printf("Req char = \\x%02x\n", re->req_char);
2668 }
2669
2670 code_end = code;
2671 code_base = code = re->code;
2672
2673 while (code < code_end)
2674 {
2675 int charlength;
2676
2677 printf("%3d ", code - code_base);
2678
2679 if (*code >= OP_BRA)
2680 {
2681 printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
2682 code += 2;
2683 }
2684
2685 else switch(*code)
2686 {
2687 case OP_OPT:
2688 printf(" %.2x %s", code[1], OP_names[*code]);
2689 code++;
2690 break;
2691
2692 case OP_COND:
2693 printf("%3d Cond", (code[1] << 8) + code[2]);
2694 code += 2;
2695 break;
2696
2697 case OP_CREF:
2698 printf(" %.2d %s", code[1], OP_names[*code]);
2699 code++;
2700 break;
2701
2702 case OP_CHARS:
2703 charlength = *(++code);
2704 printf("%3d ", charlength);
2705 while (charlength-- > 0)
2706 if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
2707 break;
2708
2709 case OP_KETRMAX:
2710 case OP_KETRMIN:
2711 case OP_ALT:
2712 case OP_KET:
2713 case OP_ASSERT:
2714 case OP_ASSERT_NOT:
2715 case OP_ASSERTBACK:
2716 case OP_ASSERTBACK_NOT:
2717 case OP_ONCE:
2718 printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2719 code += 2;
2720 break;
2721
2722 case OP_REVERSE:
2723 printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2724 code += 2;
2725 break;
2726
2727 case OP_STAR:
2728 case OP_MINSTAR:
2729 case OP_PLUS:
2730 case OP_MINPLUS:
2731 case OP_QUERY:
2732 case OP_MINQUERY:
2733 case OP_TYPESTAR:
2734 case OP_TYPEMINSTAR:
2735 case OP_TYPEPLUS:
2736 case OP_TYPEMINPLUS:
2737 case OP_TYPEQUERY:
2738 case OP_TYPEMINQUERY:
2739 if (*code >= OP_TYPESTAR)
2740 printf(" %s", OP_names[code[1]]);
2741 else if (isprint(c = code[1])) printf(" %c", c);
2742 else printf(" \\x%02x", c);
2743 printf("%s", OP_names[*code++]);
2744 break;
2745
2746 case OP_EXACT:
2747 case OP_UPTO:
2748 case OP_MINUPTO:
2749 if (isprint(c = code[3])) printf(" %c{", c);
2750 else printf(" \\x%02x{", c);
2751 if (*code != OP_EXACT) printf("0,");
2752 printf("%d}", (code[1] << 8) + code[2]);
2753 if (*code == OP_MINUPTO) printf("?");
2754 code += 3;
2755 break;
2756
2757 case OP_TYPEEXACT:
2758 case OP_TYPEUPTO:
2759 case OP_TYPEMINUPTO:
2760 printf(" %s{", OP_names[code[3]]);
2761 if (*code != OP_TYPEEXACT) printf(",");
2762 printf("%d}", (code[1] << 8) + code[2]);
2763 if (*code == OP_TYPEMINUPTO) printf("?");
2764 code += 3;
2765 break;
2766
2767 case OP_NOT:
2768 if (isprint(c = *(++code))) printf(" [^%c]", c);
2769 else printf(" [^\\x%02x]", c);
2770 break;
2771
2772 case OP_NOTSTAR:
2773 case OP_NOTMINSTAR:
2774 case OP_NOTPLUS:
2775 case OP_NOTMINPLUS:
2776 case OP_NOTQUERY:
2777 case OP_NOTMINQUERY:
2778 if (isprint(c = code[1])) printf(" [^%c]", c);
2779 else printf(" [^\\x%02x]", c);
2780 printf("%s", OP_names[*code++]);
2781 break;
2782
2783 case OP_NOTEXACT:
2784 case OP_NOTUPTO:
2785 case OP_NOTMINUPTO:
2786 if (isprint(c = code[3])) printf(" [^%c]{", c);
2787 else printf(" [^\\x%02x]{", c);
2788 if (*code != OP_NOTEXACT) printf(",");
2789 printf("%d}", (code[1] << 8) + code[2]);
2790 if (*code == OP_NOTMINUPTO) printf("?");
2791 code += 3;
2792 break;
2793
2794 case OP_REF:
2795 printf(" \\%d", *(++code));
2796 code ++;
2797 goto CLASS_REF_REPEAT;
2798
2799 case OP_CLASS:
2800 {
2801 int i, min, max;
2802 code++;
2803 printf(" [");
2804
2805 for (i = 0; i < 256; i++)
2806 {
2807 if ((code[i/8] & (1 << (i&7))) != 0)
2808 {
2809 int j;
2810 for (j = i+1; j < 256; j++)
2811 if ((code[j/8] & (1 << (j&7))) == 0) break;
2812 if (i == '-' || i == ']') printf("\\");
2813 if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
2814 if (--j > i)
2815 {
2816 printf("-");
2817 if (j == '-' || j == ']') printf("\\");
2818 if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
2819 }
2820 i = j;
2821 }
2822 }
2823 printf("]");
2824 code += 32;
2825
2826 CLASS_REF_REPEAT:
2827
2828 switch(*code)
2829 {
2830 case OP_CRSTAR:
2831 case OP_CRMINSTAR:
2832 case OP_CRPLUS:
2833 case OP_CRMINPLUS:
2834 case OP_CRQUERY:
2835 case OP_CRMINQUERY:
2836 printf("%s", OP_names[*code]);
2837 break;
2838
2839 case OP_CRRANGE:
2840 case OP_CRMINRANGE:
2841 min = (code[1] << 8) + code[2];
2842 max = (code[3] << 8) + code[4];
2843 if (max == 0) printf("{%d,}", min);
2844 else printf("{%d,%d}", min, max);
2845 if (*code == OP_CRMINRANGE) printf("?");
2846 code += 4;
2847 break;
2848
2849 default:
2850 code--;
2851 }
2852 }
2853 break;
2854
2855 /* Anything else is just a one-node item */
2856
2857 default:
2858 printf(" %s", OP_names[*code]);
2859 break;
2860 }
2861
2862 code++;
2863 printf("\n");
2864 }
2865 printf("------------------------------------------------------------------\n");
2866
2867 /* This check is done here in the debugging case so that the code that
2868 was compiled can be seen. */
2869
2870 if (code - re->code > length)
2871 {
2872 *errorptr = ERR23;
2873 (pcre_free)(re);
2874 *erroroffset = ptr - (uschar *)pattern;
2875 return NULL;
2876 }
2877 #endif
2878
2879 return (pcre *)re;
2880 }
2881
2882
2883
2884 /*************************************************
2885 * Match a back-reference *
2886 *************************************************/
2887
2888 /* If a back reference hasn't been set, the length that is passed is greater
2889 than the number of characters left in the string, so the match fails.
2890
2891 Arguments:
2892 offset index into the offset vector
2893 eptr points into the subject
2894 length length to be matched
2895 md points to match data block
2896 ims the ims flags
2897
2898 Returns: TRUE if matched
2899 */
2900
2901 static BOOL
2902 match_ref(int offset, register const uschar *eptr, int length, match_data *md,
2903 unsigned long int ims)
2904 {
2905 const uschar *p = md->start_subject + md->offset_vector[offset];
2906
2907 #ifdef DEBUG
2908 if (eptr >= md->end_subject)
2909 printf("matching subject <null>");
2910 else
2911 {
2912 printf("matching subject ");
2913 pchars(eptr, length, TRUE, md);
2914 }
2915 printf(" against backref ");
2916 pchars(p, length, FALSE, md);
2917 printf("\n");
2918 #endif
2919
2920 /* Always fail if not enough characters left */
2921
2922 if (length > md->end_subject - eptr) return FALSE;
2923
2924 /* Separate the caselesss case for speed */
2925
2926 if ((ims & PCRE_CASELESS) != 0)
2927 {
2928 while (length-- > 0)
2929 if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
2930 }
2931 else
2932 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2933
2934 return TRUE;
2935 }
2936
2937
2938
2939 /*************************************************
2940 * Match from current position *
2941 *************************************************/
2942
2943 /* On entry ecode points to the first opcode, and eptr to the first character
2944 in the subject string, while eptrb holds the value of eptr at the start of the
2945 last bracketed group - used for breaking infinite loops matching zero-length
2946 strings.
2947
2948 Arguments:
2949 eptr pointer in subject
2950 ecode position in code
2951 offset_top current top pointer
2952 md pointer to "static" info for the match
2953 ims current /i, /m, and /s options
2954 condassert TRUE if called to check a condition assertion
2955 eptrb eptr at start of last bracket
2956
2957 Returns: TRUE if matched
2958 */
2959
2960 static BOOL
2961 match(register const uschar *eptr, register const uschar *ecode,
2962 int offset_top, match_data *md, unsigned long int ims, BOOL condassert,
2963 const uschar *eptrb)
2964 {
2965 unsigned long int original_ims = ims; /* Save for resetting on ')' */
2966
2967 for (;;)
2968 {
2969 int op = (int)*ecode;
2970 int min, max, ctype;
2971 register int i;
2972 register int c;
2973 BOOL minimize = FALSE;
2974
2975 /* Opening capturing bracket. If there is space in the offset vector, save
2976 the current subject position in the working slot at the top of the vector. We
2977 mustn't change the current values of the data slot, because they may be set
2978 from a previous iteration of this group, and be referred to by a reference
2979 inside the group.
2980
2981 If the bracket fails to match, we need to restore this value and also the
2982 values of the final offsets, in case they were set by a previous iteration of
2983 the same bracket.
2984
2985 If there isn't enough space in the offset vector, treat this as if it were a
2986 non-capturing bracket. Don't worry about setting the flag for the error case
2987 here; that is handled in the code for KET. */
2988
2989 if (op > OP_BRA)
2990 {
2991 int number = op - OP_BRA;
2992 int offset = number << 1;
2993
2994 #ifdef DEBUG
2995 printf("start bracket %d subject=", number);
2996 pchars(eptr, 16, TRUE, md);
2997 printf("\n");
2998 #endif
2999
3000 if (offset < md->offset_max)
3001 {
3002 int save_offset1 = md->offset_vector[offset];
3003 int save_offset2 = md->offset_vector[offset+1];
3004 int save_offset3 = md->offset_vector[md->offset_end - number];
3005
3006 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
3007 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
3008
3009 do
3010 {
3011 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3012 ecode += (ecode[1] << 8) + ecode[2];
3013 }
3014 while (*ecode == OP_ALT);
3015
3016 DPRINTF(("bracket %d failed\n", number));
3017
3018 md->offset_vector[offset] = save_offset1;
3019 md->offset_vector[offset+1] = save_offset2;
3020 md->offset_vector[md->offset_end - number] = save_offset3;
3021 return FALSE;
3022 }
3023
3024 /* Insufficient room for saving captured contents */
3025
3026 else op = OP_BRA;
3027 }
3028
3029 /* Other types of node can be handled by a switch */
3030
3031 switch(op)
3032 {
3033 case OP_BRA: /* Non-capturing bracket: optimized */
3034 DPRINTF(("start bracket 0\n"));
3035 do
3036 {
3037 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3038 ecode += (ecode[1] << 8) + ecode[2];
3039 }
3040 while (*ecode == OP_ALT);
3041 DPRINTF(("bracket 0 failed\n"));
3042 return FALSE;
3043
3044 /* Conditional group: compilation checked that there are no more than
3045 two branches. If the condition is false, skipping the first branch takes us
3046 past the end if there is only one branch, but that's OK because that is
3047 exactly what going to the ket would do. */
3048
3049 case OP_COND:
3050 if (ecode[3] == OP_CREF) /* Condition is extraction test */
3051 {
3052 int offset = ecode[4] << 1; /* Doubled reference number */
3053 return match(eptr,
3054 ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
3055 5 : 3 + (ecode[1] << 8) + ecode[2]),
3056 offset_top, md, ims, FALSE, eptr);
3057 }
3058
3059 /* The condition is an assertion. Call match() to evaluate it - setting
3060 the condassert argument TRUE causes it to stop at the end of an assertion. */
3061
3062 else
3063 {
3064 if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL))
3065 {
3066 ecode += 3 + (ecode[4] << 8) + ecode[5];
3067 while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
3068 }
3069 else ecode += (ecode[1] << 8) + ecode[2];
3070 return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr);
3071 }
3072 /* Control never reaches here */
3073
3074 /* Skip over conditional reference data if encountered (should not be) */
3075
3076 case OP_CREF:
3077 ecode += 2;
3078 break;
3079
3080 /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
3081 an empty string - recursion will then try other alternatives, if any. */
3082
3083 case OP_END:
3084 if (md->notempty && eptr == md->start_match) return FALSE;
3085 md->end_match_ptr = eptr; /* Record where we ended */
3086 md->end_offset_top = offset_top; /* and how many extracts were taken */
3087 return TRUE;
3088
3089 /* Change option settings */
3090
3091 case OP_OPT:
3092 ims = ecode[1];
3093 ecode += 2;
3094 DPRINTF(("ims set to %02lx\n", ims));
3095 break;
3096
3097 /* Assertion brackets. Check the alternative branches in turn - the
3098 matching won't pass the KET for an assertion. If any one branch matches,
3099 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
3100 start of each branch to move the current point backwards, so the code at
3101 this level is identical to the lookahead case. */
3102
3103 case OP_ASSERT:
3104 case OP_ASSERTBACK:
3105 do
3106 {
3107 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break;
3108 ecode += (ecode[1] << 8) + ecode[2];
3109 }
3110 while (*ecode == OP_ALT);
3111 if (*ecode == OP_KET) return FALSE;
3112
3113 /* If checking an assertion for a condition, return TRUE. */
3114
3115 if (condassert) return TRUE;
3116
3117 /* Continue from after the assertion, updating the offsets high water
3118 mark, since extracts may have been taken during the assertion. */
3119
3120 do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3121 ecode += 3;
3122 offset_top = md->end_offset_top;
3123 continue;
3124
3125 /* Negative assertion: all branches must fail to match */
3126
3127 case OP_ASSERT_NOT:
3128 case OP_ASSERTBACK_NOT:
3129 do
3130 {
3131 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE;
3132 ecode += (ecode[1] << 8) + ecode[2];
3133 }
3134 while (*ecode == OP_ALT);
3135
3136 if (condassert) return TRUE;
3137 ecode += 3;
3138 continue;
3139
3140 /* Move the subject pointer back. This occurs only at the start of
3141 each branch of a lookbehind assertion. If we are too close to the start to
3142 move back, this match function fails. */
3143
3144 case OP_REVERSE:
3145 eptr -= (ecode[1] << 8) + ecode[2];
3146 if (eptr < md->start_subject) return FALSE;
3147 ecode += 3;
3148 break;
3149
3150
3151 /* "Once" brackets are like assertion brackets except that after a match,
3152 the point in the subject string is not moved back. Thus there can never be
3153 a move back into the brackets. Check the alternative branches in turn - the
3154 matching won't pass the KET for this kind of subpattern. If any one branch
3155 matches, we carry on as at the end of a normal bracket, leaving the subject
3156 pointer. */
3157
3158 case OP_ONCE:
3159 {
3160 const uschar *prev = ecode;
3161
3162 do
3163 {
3164 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break;
3165 ecode += (ecode[1] << 8) + ecode[2];
3166 }
3167 while (*ecode == OP_ALT);
3168
3169 /* If hit the end of the group (which could be repeated), fail */
3170
3171 if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
3172
3173 /* Continue as from after the assertion, updating the offsets high water
3174 mark, since extracts may have been taken. */
3175
3176 do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3177
3178 offset_top = md->end_offset_top;
3179 eptr = md->end_match_ptr;
3180
3181 /* For a non-repeating ket, just continue at this level. This also
3182 happens for a repeating ket if no characters were matched in the group.
3183 This is the forcible breaking of infinite loops as implemented in Perl
3184 5.005. If there is an options reset, it will get obeyed in the normal
3185 course of events. */
3186
3187 if (*ecode == OP_KET || eptr == eptrb)
3188 {
3189 ecode += 3;
3190 break;
3191 }
3192
3193 /* The repeating kets try the rest of the pattern or restart from the
3194 preceding bracket, in the appropriate order. We need to reset any options
3195 that changed within the bracket before re-running it, so check the next
3196 opcode. */
3197
3198 if (ecode[3] == OP_OPT)
3199 {
3200 ims = (ims & ~PCRE_IMS) | ecode[4];
3201 DPRINTF(("ims set to %02lx at group repeat\n", ims));
3202 }
3203
3204 if (*ecode == OP_KETRMIN)
3205 {
3206 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3207 match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3208 }
3209 else /* OP_KETRMAX */
3210 {
3211 if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3212 match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3213 }
3214 }
3215 return FALSE;
3216
3217 /* An alternation is the end of a branch; scan along to find the end of the
3218 bracketed group and go to there. */
3219
3220 case OP_ALT:
3221 do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3222 break;
3223
3224 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
3225 that it may occur zero times. It may repeat infinitely, or not at all -
3226 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
3227 repeat limits are compiled as a number of copies, with the optional ones
3228 preceded by BRAZERO or BRAMINZERO. */
3229
3230 case OP_BRAZERO:
3231 {
3232 const uschar *next = ecode+1;
3233 if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE;
3234 do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3235 ecode = next + 3;
3236 }
3237 break;
3238
3239 case OP_BRAMINZERO:
3240 {
3241 const uschar *next = ecode+1;
3242 do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3243 if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3244 ecode++;
3245 }
3246 break;
3247
3248 /* End of a group, repeated or non-repeating. If we are at the end of
3249 an assertion "group", stop matching and return TRUE, but record the
3250 current high water mark for use by positive assertions. Do this also
3251 for the "once" (not-backup up) groups. */
3252
3253 case OP_KET:
3254 case OP_KETRMIN:
3255 case OP_KETRMAX:
3256 {
3257 const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
3258
3259 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3260 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
3261 *prev == OP_ONCE)
3262 {
3263 md->end_match_ptr = eptr; /* For ONCE */
3264 md->end_offset_top = offset_top;
3265 return TRUE;
3266 }
3267
3268 /* In all other cases except a conditional group we have to check the
3269 group number back at the start and if necessary complete handling an
3270 extraction by setting the offsets and bumping the high water mark. */
3271
3272 if (*prev != OP_COND)
3273 {
3274 int number = *prev - OP_BRA;
3275 int offset = number << 1;
3276
3277 DPRINTF(("end bracket %d\n", number));
3278
3279 if (number > 0)
3280 {
3281 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
3282 {
3283 md->offset_vector[offset] =
3284 md->offset_vector[md->offset_end - number];
3285 md->offset_vector[offset+1] = eptr - md->start_subject;
3286 if (offset_top <= offset) offset_top = offset + 2;
3287 }
3288 }
3289 }
3290
3291 /* Reset the value of the ims flags, in case they got changed during
3292 the group. */
3293
3294 ims = original_ims;
3295 DPRINTF(("ims reset to %02lx\n", ims));
3296
3297 /* For a non-repeating ket, just continue at this level. This also
3298 happens for a repeating ket if no characters were matched in the group.
3299 This is the forcible breaking of infinite loops as implemented in Perl
3300 5.005. If there is an options reset, it will get obeyed in the normal
3301 course of events. */
3302
3303 if (*ecode == OP_KET || eptr == eptrb)
3304 {
3305 ecode += 3;
3306 break;
3307 }
3308
3309 /* The repeating kets try the rest of the pattern or restart from the
3310 preceding bracket, in the appropriate order. */
3311
3312 if (*ecode == OP_KETRMIN)
3313 {
3314 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3315 match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3316 }
3317 else /* OP_KETRMAX */
3318 {
3319 if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3320 match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3321 }
3322 }
3323 return FALSE;
3324
3325 /* Start of subject unless notbol, or after internal newline if multiline */
3326
3327 case OP_CIRC:
3328 if (md->notbol && eptr == md->start_subject) return FALSE;
3329 if ((ims & PCRE_MULTILINE) != 0)
3330 {
3331 if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
3332 ecode++;
3333 break;
3334 }
3335 /* ... else fall through */
3336
3337 /* Start of subject assertion */
3338
3339 case OP_SOD:
3340 if (eptr != md->start_subject) return FALSE;
3341 ecode++;
3342 break;
3343
3344 /* Assert before internal newline if multiline, or before a terminating
3345 newline unless endonly is set, else end of subject unless noteol is set. */
3346
3347 case OP_DOLL:
3348 if ((ims & PCRE_MULTILINE) != 0)
3349 {
3350 if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
3351 else { if (md->noteol) return FALSE; }
3352 ecode++;
3353 break;
3354 }
3355 else
3356 {
3357 if (md->noteol) return FALSE;
3358 if (!md->endonly)
3359 {
3360 if (eptr < md->end_subject - 1 ||
3361 (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3362
3363 ecode++;
3364 break;
3365 }
3366 }
3367 /* ... else fall through */
3368
3369 /* End of subject assertion (\z) */
3370
3371 case OP_EOD:
3372 if (eptr < md->end_subject) return FALSE;
3373 ecode++;
3374 break;
3375
3376 /* End of subject or ending \n assertion (\Z) */
3377
3378 case OP_EODN:
3379 if (eptr < md->end_subject - 1 ||
3380 (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3381 ecode++;
3382 break;
3383
3384 /* Word boundary assertions */
3385
3386 case OP_NOT_WORD_BOUNDARY:
3387 case OP_WORD_BOUNDARY:
3388 {
3389 BOOL prev_is_word = (eptr != md->start_subject) &&
3390 ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3391 BOOL cur_is_word = (eptr < md->end_subject) &&
3392 ((md->ctypes[*eptr] & ctype_word) != 0);
3393 if ((*ecode++ == OP_WORD_BOUNDARY)?
3394 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3395 return FALSE;
3396 }
3397 break;
3398
3399 /* Match a single character type; inline for speed */
3400
3401 case OP_ANY:
3402 if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
3403 return FALSE;
3404 if (eptr++ >= md->end_subject) return FALSE;
3405 ecode++;
3406 break;
3407
3408 case OP_NOT_DIGIT:
3409 if (eptr >= md->end_subject ||
3410 (md->ctypes[*eptr++] & ctype_digit) != 0)
3411 return FALSE;
3412 ecode++;
3413 break;
3414
3415 case OP_DIGIT:
3416 if (eptr >= md->end_subject ||
3417 (md->ctypes[*eptr++] & ctype_digit) == 0)
3418 return FALSE;
3419 ecode++;
3420 break;
3421
3422 case OP_NOT_WHITESPACE:
3423 if (eptr >= md->end_subject ||
3424 (md->ctypes[*eptr++] & ctype_space) != 0)
3425 return FALSE;
3426 ecode++;
3427 break;
3428
3429 case OP_WHITESPACE:
3430 if (eptr >= md->end_subject ||
3431 (md->ctypes[*eptr++] & ctype_space) == 0)
3432 return FALSE;
3433 ecode++;
3434 break;
3435
3436 case OP_NOT_WORDCHAR:
3437 if (eptr >= md->end_subject ||
3438 (md->ctypes[*eptr++] & ctype_word) != 0)
3439 return FALSE;
3440 ecode++;
3441 break;
3442
3443 case OP_WORDCHAR:
3444 if (eptr >= md->end_subject ||
3445 (md->ctypes[*eptr++] & ctype_word) == 0)
3446 return FALSE;
3447 ecode++;
3448 break;
3449
3450 /* Match a back reference, possibly repeatedly. Look past the end of the
3451 item to see if there is repeat information following. The code is similar
3452 to that for character classes, but repeated for efficiency. Then obey
3453 similar code to character type repeats - written out again for speed.
3454 However, if the referenced string is the empty string, always treat
3455 it as matched, any number of times (otherwise there could be infinite
3456 loops). */
3457
3458 case OP_REF:
3459 {
3460 int length;
3461 int offset = ecode[1] << 1; /* Doubled reference number */
3462 ecode += 2; /* Advance past the item */
3463
3464 /* If the reference is unset, set the length to be longer than the amount
3465 of subject left; this ensures that every attempt at a match fails. We
3466 can't just fail here, because of the possibility of quantifiers with zero
3467 minima. */
3468
3469 length = (offset >= offset_top || md->offset_vector[offset] < 0)?
3470 md->end_subject - eptr + 1 :
3471 md->offset_vector[offset+1] - md->offset_vector[offset];
3472
3473 /* Set up for repetition, or handle the non-repeated case */
3474
3475 switch (*ecode)
3476 {
3477 case OP_CRSTAR:
3478 case OP_CRMINSTAR:
3479 case OP_CRPLUS:
3480 case OP_CRMINPLUS:
3481 case OP_CRQUERY:
3482 case OP_CRMINQUERY:
3483 c = *ecode++ - OP_CRSTAR;
3484 minimize = (c & 1) != 0;
3485 min = rep_min[c]; /* Pick up values from tables; */
3486 max = rep_max[c]; /* zero for max => infinity */
3487 if (max == 0) max = INT_MAX;
3488 break;
3489
3490 case OP_CRRANGE:
3491 case OP_CRMINRANGE:
3492 minimize = (*ecode == OP_CRMINRANGE);
3493 min = (ecode[1] << 8) + ecode[2];
3494 max = (ecode[3] << 8) + ecode[4];
3495 if (max == 0) max = INT_MAX;
3496 ecode += 5;
3497 break;
3498
3499 default: /* No repeat follows */
3500 if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3501 eptr += length;
3502 continue; /* With the main loop */
3503 }
3504
3505 /* If the length of the reference is zero, just continue with the
3506 main loop. */
3507
3508 if (length == 0) continue;
3509
3510 /* First, ensure the minimum number of matches are present. We get back
3511 the length of the reference string explicitly rather than passing the
3512 address of eptr, so that eptr can be a register variable. */
3513
3514 for (i = 1; i <= min; i++)
3515 {
3516 if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3517 eptr += length;
3518 }
3519
3520 /* If min = max, continue at the same level without recursion.
3521 They are not both allowed to be zero. */
3522
3523 if (min == max) continue;
3524
3525 /* If minimizing, keep trying and advancing the pointer */
3526
3527 if (minimize)
3528 {
3529 for (i = min;; i++)
3530 {
3531 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3532 return TRUE;
3533 if (i >= max || !match_ref(offset, eptr, length, md, ims))
3534 return FALSE;
3535 eptr += length;
3536 }
3537 /* Control never gets here */
3538 }
3539
3540 /* If maximizing, find the longest string and work backwards */
3541
3542 else
3543 {
3544 const uschar *pp = eptr;
3545 for (i = min; i < max; i++)
3546 {
3547 if (!match_ref(offset, eptr, length, md, ims)) break;
3548 eptr += length;
3549 }
3550 while (eptr >= pp)
3551 {
3552 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3553 return TRUE;
3554 eptr -= length;
3555 }
3556 return FALSE;
3557 }
3558 }
3559 /* Control never gets here */
3560
3561
3562
3563 /* Match a character class, possibly repeatedly. Look past the end of the
3564 item to see if there is repeat information following. Then obey similar
3565 code to character type repeats - written out again for speed. */
3566
3567 case OP_CLASS:
3568 {
3569 const uschar *data = ecode + 1; /* Save for matching */
3570 ecode += 33; /* Advance past the item */
3571
3572 switch (*ecode)
3573 {
3574 case OP_CRSTAR:
3575 case OP_CRMINSTAR:
3576 case OP_CRPLUS:
3577 case OP_CRMINPLUS:
3578 case OP_CRQUERY:
3579 case OP_CRMINQUERY:
3580 c = *ecode++ - OP_CRSTAR;
3581 minimize = (c & 1) != 0;
3582 min = rep_min[c]; /* Pick up values from tables; */
3583 max = rep_max[c]; /* zero for max => infinity */
3584 if (max == 0) max = INT_MAX;
3585 break;
3586
3587 case OP_CRRANGE:
3588 case OP_CRMINRANGE:
3589 minimize = (*ecode == OP_CRMINRANGE);
3590 min = (ecode[1] << 8) + ecode[2];
3591 max = (ecode[3] << 8) + ecode[4];
3592 if (max == 0) max = INT_MAX;
3593 ecode += 5;
3594 break;
3595
3596 default: /* No repeat follows */
3597 min = max = 1;
3598 break;
3599 }
3600
3601 /* First, ensure the minimum number of matches are present. */
3602
3603 for (i = 1; i <= min; i++)
3604 {
3605 if (eptr >= md->end_subject) return FALSE;
3606 c = *eptr++;
3607 if ((data[c/8] & (1 << (c&7))) != 0) continue;
3608 return FALSE;
3609 }
3610
3611 /* If max == min we can continue with the main loop without the
3612 need to recurse. */
3613
3614 if (min == max) continue;
3615
3616 /* If minimizing, keep testing the rest of the expression and advancing
3617 the pointer while it matches the class. */
3618
3619 if (minimize)
3620 {
3621 for (i = min;; i++)
3622 {
3623 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3624 return TRUE;
3625 if (i >= max || eptr >= md->end_subject) return FALSE;
3626 c = *eptr++;
3627 if ((data[c/8] & (1 << (c&7))) != 0) continue;
3628 return FALSE;
3629 }
3630 /* Control never gets here */
3631 }
3632
3633 /* If maximizing, find the longest possible run, then work backwards. */
3634
3635 else
3636 {
3637 const uschar *pp = eptr;
3638 for (i = min; i < max; eptr++, i++)
3639 {
3640 if (eptr >= md->end_subject) break;
3641 c = *eptr;
3642 if ((data[c/8] & (1 << (c&7))) != 0) continue;
3643 break;
3644 }
3645
3646 while (eptr >= pp)
3647 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3648 return TRUE;
3649 return FALSE;
3650 }
3651 }
3652 /* Control never gets here */
3653
3654 /* Match a run of characters */
3655
3656 case OP_CHARS:
3657 {
3658 register int length = ecode[1];
3659 ecode += 2;
3660
3661 #ifdef DEBUG /* Sigh. Some compilers never learn. */
3662 if (eptr >= md->end_subject)
3663 printf("matching subject <null> against pattern ");
3664 else
3665 {
3666 printf("matching subject ");
3667 pchars(eptr, length, TRUE, md);
3668 printf(" against pattern ");
3669 }
3670 pchars(ecode, length, FALSE, md);
3671 printf("\n");
3672 #endif
3673
3674 if (length > md->end_subject - eptr) return FALSE;
3675 if ((ims & PCRE_CASELESS) != 0)
3676 {
3677 while (length-- > 0)
3678 if (md->lcc[*ecode++] != md->lcc[*eptr++])
3679 return FALSE;
3680 }
3681 else
3682 {
3683 while (length-- > 0) if (*ecode++ != *eptr++) return FALSE;
3684 }
3685 }
3686 break;
3687
3688 /* Match a single character repeatedly; different opcodes share code. */
3689
3690 case OP_EXACT:
3691 min = max = (ecode[1] << 8) + ecode[2];
3692 ecode += 3;
3693 goto REPEATCHAR;
3694
3695 case OP_UPTO:
3696 case OP_MINUPTO:
3697 min = 0;
3698 max = (ecode[1] << 8) + ecode[2];
3699 minimize = *ecode == OP_MINUPTO;
3700 ecode += 3;
3701 goto REPEATCHAR;
3702
3703 case OP_STAR:
3704 case OP_MINSTAR:
3705 case OP_PLUS:
3706 case OP_MINPLUS:
3707 case OP_QUERY:
3708 case OP_MINQUERY:
3709 c = *ecode++ - OP_STAR;
3710 minimize = (c & 1) != 0;
3711 min = rep_min[c]; /* Pick up values from tables; */
3712 max = rep_max[c]; /* zero for max => infinity */
3713 if (max == 0) max = INT_MAX;
3714
3715 /* Common code for all repeated single-character matches. We can give
3716 up quickly if there are fewer than the minimum number of characters left in
3717 the subject. */
3718
3719 REPEATCHAR:
3720 if (min > md->end_subject - eptr) return FALSE;
3721 c = *ecode++;
3722
3723 /* The code is duplicated for the caseless and caseful cases, for speed,
3724 since matching characters is likely to be quite common. First, ensure the
3725 minimum number of matches are present. If min = max, continue at the same
3726 level without recursing. Otherwise, if minimizing, keep trying the rest of
3727 the expression and advancing one matching character if failing, up to the
3728 maximum. Alternatively, if maximizing, find the maximum number of
3729 characters and work backwards. */
3730
3731 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
3732 max, eptr));
3733
3734 if ((ims & PCRE_CASELESS) != 0)
3735 {
3736 c = md->lcc[c];
3737 for (i = 1; i <= min; i++)
3738 if (c != md->lcc[*eptr++]) return FALSE;
3739 if (min == max) continue;
3740 if (minimize)
3741 {
3742 for (i = min;; i++)
3743 {
3744 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3745 return TRUE;
3746 if (i >= max || eptr >= md->end_subject ||
3747 c != md->lcc[*eptr++])
3748 return FALSE;
3749 }
3750 /* Control never gets here */
3751 }
3752 else
3753 {
3754 const uschar *pp = eptr;
3755 for (i = min; i < max; i++)
3756 {
3757 if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
3758 eptr++;
3759 }
3760 while (eptr >= pp)
3761 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3762 return TRUE;
3763 return FALSE;
3764 }
3765 /* Control never gets here */
3766 }
3767
3768 /* Caseful comparisons */
3769
3770 else
3771 {
3772 for (i = 1; i <= min; i++) if (c != *eptr++) return FALSE;
3773 if (min == max) continue;
3774 if (minimize)
3775 {
3776 for (i = min;; i++)
3777 {
3778 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3779 return TRUE;
3780 if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3781 }
3782 /* Control never gets here */
3783 }
3784 else
3785 {
3786 const uschar *pp = eptr;
3787 for (i = min; i < max; i++)
3788 {
3789 if (eptr >= md->end_subject || c != *eptr) break;
3790 eptr++;
3791 }
3792 while (eptr >= pp)
3793 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3794 return TRUE;
3795 return FALSE;
3796 }
3797 }
3798 /* Control never gets here */
3799
3800 /* Match a negated single character */
3801
3802 case OP_NOT:
3803 if (eptr >= md->end_subject) return FALSE;
3804 ecode++;
3805 if ((ims & PCRE_CASELESS) != 0)
3806 {
3807 if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
3808 }
3809 else
3810 {
3811 if (*ecode++ == *eptr++) return FALSE;
3812 }
3813 break;
3814
3815 /* Match a negated single character repeatedly. This is almost a repeat of
3816 the code for a repeated single character, but I haven't found a nice way of
3817 commoning these up that doesn't require a test of the positive/negative
3818 option for each character match. Maybe that wouldn't add very much to the
3819 time taken, but character matching *is* what this is all about... */
3820
3821 case OP_NOTEXACT:
3822 min = max = (ecode[1] << 8) + ecode[2];
3823 ecode += 3;
3824 goto REPEATNOTCHAR;
3825
3826 case OP_NOTUPTO:
3827 case OP_NOTMINUPTO:
3828 min = 0;
3829 max = (ecode[1] << 8) + ecode[2];
3830 minimize = *ecode == OP_NOTMINUPTO;
3831 ecode += 3;
3832 goto REPEATNOTCHAR;
3833
3834 case OP_NOTSTAR:
3835 case OP_NOTMINSTAR:
3836 case OP_NOTPLUS:
3837 case OP_NOTMINPLUS:
3838 case OP_NOTQUERY:
3839 case OP_NOTMINQUERY:
3840 c = *ecode++ - OP_NOTSTAR;
3841 minimize = (c & 1) != 0;
3842 min = rep_min[c]; /* Pick up values from tables; */
3843 max = rep_max[c]; /* zero for max => infinity */
3844 if (max == 0) max = INT_MAX;
3845
3846 /* Common code for all repeated negated single-character matches. We can give
3847 up quickly if there are fewer than the minimum number of characters left in
3848 the subject. */
3849
3850 REPEATNOTCHAR:
3851 if (min > md->end_subject - eptr) return FALSE;
3852 c = *ecode++;
3853
3854 /* The code is duplicated for the caseless and caseful cases, for speed,
3855 since matching characters is likely to be quite common. First, ensure the
3856 minimum number of matches are present. If min = max, continue at the same
3857 level without recursing. Otherwise, if minimizing, keep trying the rest of
3858 the expression and advancing one matching character if failing, up to the
3859 maximum. Alternatively, if maximizing, find the maximum number of
3860 characters and work backwards. */
3861
3862 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3863 max, eptr));
3864
3865 if ((ims & PCRE_CASELESS) != 0)
3866 {
3867 c = md->lcc[c];
3868 for (i = 1; i <= min; i++)
3869 if (c == md->lcc[*eptr++]) return FALSE;
3870 if (min == max) continue;
3871 if (minimize)
3872 {
3873 for (i = min;; i++)
3874 {
3875 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3876 return TRUE;
3877 if (i >= max || eptr >= md->end_subject ||
3878 c == md->lcc[*eptr++])
3879 return FALSE;
3880 }
3881 /* Control never gets here */
3882 }
3883 else
3884 {
3885 const uschar *pp = eptr;
3886 for (i = min; i < max; i++)
3887 {
3888 if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
3889 eptr++;
3890 }
3891 while (eptr >= pp)
3892 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3893 return TRUE;
3894 return FALSE;
3895 }
3896 /* Control never gets here */
3897 }
3898
3899 /* Caseful comparisons */
3900
3901 else
3902 {
3903 for (i = 1; i <= min; i++) if (c == *eptr++) return FALSE;
3904 if (min == max) continue;
3905 if (minimize)
3906 {
3907 for (i = min;; i++)
3908 {
3909 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3910 return TRUE;
3911 if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3912 }
3913 /* Control never gets here */
3914 }
3915 else
3916 {
3917 const uschar *pp = eptr;
3918 for (i = min; i < max; i++)
3919 {
3920 if (eptr >= md->end_subject || c == *eptr) break;
3921 eptr++;
3922 }
3923 while (eptr >= pp)
3924 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3925 return TRUE;
3926 return FALSE;
3927 }
3928 }
3929 /* Control never gets here */
3930
3931 /* Match a single character type repeatedly; several different opcodes
3932 share code. This is very similar to the code for single characters, but we
3933 repeat it in the interests of efficiency. */
3934
3935 case OP_TYPEEXACT:
3936 min = max = (ecode[1] << 8) + ecode[2];
3937 minimize = TRUE;
3938 ecode += 3;
3939 goto REPEATTYPE;
3940
3941 case OP_TYPEUPTO:
3942 case OP_TYPEMINUPTO:
3943 min = 0;
3944 max = (ecode[1] << 8) + ecode[2];
3945 minimize = *ecode == OP_TYPEMINUPTO;
3946 ecode += 3;
3947 goto REPEATTYPE;
3948
3949 case OP_TYPESTAR:
3950 case OP_TYPEMINSTAR:
3951 case OP_TYPEPLUS:
3952 case OP_TYPEMINPLUS:
3953 case OP_TYPEQUERY:
3954 case OP_TYPEMINQUERY:
3955 c = *ecode++ - OP_TYPESTAR;
3956 minimize = (c & 1) != 0;
3957 min = rep_min[c]; /* Pick up values from tables; */
3958 max = rep_max[c]; /* zero for max => infinity */
3959 if (max == 0) max = INT_MAX;
3960
3961 /* Common code for all repeated single character type matches */
3962
3963 REPEATTYPE:
3964 ctype = *ecode++; /* Code for the character type */
3965
3966 /* First, ensure the minimum number of matches are present. Use inline
3967 code for maximizing the speed, and do the type test once at the start
3968 (i.e. keep it out of the loop). Also test that there are at least the
3969 minimum number of characters before we start. */
3970
3971 if (min > md->end_subject - eptr) return FALSE;
3972 if (min > 0) switch(ctype)
3973 {
3974 case OP_ANY:
3975 if ((ims & PCRE_DOTALL) == 0)
3976 { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3977 else eptr += min;
3978 break;
3979
3980 case OP_NOT_DIGIT:
3981 for (i = 1; i <= min; i++)
3982 if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3983 break;
3984
3985 case OP_DIGIT:
3986 for (i = 1; i <= min; i++)
3987 if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3988 break;
3989
3990 case OP_NOT_WHITESPACE:
3991 for (i = 1; i <= min; i++)
3992 if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3993 break;
3994
3995 case OP_WHITESPACE:
3996 for (i = 1; i <= min; i++)
3997 if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3998 break;
3999
4000 case OP_NOT_WORDCHAR:
4001 for (i = 1; i <= min; i++)
4002 if ((md->ctypes[*eptr++] & ctype_word) != 0)
4003 return FALSE;
4004 break;
4005
4006 case OP_WORDCHAR:
4007 for (i = 1; i <= min; i++)
4008 if ((md->ctypes[*eptr++] & ctype_word) == 0)
4009 return FALSE;
4010 break;
4011 }
4012
4013 /* If min = max, continue at the same level without recursing */
4014
4015 if (min == max) continue;
4016
4017 /* If minimizing, we have to test the rest of the pattern before each
4018 subsequent match. */
4019
4020 if (minimize)
4021 {
4022 for (i = min;; i++)
4023 {
4024 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;
4025 if (i >= max || eptr >= md->end_subject) return FALSE;
4026
4027 c = *eptr++;
4028 switch(ctype)
4029 {
4030 case OP_ANY:
4031 if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
4032 break;
4033
4034 case OP_NOT_DIGIT:
4035 if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
4036 break;
4037
4038 case OP_DIGIT:
4039 if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
4040 break;
4041
4042 case OP_NOT_WHITESPACE:
4043 if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
4044 break;
4045
4046 case OP_WHITESPACE:
4047 if ((md->ctypes[c] & ctype_space) == 0) return FALSE;
4048 break;
4049
4050 case OP_NOT_WORDCHAR:
4051 if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
4052 break;
4053
4054 case OP_WORDCHAR:
4055 if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
4056 break;
4057 }
4058 }
4059 /* Control never gets here */
4060 }
4061
4062 /* If maximizing it is worth using inline code for speed, doing the type
4063 test once at the start (i.e. keep it out of the loop). */
4064
4065 else
4066 {
4067 const uschar *pp = eptr;
4068 switch(ctype)
4069 {
4070 case OP_ANY:
4071 if ((ims & PCRE_DOTALL) == 0)
4072 {
4073 for (i = min; i < max; i++)
4074 {
4075 if (eptr >= md->end_subject || *eptr == '\n') break;
4076 eptr++;
4077 }
4078 }
4079 else
4080 {
4081 c = max - min;
4082 if (c > md->end_subject - eptr) c = md->end_subject - eptr;
4083 eptr += c;
4084 }
4085 break;
4086
4087 case OP_NOT_DIGIT:
4088 for (i = min; i < max; i++)
4089 {
4090 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4091 break;
4092 eptr++;
4093 }
4094 break;
4095
4096 case OP_DIGIT:
4097 for (i = min; i < max; i++)
4098 {
4099 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4100 break;
4101 eptr++;
4102 }
4103 break;
4104
4105 case OP_NOT_WHITESPACE:
4106 for (i = min; i < max; i++)
4107 {
4108 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4109 break;
4110 eptr++;
4111 }
4112 break;
4113
4114 case OP_WHITESPACE:
4115 for (i = min; i < max; i++)
4116 {
4117 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4118 break;
4119 eptr++;
4120 }
4121 break;
4122
4123 case OP_NOT_WORDCHAR:
4124 for (i = min; i < max; i++)
4125 {
4126 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4127 break;
4128 eptr++;
4129 }
4130 break;
4131
4132 case OP_WORDCHAR:
4133 for (i = min; i < max; i++)
4134 {
4135 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4136 break;
4137 eptr++;
4138 }
4139 break;
4140 }
4141
4142 while (eptr >= pp)
4143 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
4144 return TRUE;
4145 return FALSE;
4146 }
4147 /* Control never gets here */
4148
4149 /* There's been some horrible disaster. */
4150
4151 default:
4152 DPRINTF(("Unknown opcode %d\n", *ecode));
4153 md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
4154 return FALSE;
4155 }
4156
4157 /* Do not stick any code in here without much thought; it is assumed
4158 that "continue" in the code above comes out to here to repeat the main
4159 loop. */
4160
4161 } /* End of main loop */
4162 /* Control never reaches here */
4163 }
4164
4165
4166
4167
4168 /*************************************************
4169 * Execute a Regular Expression *
4170 *************************************************/
4171
4172 /* This function applies a compiled re to a subject string and picks out
4173 portions of the string if it matches. Two elements in the vector are set for
4174 each substring: the offsets to the start and end of the substring.
4175
4176 Arguments:
4177 external_re points to the compiled expression
4178 external_extra points to "hints" from pcre_study() or is NULL
4179 subject points to the subject string
4180 length length of subject string (may contain binary zeros)
4181 start_offset where to start in the subject string
4182 options option bits
4183 offsets points to a vector of ints to be filled in with offsets
4184 offsetcount the number of elements in the vector
4185
4186 Returns: > 0 => success; value is the number of elements filled in
4187 = 0 => success, but offsets is not big enough
4188 -1 => failed to match
4189 < -1 => some kind of unexpected problem
4190 */
4191
4192 int
4193 pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
4194 const char *subject, int length, int start_offset, int options, int *offsets,
4195 int offsetcount)
4196 {
4197 int resetcount, ocount;
4198 int first_char = -1;
4199 int req_char = -1;
4200 int req_char2 = -1;
4201 unsigned long int ims = 0;
4202 match_data match_block;
4203 const uschar *start_bits = NULL;
4204 const uschar *start_match = (const uschar *)subject + start_offset;
4205 const uschar *end_subject;
4206 const uschar *req_char_ptr = start_match - 1;
4207 const real_pcre *re = (const real_pcre *)external_re;
4208 const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
4209 BOOL using_temporary_offsets = FALSE;
4210 BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4211 BOOL startline = (re->options & PCRE_STARTLINE) != 0;
4212
4213 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4214
4215 if (re == NULL || subject == NULL ||
4216 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4217 if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
4218
4219 match_block.start_subject = (const uschar *)subject;
4220 match_block.end_subject = match_block.start_subject + length;
4221 end_subject = match_block.end_subject;
4222
4223 match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4224
4225 match_block.notbol = (options & PCRE_NOTBOL) != 0;
4226 match_block.noteol = (options & PCRE_NOTEOL) != 0;
4227 match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
4228
4229 match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
4230
4231 match_block.lcc = re->tables + lcc_offset;
4232 match_block.ctypes = re->tables + ctypes_offset;
4233
4234 /* The ims options can vary during the matching as a result of the presence
4235 of (?ims) items in the pattern. They are kept in a local variable so that
4236 restoring at the exit of a group is easy. */
4237
4238 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4239
4240 /* If the expression has got more back references than the offsets supplied can
4241 hold, we get a temporary bit of working store to use during the matching.
4242 Otherwise, we can use the vector supplied, rounding down its size to a multiple
4243 of 3. */
4244
4245 ocount = offsetcount - (offsetcount % 3);
4246
4247 if (re->top_backref > 0 && re->top_backref >= ocount/3)
4248 {
4249 ocount = re->top_backref * 3 + 3;
4250 match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4251 if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4252 using_temporary_offsets = TRUE;
4253 DPRINTF(("Got memory to hold back references\n"));
4254 }
4255 else match_block.offset_vector = offsets;
4256
4257 match_block.offset_end = ocount;
4258 match_block.offset_max = (2*ocount)/3;
4259 match_block.offset_overflow = FALSE;
4260
4261 /* Compute the minimum number of offsets that we need to reset each time. Doing
4262 this makes a huge difference to execution time when there aren't many brackets
4263 in the pattern. */
4264
4265 resetcount = 2 + re->top_bracket * 2;
4266 if (resetcount > offsetcount) resetcount = ocount;
4267
4268 /* Reset the working variable associated with each extraction. These should
4269 never be used unless previously set, but they get saved and restored, and so we
4270 initialize them to avoid reading uninitialized locations. */
4271
4272 if (match_block.offset_vector != NULL)
4273 {
4274 register int *iptr = match_block.offset_vector + ocount;
4275 register int *iend = iptr - resetcount/2 + 1;
4276 while (--iptr >= iend) *iptr = -1;
4277 }
4278
4279 /* Set up the first character to match, if available. The first_char value is
4280 never set for an anchored regular expression, but the anchoring may be forced
4281 at run time, so we have to test for anchoring. The first char may be unset for
4282 an unanchored pattern, of course. If there's no first char and the pattern was
4283 studied, there may be a bitmap of possible first characters. */
4284
4285 if (!anchored)
4286 {
4287 if ((re->options & PCRE_FIRSTSET) != 0)
4288 {
4289 first_char = re->first_char;
4290 if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char];
4291 }
4292 else
4293 if (!startline && extra != NULL &&
4294 (extra->options & PCRE_STUDY_MAPPED) != 0)
4295 start_bits = extra->start_bits;
4296 }
4297
4298 /* For anchored or unanchored matches, there may be a "last known required
4299 character" set. If the PCRE_CASELESS is set, implying that the match starts
4300 caselessly, or if there are any changes of this flag within the regex, set up
4301 both cases of the character. Otherwise set the two values the same, which will
4302 avoid duplicate testing (which takes significant time). This covers the vast
4303 majority of cases. It will be suboptimal when the case flag changes in a regex
4304 and the required character in fact is caseful. */
4305
4306 if ((re->options & PCRE_REQCHSET) != 0)
4307 {
4308 req_char = re->req_char;
4309 req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0)?
4310 (re->tables + fcc_offset)[req_char] : req_char;
4311 }
4312
4313 /* Loop for handling unanchored repeated matching attempts; for anchored regexs
4314 the loop runs just once. */
4315
4316 do
4317 {
4318 int rc;
4319 register int *iptr = match_block.offset_vector;
4320 register int *iend = iptr + resetcount;
4321
4322 /* Reset the maximum number of extractions we might see. */
4323
4324 while (iptr < iend) *iptr++ = -1;
4325
4326 /* Advance to a unique first char if possible */
4327
4328 if (first_char >= 0)
4329 {
4330 if ((ims & PCRE_CASELESS) != 0)
4331 while (start_match < end_subject &&
4332 match_block.lcc[*start_match] != first_char)
4333 start_match++;
4334 else
4335 while (start_match < end_subject && *start_match != first_char)
4336 start_match++;
4337 }
4338
4339 /* Or to just after \n for a multiline match if possible */
4340
4341 else if (startline)
4342 {
4343 if (start_match > match_block.start_subject + start_offset)
4344 {
4345 while (start_match < end_subject && start_match[-1] != '\n')
4346 start_match++;
4347 }
4348 }
4349
4350 /* Or to a non-unique first char after study */
4351
4352 else if (start_bits != NULL)
4353 {
4354 while (start_match < end_subject)
4355 {
4356 register int c = *start_match;
4357 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4358 }
4359 }
4360
4361 #ifdef DEBUG /* Sigh. Some compilers never learn. */
4362 printf(">>>> Match against: ");
4363 pchars(start_match, end_subject - start_match, TRUE, &match_block);
4364 printf("\n");
4365 #endif
4366
4367 /* If req_char is set, we know that that character must appear in the subject
4368 for the match to succeed. If the first character is set, req_char must be
4369 later in the subject; otherwise the test starts at the match point. This
4370 optimization can save a huge amount of backtracking in patterns with nested
4371 unlimited repeats that aren't going to match. We don't know what the state of
4372 case matching may be when this character is hit, so test for it in both its
4373 cases if necessary. However, the different cased versions will not be set up
4374 unless PCRE_CASELESS was given or the casing state changes within the regex.
4375 Writing separate code makes it go faster, as does using an autoincrement and
4376 backing off on a match. */
4377
4378 if (req_char >= 0)
4379 {
4380 register const uschar *p = start_match + ((first_char >= 0)? 1 : 0);
4381
4382 /* We don't need to repeat the search if we haven't yet reached the
4383 place we found it at last time. */
4384
4385 if (p > req_char_ptr)
4386 {
4387 /* Do a single test if no case difference is set up */
4388
4389 if (req_char == req_char2)
4390 {
4391 while (p < end_subject)
4392 {
4393 if (*p++ == req_char) { p--; break; }
4394 }
4395 }
4396
4397 /* Otherwise test for either case */
4398
4399 else
4400 {
4401 while (p < end_subject)
4402 {
4403 register int pp = *p++;
4404 if (pp == req_char || pp == req_char2) { p--; break; }
4405 }
4406 }
4407
4408 /* If we can't find the required character, break the matching loop */
4409
4410 if (p >= end_subject) break;
4411
4412 /* If we have found the required character, save the point where we
4413 found it, so that we don't search again next time round the loop if
4414 the start hasn't passed this character yet. */
4415
4416 req_char_ptr = p;
4417 }
4418 }
4419
4420 /* When a match occurs, substrings will be set for all internal extractions;
4421 we just need to set up the whole thing as substring 0 before returning. If
4422 there were too many extractions, set the return code to zero. In the case
4423 where we had to get some local store to hold offsets for backreferences, copy
4424 those back references that we can. In this case there need not be overflow
4425 if certain parts of the pattern were not used. */
4426
4427 match_block.start_match = start_match;
4428 if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))
4429 continue;
4430
4431 /* Copy the offset information from temporary store if necessary */
4432
4433 if (using_temporary_offsets)
4434 {
4435 if (offsetcount >= 4)
4436 {
4437 memcpy(offsets + 2, match_block.offset_vector + 2,
4438 (offsetcount - 2) * sizeof(int));
4439 DPRINTF(("Copied offsets from temporary memory\n"));
4440 }
4441 if (match_block.end_offset_top > offsetcount)
4442 match_block.offset_overflow = TRUE;
4443
4444 DPRINTF(("Freeing temporary memory\n"));
4445 (pcre_free)(match_block.offset_vector);
4446 }
4447
4448 rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
4449
4450 if (match_block.offset_end < 2) rc = 0; else
4451 {
4452 offsets[0] = start_match - match_block.start_subject;
4453 offsets[1] = match_block.end_match_ptr - match_block.start_subject;
4454 }
4455
4456 DPRINTF((">>>> returning %d\n", rc));
4457 return rc;
4458 }
4459
4460 /* This "while" is the end of the "do" above */
4461
4462 while (!anchored &&
4463 match_block.errorcode == PCRE_ERROR_NOMATCH &&
4464 start_match++ < end_subject);
4465
4466 if (using_temporary_offsets)
4467 {
4468 DPRINTF(("Freeing temporary memory\n"));
4469 (pcre_free)(match_block.offset_vector);
4470 }
4471
4472 DPRINTF((">>>> returning %d\n", match_block.errorcode));
4473
4474 return match_block.errorcode;
4475 }
4476
4477 /* End of pcre.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12