/[pcre]/code/trunk/pcre.c
ViewVC logotype

Contents of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 37 - (show annotations) (download)
Sat Feb 24 21:39:09 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 131874 byte(s)
Load pcre-2.07 into code/trunk.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /*
6 This is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language. See
8 the file Tech.Notes for some information on the internals.
9
10 Written by: Philip Hazel <ph10@cam.ac.uk>
11
12 Copyright (c) 1997-1999 University of Cambridge
13
14 -----------------------------------------------------------------------------
15 Permission is granted to anyone to use this software for any purpose on any
16 computer system, and to redistribute it freely, subject to the following
17 restrictions:
18
19 1. This software is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22
23 2. The origin of this software must not be misrepresented, either by
24 explicit claim or by omission.
25
26 3. Altered versions must be plainly marked as such, and must not be
27 misrepresented as being the original software.
28
29 4. If PCRE is embedded in any software that is released under the GNU
30 General Purpose Licence (GPL), then the terms of that licence shall
31 supersede any condition above with which it is incompatible.
32 -----------------------------------------------------------------------------
33 */
34
35
36 /* Define DEBUG to get debugging output on stdout. */
37
38 /* #define DEBUG */
39
40 /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
41 inline, and there are *still* stupid compilers about that don't like indented
42 pre-processor statements. I suppose it's only been 10 years... */
43
44 #ifdef DEBUG
45 #define DPRINTF(p) printf p
46 #else
47 #define DPRINTF(p) /*nothing*/
48 #endif
49
50 /* Include the internals header, which itself includes Standard C headers plus
51 the external pcre header. */
52
53 #include "internal.h"
54
55
56 /* Allow compilation as C++ source code, should anybody want to do that. */
57
58 #ifdef __cplusplus
59 #define class pcre_class
60 #endif
61
62
63 /* Number of items on the nested bracket stacks at compile time. This should
64 not be set greater than 200. */
65
66 #define BRASTACK_SIZE 200
67
68
/* Minimum and maximum repeat counts for the six simple quantifiers, indexed in
parallel. A maximum of 0 stands for "no upper limit". NOTE(review): the entry
order appears to follow the star / minimizing star / plus / minimizing plus /
query / minimizing query order used by the opcode names - confirm against the
OP_ definitions. */

static const char rep_min[] = {
  0, 0,    /* may match zero times */
  1, 1,    /* must match at least once */
  0, 0     /* may match zero times */
};

static const char rep_max[] = {
  0, 0,    /* unlimited */
  0, 0,    /* unlimited */
  1, 1     /* at most once */
};
73
74 /* Text forms of OP_ values and things, for debugging (not all used) */
75
76 #ifdef DEBUG
77 static const char *OP_names[] = {
78 "End", "\\A", "\\B", "\\b", "\\D", "\\d",
79 "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
80 "Opt", "^", "$", "Any", "chars", "not",
81 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
82 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
83 "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
84 "*", "*?", "+", "+?", "?", "??", "{", "{",
85 "class", "Ref",
86 "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
87 "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
88 "Brazero", "Braminzero", "Bra"
89 };
90 #endif
91
92 /* Table for handling escaped characters in the range '0'-'z'. Positive returns
93 are simple data values; negative values are for special things like \d and so
94 on. Zero means further processing is needed (for things like \x), or the escape
95 is invalid. */
96
97 static const short int escapes[] = {
98 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
99 0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
100 '@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */
101 0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
102 0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */
103 0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */
104 '`', 7, -ESC_b, 0, -ESC_d, 27, '\f', 0, /* ` - g */
105 0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */
106 0, 0, '\r', -ESC_s, '\t', 0, 0, -ESC_w, /* p - w */
107 0, 0, -ESC_z /* x - z */
108 };
109
110 /* Definition to allow mutual recursion */
111
112 static BOOL
113 compile_regex(int, int, int *, uschar **, const uschar **, const char **,
114 BOOL, int, int *, int *, compile_data *);
115
116
117
118 /*************************************************
119 * Global variables *
120 *************************************************/
121
122 /* PCRE is thread-clean and doesn't use any global variables in the normal
123 sense. However, it calls memory allocation and free functions via the two
124 indirections below, which can be changed by the caller, but are shared
125 between all threads. */
126
127 void *(*pcre_malloc)(size_t) = malloc;
128 void (*pcre_free)(void *) = free;
129
130
131
132
133 /*************************************************
134 * Default character tables *
135 *************************************************/
136
137 /* A default set of character tables is included in the PCRE binary. Its source
138 is built by the maketables auxiliary program, which uses the default C ctypes
139 functions, and put in the file chartables.c. These tables are used by PCRE
140 whenever the caller of pcre_compile() does not provide an alternate set of
141 tables. */
142
143 #include "chartables.c"
144
145
146
147 /*************************************************
148 * Return version string *
149 *************************************************/
150
151 const char *
152 pcre_version(void)
153 {
154 return PCRE_VERSION;
155 }
156
157
158
159
160 /*************************************************
161 * Return info about a compiled pattern *
162 *************************************************/
163
164 /* This function picks potentially useful data out of the private
165 structure. The public options are passed back in an int - though the
166 re->options field has been expanded to a long int, all the public options
167 at the low end of it, and so even on 16-bit systems this will still be OK.
168 Therefore, I haven't changed the API for pcre_info().
169
170 Arguments:
171 external_re points to compiled code
172 optptr where to pass back the options
173 first_char where to pass back the first character,
174 or -1 if multiline and all branches start ^,
175 or -2 otherwise
176
177 Returns: number of identifying extraction brackets
178 or negative values on error
179 */
180
181 int
182 pcre_info(const pcre *external_re, int *optptr, int *first_char)
183 {
184 const real_pcre *re = (const real_pcre *)external_re;
185 if (re == NULL) return PCRE_ERROR_NULL;
186 if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
187 if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
188 if (first_char != NULL)
189 *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
190 ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
191 return re->top_bracket;
192 }
193
194
195
196
197 #ifdef DEBUG
198 /*************************************************
199 * Debugging function to print chars *
200 *************************************************/
201
202 /* Print a sequence of chars in printable format, stopping at the end of the
203 subject if the requested.
204
205 Arguments:
206 p points to characters
207 length number to print
208 is_subject TRUE if printing from within md->start_subject
209 md pointer to matching data block, if is_subject is TRUE
210
211 Returns: nothing
212 */
213
214 static void
215 pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
216 {
217 int c;
218 if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
219 while (length-- > 0)
220 if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
221 }
222 #endif
223
224
225
226
227 /*************************************************
228 * Handle escapes *
229 *************************************************/
230
231 /* This function is called when a \ has been encountered. It either returns a
232 positive value for a simple escape such as \n, or a negative value which
233 encodes one of the more complicated things such as \d. On entry, ptr is
234 pointing at the \. On exit, it is on the final character of the escape
235 sequence.
236
237 Arguments:
238 ptrptr points to the pattern position pointer
239 errorptr points to the pointer to the error message
240 bracount number of previous extracting brackets
241 options the options bits
242 isclass TRUE if inside a character class
243 cd pointer to char tables block
244
245 Returns: zero or positive => a data character
246 negative => a special escape sequence
247 on error, errorptr is set
248 */
249
250 static int
251 check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
252 int options, BOOL isclass, compile_data *cd)
253 {
254 const uschar *ptr = *ptrptr;
255 int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */
256 int i;
257
258 if (c == 0) *errorptr = ERR1;
259
260 /* Digits or letters may have special meaning; all others are literals. */
261
262 else if (c < '0' || c > 'z') {}
263
264 /* Do an initial lookup in a table. A non-zero result is something that can be
265 returned immediately. Otherwise further processing may be required. */
266
267 else if ((i = escapes[c - '0']) != 0) c = i;
268
269 /* Escapes that need further processing, or are illegal. */
270
271 else
272 {
273 const uschar *oldptr;
274 switch (c)
275 {
276 /* The handling of escape sequences consisting of a string of digits
277 starting with one that is not zero is not straightforward. By experiment,
278 the way Perl works seems to be as follows:
279
280 Outside a character class, the digits are read as a decimal number. If the
281 number is less than 10, or if there are that many previous extracting
282 left brackets, then it is a back reference. Otherwise, up to three octal
283 digits are read to form an escaped byte. Thus \123 is likely to be octal
284 123 (cf \0123, which is octal 012 followed by the literal 3). If the octal
285 value is greater than 377, the least significant 8 bits are taken. Inside a
286 character class, \ followed by a digit is always an octal number. */
287
288 case '1': case '2': case '3': case '4': case '5':
289 case '6': case '7': case '8': case '9':
290
291 if (!isclass)
292 {
293 oldptr = ptr;
294 c -= '0';
295 while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
296 c = c * 10 + *(++ptr) - '0';
297 if (c < 10 || c <= bracount)
298 {
299 c = -(ESC_REF + c);
300 break;
301 }
302 ptr = oldptr; /* Put the pointer back and fall through */
303 }
304
305 /* Handle an octal number following \. If the first digit is 8 or 9, Perl
306 generates a binary zero byte and treats the digit as a following literal.
307 Thus we have to pull back the pointer by one. */
308
309 if ((c = *ptr) >= '8')
310 {
311 ptr--;
312 c = 0;
313 break;
314 }
315
316 /* \0 always starts an octal number, but we may drop through to here with a
317 larger first octal digit */
318
319 case '0':
320 c -= '0';
321 while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
322 ptr[1] != '8' && ptr[1] != '9')
323 c = c * 8 + *(++ptr) - '0';
324 break;
325
326 /* Special escapes not starting with a digit are straightforward */
327
328 case 'x':
329 c = 0;
330 while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
331 {
332 ptr++;
333 c = c * 16 + cd->lcc[*ptr] -
334 (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
335 }
336 break;
337
338 case 'c':
339 c = *(++ptr);
340 if (c == 0)
341 {
342 *errorptr = ERR2;
343 return 0;
344 }
345
346 /* A letter is upper-cased; then the 0x40 bit is flipped */
347
348 if (c >= 'a' && c <= 'z') c = cd->fcc[c];
349 c ^= 0x40;
350 break;
351
352 /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
353 other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
354 for Perl compatibility, it is a literal. This code looks a bit odd, but
355 there used to be some cases other than the default, and there may be again
356 in future, so I haven't "optimized" it. */
357
358 default:
359 if ((options & PCRE_EXTRA) != 0) switch(c)
360 {
361 default:
362 *errorptr = ERR3;
363 break;
364 }
365 break;
366 }
367 }
368
369 *ptrptr = ptr;
370 return c;
371 }
372
373
374
375 /*************************************************
376 * Check for counted repeat *
377 *************************************************/
378
379 /* This function is called when a '{' is encountered in a place where it might
380 start a quantifier. It looks ahead to see if it really is a quantifier or not.
381 It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
382 where the ddds are digits.
383
384 Arguments:
385 p pointer to the first char after '{'
386 cd pointer to char tables block
387
388 Returns: TRUE or FALSE
389 */
390
391 static BOOL
392 is_counted_repeat(const uschar *p, compile_data *cd)
393 {
394 if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
395 while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
396 if (*p == '}') return TRUE;
397
398 if (*p++ != ',') return FALSE;
399 if (*p == '}') return TRUE;
400
401 if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
402 while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
403 return (*p == '}');
404 }
405
406
407
408 /*************************************************
409 * Read repeat counts *
410 *************************************************/
411
412 /* Read an item of the form {n,m} and return the values. This is called only
413 after is_counted_repeat() has confirmed that a repeat-count quantifier exists,
414 so the syntax is guaranteed to be correct, but we need to check the values.
415
416 Arguments:
417 p pointer to first char after '{'
418 minp pointer to int for min
419 maxp pointer to int for max
420 returned as -1 if no max
421 errorptr points to pointer to error message
422 cd pointer to character tables clock
423
424 Returns: pointer to '}' on success;
425 current ptr on error, with errorptr set
426 */
427
428 static const uschar *
429 read_repeat_counts(const uschar *p, int *minp, int *maxp,
430 const char **errorptr, compile_data *cd)
431 {
432 int min = 0;
433 int max = -1;
434
435 while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
436
437 if (*p == '}') max = min; else
438 {
439 if (*(++p) != '}')
440 {
441 max = 0;
442 while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
443 if (max < min)
444 {
445 *errorptr = ERR4;
446 return p;
447 }
448 }
449 }
450
451 /* Do paranoid checks, then fill in the required variables, and pass back the
452 pointer to the terminating '}'. */
453
454 if (min > 65535 || max > 65535)
455 *errorptr = ERR5;
456 else
457 {
458 *minp = min;
459 *maxp = max;
460 }
461 return p;
462 }
463
464
465
466 /*************************************************
467 * Find the fixed length of a pattern *
468 *************************************************/
469
470 /* Scan a pattern and compute the fixed length of subject that will match it,
471 if the length is fixed. This is needed for dealing with backward assertions.
472
473 Arguments:
474 code points to the start of the pattern (the bracket)
475
476 Returns: the fixed length, or -1 if there is no fixed length
477 */
478
479 static int
480 find_fixedlength(uschar *code)
481 {
482 int length = -1;
483
484 register int branchlength = 0;
485 register uschar *cc = code + 3;
486
487 /* Scan along the opcodes for this branch. If we get to the end of the
488 branch, check the length against that of the other branches. */
489
490 for (;;)
491 {
492 int d;
493 register int op = *cc;
494 if (op >= OP_BRA) op = OP_BRA;
495
496 switch (op)
497 {
498 case OP_BRA:
499 case OP_ONCE:
500 case OP_COND:
501 d = find_fixedlength(cc);
502 if (d < 0) return -1;
503 branchlength += d;
504 do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
505 cc += 3;
506 break;
507
508 /* Reached end of a branch; if it's a ket it is the end of a nested
509 call. If it's ALT it is an alternation in a nested call. If it is
510 END it's the end of the outer call. All can be handled by the same code. */
511
512 case OP_ALT:
513 case OP_KET:
514 case OP_KETRMAX:
515 case OP_KETRMIN:
516 case OP_END:
517 if (length < 0) length = branchlength;
518 else if (length != branchlength) return -1;
519 if (*cc != OP_ALT) return length;
520 cc += 3;
521 branchlength = 0;
522 break;
523
524 /* Skip over assertive subpatterns */
525
526 case OP_ASSERT:
527 case OP_ASSERT_NOT:
528 case OP_ASSERTBACK:
529 case OP_ASSERTBACK_NOT:
530 do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
531 cc += 3;
532 break;
533
534 /* Skip over things that don't match chars */
535
536 case OP_REVERSE:
537 cc++;
538 /* Fall through */
539
540 case OP_CREF:
541 case OP_OPT:
542 cc++;
543 /* Fall through */
544
545 case OP_SOD:
546 case OP_EOD:
547 case OP_EODN:
548 case OP_CIRC:
549 case OP_DOLL:
550 case OP_NOT_WORD_BOUNDARY:
551 case OP_WORD_BOUNDARY:
552 cc++;
553 break;
554
555 /* Handle char strings */
556
557 case OP_CHARS:
558 branchlength += *(++cc);
559 cc += *cc + 1;
560 break;
561
562 /* Handle exact repetitions */
563
564 case OP_EXACT:
565 case OP_TYPEEXACT:
566 branchlength += (cc[1] << 8) + cc[2];
567 cc += 4;
568 break;
569
570 /* Handle single-char matchers */
571
572 case OP_NOT_DIGIT:
573 case OP_DIGIT:
574 case OP_NOT_WHITESPACE:
575 case OP_WHITESPACE:
576 case OP_NOT_WORDCHAR:
577 case OP_WORDCHAR:
578 case OP_ANY:
579 branchlength++;
580 cc++;
581 break;
582
583
584 /* Check a class for variable quantification */
585
586 case OP_CLASS:
587 cc += (*cc == OP_REF)? 2 : 33;
588
589 switch (*cc)
590 {
591 case OP_CRSTAR:
592 case OP_CRMINSTAR:
593 case OP_CRQUERY:
594 case OP_CRMINQUERY:
595 return -1;
596
597 case OP_CRRANGE:
598 case OP_CRMINRANGE:
599 if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;
600 branchlength += (cc[1] << 8) + cc[2];
601 cc += 5;
602 break;
603
604 default:
605 branchlength++;
606 }
607 break;
608
609 /* Anything else is variable length */
610
611 default:
612 return -1;
613 }
614 }
615 /* Control never gets here */
616 }
617
618
619
620
621 /*************************************************
622 * Compile one branch *
623 *************************************************/
624
625 /* Scan the pattern, compiling it into the code vector.
626
627 Arguments:
628 options the option bits
629 brackets points to number of brackets used
630 code points to the pointer to the current code point
631 ptrptr points to the current pattern pointer
632 errorptr points to pointer to error message
633 optchanged set to the value of the last OP_OPT item compiled
634 reqchar set to the last literal character required, else -1
635 countlits set to count of mandatory literal characters
636 cd contains pointers to tables
637
638 Returns: TRUE on success
639 FALSE, with *errorptr set on error
640 */
641
642 static BOOL
643 compile_branch(int options, int *brackets, uschar **codeptr,
644 const uschar **ptrptr, const char **errorptr, int *optchanged,
645 int *reqchar, int *countlits, compile_data *cd)
646 {
647 int repeat_type, op_type;
648 int repeat_min, repeat_max;
649 int bravalue, length;
650 int greedy_default, greedy_non_default;
651 int prevreqchar;
652 int condcount = 0;
653 int subcountlits = 0;
654 register int c;
655 register uschar *code = *codeptr;
656 uschar *tempcode;
657 const uschar *ptr = *ptrptr;
658 const uschar *tempptr;
659 uschar *previous = NULL;
660 uschar class[32];
661
662 /* Set up the default and non-default settings for greediness */
663
664 greedy_default = ((options & PCRE_UNGREEDY) != 0);
665 greedy_non_default = greedy_default ^ 1;
666
667 /* Initialize no required char, and count of literals */
668
669 *reqchar = prevreqchar = -1;
670 *countlits = 0;
671
672 /* Switch on next character until the end of the branch */
673
674 for (;; ptr++)
675 {
676 BOOL negate_class;
677 int class_charcount;
678 int class_lastchar;
679 int newoptions;
680 int condref;
681 int subreqchar;
682
683 c = *ptr;
684 if ((options & PCRE_EXTENDED) != 0)
685 {
686 if ((cd->ctypes[c] & ctype_space) != 0) continue;
687 if (c == '#')
688 {
689 while ((c = *(++ptr)) != 0 && c != '\n');
690 continue;
691 }
692 }
693
694 switch(c)
695 {
696 /* The branch terminates at end of string, |, or ). */
697
698 case 0:
699 case '|':
700 case ')':
701 *codeptr = code;
702 *ptrptr = ptr;
703 return TRUE;
704
705 /* Handle single-character metacharacters */
706
707 case '^':
708 previous = NULL;
709 *code++ = OP_CIRC;
710 break;
711
712 case '$':
713 previous = NULL;
714 *code++ = OP_DOLL;
715 break;
716
717 case '.':
718 previous = code;
719 *code++ = OP_ANY;
720 break;
721
722 /* Character classes. These always build a 32-byte bitmap of the permitted
723 characters, except in the special case where there is only one character.
724 For negated classes, we build the map as usual, then invert it at the end.
725 */
726
727 case '[':
728 previous = code;
729 *code++ = OP_CLASS;
730
731 /* If the first character is '^', set the negation flag and skip it. */
732
733 if ((c = *(++ptr)) == '^')
734 {
735 negate_class = TRUE;
736 c = *(++ptr);
737 }
738 else negate_class = FALSE;
739
740 /* Keep a count of chars so that we can optimize the case of just a single
741 character. */
742
743 class_charcount = 0;
744 class_lastchar = -1;
745
746 /* Initialize the 32-char bit map to all zeros. We have to build the
747 map in a temporary bit of store, in case the class contains only 1
748 character, because in that case the compiled code doesn't use the
749 bit map. */
750
751 memset(class, 0, 32 * sizeof(uschar));
752
753 /* Process characters until ] is reached. By writing this as a "do" it
754 means that an initial ] is taken as a data character. */
755
756 do
757 {
758 if (c == 0)
759 {
760 *errorptr = ERR6;
761 goto FAILED;
762 }
763
764 /* Backslash may introduce a single character, or it may introduce one
765 of the specials, which just set a flag. Escaped items are checked for
766 validity in the pre-compiling pass. The sequence \b is a special case.
767 Inside a class (and only there) it is treated as backspace. Elsewhere
768 it marks a word boundary. Other escapes have preset maps ready to
769 or into the one we are building. We assume they have more than one
770 character in them, so set class_count bigger than one. */
771
772 if (c == '\\')
773 {
774 c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
775 if (-c == ESC_b) c = '\b';
776 else if (c < 0)
777 {
778 register const uschar *cbits = cd->cbits;
779 class_charcount = 10;
780 switch (-c)
781 {
782 case ESC_d:
783 for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
784 continue;
785
786 case ESC_D:
787 for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
788 continue;
789
790 case ESC_w:
791 for (c = 0; c < 32; c++)
792 class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]);
793 continue;
794
795 case ESC_W:
796 for (c = 0; c < 32; c++)
797 class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]);
798 continue;
799
800 case ESC_s:
801 for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
802 continue;
803
804 case ESC_S:
805 for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
806 continue;
807
808 default:
809 *errorptr = ERR7;
810 goto FAILED;
811 }
812 }
813 /* Fall through if single character */
814 }
815
816 /* A single character may be followed by '-' to form a range. However,
817 Perl does not permit ']' to be the end of the range. A '-' character
818 here is treated as a literal. */
819
820 if (ptr[1] == '-' && ptr[2] != ']')
821 {
822 int d;
823 ptr += 2;
824 d = *ptr;
825
826 if (d == 0)
827 {
828 *errorptr = ERR6;
829 goto FAILED;
830 }
831
832 /* The second part of a range can be a single-character escape, but
833 not any of the other escapes. */
834
835 if (d == '\\')
836 {
837 d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
838 if (d < 0)
839 {
840 if (d == -ESC_b) d = '\b'; else
841 {
842 *errorptr = ERR7;
843 goto FAILED;
844 }
845 }
846 }
847
848 if (d < c)
849 {
850 *errorptr = ERR8;
851 goto FAILED;
852 }
853
854 for (; c <= d; c++)
855 {
856 class[c/8] |= (1 << (c&7));
857 if ((options & PCRE_CASELESS) != 0)
858 {
859 int uc = cd->fcc[c]; /* flip case */
860 class[uc/8] |= (1 << (uc&7));
861 }
862 class_charcount++; /* in case a one-char range */
863 class_lastchar = c;
864 }
865 continue; /* Go get the next char in the class */
866 }
867
868 /* Handle a lone single character - we can get here for a normal
869 non-escape char, or after \ that introduces a single character. */
870
871 class [c/8] |= (1 << (c&7));
872 if ((options & PCRE_CASELESS) != 0)
873 {
874 c = cd->fcc[c]; /* flip case */
875 class[c/8] |= (1 << (c&7));
876 }
877 class_charcount++;
878 class_lastchar = c;
879 }
880
881 /* Loop until ']' reached; the check for end of string happens inside the
882 loop. This "while" is the end of the "do" above. */
883
884 while ((c = *(++ptr)) != ']');
885
886 /* If class_charcount is 1 and class_lastchar is not negative, we saw
887 precisely one character. This doesn't need the whole 32-byte bit map.
888 We turn it into a 1-character OP_CHAR if it's positive, or OP_NOT if
889 it's negative. */
890
891 if (class_charcount == 1 && class_lastchar >= 0)
892 {
893 if (negate_class)
894 {
895 code[-1] = OP_NOT;
896 }
897 else
898 {
899 code[-1] = OP_CHARS;
900 *code++ = 1;
901 }
902 *code++ = class_lastchar;
903 }
904
905 /* Otherwise, negate the 32-byte map if necessary, and copy it into
906 the code vector. */
907
908 else
909 {
910 if (negate_class)
911 for (c = 0; c < 32; c++) code[c] = ~class[c];
912 else
913 memcpy(code, class, 32);
914 code += 32;
915 }
916 break;
917
918 /* Various kinds of repeat */
919
920 case '{':
921 if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
922 ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
923 if (*errorptr != NULL) goto FAILED;
924 goto REPEAT;
925
926 case '*':
927 repeat_min = 0;
928 repeat_max = -1;
929 goto REPEAT;
930
931 case '+':
932 repeat_min = 1;
933 repeat_max = -1;
934 goto REPEAT;
935
936 case '?':
937 repeat_min = 0;
938 repeat_max = 1;
939
940 REPEAT:
941 if (previous == NULL)
942 {
943 *errorptr = ERR9;
944 goto FAILED;
945 }
946
947 /* If the next character is '?' this is a minimizing repeat, by default,
948 but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
949 next character. */
950
951 if (ptr[1] == '?')
952 { repeat_type = greedy_non_default; ptr++; }
953 else repeat_type = greedy_default;
954
955 /* If previous was a string of characters, chop off the last one and use it
956 as the subject of the repeat. If there was only one character, we can
957 abolish the previous item altogether. A repeat with a zero minimum wipes
958 out any reqchar setting, backing up to the previous value. We must also
959 adjust the countlits value. */
960
961 if (*previous == OP_CHARS)
962 {
963 int len = previous[1];
964
965 if (repeat_min == 0) *reqchar = prevreqchar;
966 *countlits += repeat_min - 1;
967
968 if (len == 1)
969 {
970 c = previous[2];
971 code = previous;
972 }
973 else
974 {
975 c = previous[len+1];
976 previous[1]--;
977 code--;
978 }
979 op_type = 0; /* Use single-char op codes */
980 goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
981 }
982
983 /* If previous was a single negated character ([^a] or similar), we use
984 one of the special opcodes, replacing it. The code is shared with single-
985 character repeats by adding a suitable offset into repeat_type. */
986
987 else if ((int)*previous == OP_NOT)
988 {
989 op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */
990 c = previous[1];
991 code = previous;
992 goto OUTPUT_SINGLE_REPEAT;
993 }
994
995 /* If previous was a character type match (\d or similar), abolish it and
996 create a suitable repeat item. The code is shared with single-character
997 repeats by adding a suitable offset into repeat_type. */
998
999 else if ((int)*previous < OP_EODN || *previous == OP_ANY)
1000 {
1001 op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
1002 c = *previous;
1003 code = previous;
1004
1005 OUTPUT_SINGLE_REPEAT:
1006
1007 /* If the maximum is zero then the minimum must also be zero; Perl allows
1008 this case, so we do too - by simply omitting the item altogether. */
1009
1010 if (repeat_max == 0) goto END_REPEAT;
1011
1012 /* Combine the op_type with the repeat_type */
1013
1014 repeat_type += op_type;
1015
1016 /* A minimum of zero is handled either as the special case * or ?, or as
1017 an UPTO, with the maximum given. */
1018
1019 if (repeat_min == 0)
1020 {
1021 if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
1022 else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
1023 else
1024 {
1025 *code++ = OP_UPTO + repeat_type;
1026 *code++ = repeat_max >> 8;
1027 *code++ = (repeat_max & 255);
1028 }
1029 }
1030
1031 /* The case {1,} is handled as the special case + */
1032
1033 else if (repeat_min == 1 && repeat_max == -1)
1034 *code++ = OP_PLUS + repeat_type;
1035
1036 /* The case {n,n} is just an EXACT, while the general case {n,m} is
1037 handled as an EXACT followed by an UPTO. An EXACT of 1 is optimized. */
1038
1039 else
1040 {
1041 if (repeat_min != 1)
1042 {
1043 *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
1044 *code++ = repeat_min >> 8;
1045 *code++ = (repeat_min & 255);
1046 }
1047
1048 /* If the mininum is 1 and the previous item was a character string,
1049 we either have to put back the item that got cancelled if the string
1050 length was 1, or add the character back onto the end of a longer
1051 string. For a character type nothing need be done; it will just get
1052 put back naturally. Note that the final character is always going to
1053 get added below. */
1054
1055 else if (*previous == OP_CHARS)
1056 {
1057 if (code == previous) code += 2; else previous[1]++;
1058 }
1059
1060 /* For a single negated character we also have to put back the
1061 item that got cancelled. */
1062
1063 else if (*previous == OP_NOT) code++;
1064
1065 /* If the maximum is unlimited, insert an OP_STAR. */
1066
1067 if (repeat_max < 0)
1068 {
1069 *code++ = c;
1070 *code++ = OP_STAR + repeat_type;
1071 }
1072
1073 /* Else insert an UPTO if the max is greater than the min. */
1074
1075 else if (repeat_max != repeat_min)
1076 {
1077 *code++ = c;
1078 repeat_max -= repeat_min;
1079 *code++ = OP_UPTO + repeat_type;
1080 *code++ = repeat_max >> 8;
1081 *code++ = (repeat_max & 255);
1082 }
1083 }
1084
1085 /* The character or character type itself comes last in all cases. */
1086
1087 *code++ = c;
1088 }
1089
1090 /* If previous was a character class or a back reference, we put the repeat
1091 stuff after it, but just skip the item if the repeat was {0,0}. */
1092
1093 else if (*previous == OP_CLASS || *previous == OP_REF)
1094 {
1095 if (repeat_max == 0)
1096 {
1097 code = previous;
1098 goto END_REPEAT;
1099 }
1100 if (repeat_min == 0 && repeat_max == -1)
1101 *code++ = OP_CRSTAR + repeat_type;
1102 else if (repeat_min == 1 && repeat_max == -1)
1103 *code++ = OP_CRPLUS + repeat_type;
1104 else if (repeat_min == 0 && repeat_max == 1)
1105 *code++ = OP_CRQUERY + repeat_type;
1106 else
1107 {
1108 *code++ = OP_CRRANGE + repeat_type;
1109 *code++ = repeat_min >> 8;
1110 *code++ = repeat_min & 255;
1111 if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */
1112 *code++ = repeat_max >> 8;
1113 *code++ = repeat_max & 255;
1114 }
1115 }
1116
1117 /* If previous was a bracket group, we may have to replicate it in certain
1118 cases. */
1119
1120 else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1121 (int)*previous == OP_COND)
1122 {
1123 register int i;
1124 int ketoffset = 0;
1125 int len = code - previous;
1126 uschar *bralink = NULL;
1127
1128 /* If the maximum repeat count is unlimited, find the end of the bracket
1129 by scanning through from the start, and compute the offset back to it
1130 from the current code pointer. There may be an OP_OPT setting following
1131 the final KET, so we can't find the end just by going back from the code
1132 pointer. */
1133
1134 if (repeat_max == -1)
1135 {
1136 register uschar *ket = previous;
1137 do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
1138 ketoffset = code - ket;
1139 }
1140
1141 /* The case of a zero minimum is special because of the need to stick
1142 OP_BRAZERO in front of it, and because the group appears once in the
1143 data, whereas in other cases it appears the minimum number of times. For
1144 this reason, it is simplest to treat this case separately, as otherwise
1145 the code gets far too messy. There are several special subcases when the
1146 minimum is zero. */
1147
1148 if (repeat_min == 0)
1149 {
1150 /* If we set up a required char from the bracket, we must back off
1151 to the previous value and reset the countlits value too. */
1152
1153 if (subcountlits > 0)
1154 {
1155 *reqchar = prevreqchar;
1156 *countlits -= subcountlits;
1157 }
1158
1159 /* If the maximum is also zero, we just omit the group from the output
1160 altogether. */
1161
1162 if (repeat_max == 0)
1163 {
1164 code = previous;
1165 goto END_REPEAT;
1166 }
1167
1168 /* If the maximum is 1 or unlimited, we just have to stick in the
1169 BRAZERO and do no more at this point. */
1170
1171 if (repeat_max <= 1)
1172 {
1173 memmove(previous+1, previous, len);
1174 code++;
1175 *previous++ = OP_BRAZERO + repeat_type;
1176 }
1177
1178 /* If the maximum is greater than 1 and limited, we have to replicate
1179 in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1180 The first one has to be handled carefully because it's the original
1181 copy, which has to be moved up. The remainder can be handled by code
1182 that is common with the non-zero minimum case below. We just have to
1183 adjust the value of repeat_max, since one less copy is required. */
1184
1185 else
1186 {
1187 int offset;
1188 memmove(previous+4, previous, len);
1189 code += 4;
1190 *previous++ = OP_BRAZERO + repeat_type;
1191 *previous++ = OP_BRA;
1192
1193 /* We chain together the bracket offset fields that have to be
1194 filled in later when the ends of the brackets are reached. */
1195
1196 offset = (bralink == NULL)? 0 : previous - bralink;
1197 bralink = previous;
1198 *previous++ = offset >> 8;
1199 *previous++ = offset & 255;
1200 }
1201
1202 repeat_max--;
1203 }
1204
1205 /* If the minimum is greater than zero, replicate the group as many
1206 times as necessary, and adjust the maximum to the number of subsequent
1207 copies that we need. */
1208
1209 else
1210 {
1211 for (i = 1; i < repeat_min; i++)
1212 {
1213 memcpy(code, previous, len);
1214 code += len;
1215 }
1216 if (repeat_max > 0) repeat_max -= repeat_min;
1217 }
1218
1219 /* This code is common to both the zero and non-zero minimum cases. If
1220 the maximum is limited, it replicates the group in a nested fashion,
1221 remembering the bracket starts on a stack. In the case of a zero minimum,
1222 the first one was set up above. In all cases the repeat_max now specifies
1223 the number of additional copies needed. */
1224
1225 if (repeat_max >= 0)
1226 {
1227 for (i = repeat_max - 1; i >= 0; i--)
1228 {
1229 *code++ = OP_BRAZERO + repeat_type;
1230
1231 /* All but the final copy start a new nesting, maintaining the
1232 chain of brackets outstanding. */
1233
1234 if (i != 0)
1235 {
1236 int offset;
1237 *code++ = OP_BRA;
1238 offset = (bralink == NULL)? 0 : code - bralink;
1239 bralink = code;
1240 *code++ = offset >> 8;
1241 *code++ = offset & 255;
1242 }
1243
1244 memcpy(code, previous, len);
1245 code += len;
1246 }
1247
1248 /* Now chain through the pending brackets, and fill in their length
1249 fields (which are holding the chain links pro tem). */
1250
1251 while (bralink != NULL)
1252 {
1253 int oldlinkoffset;
1254 int offset = code - bralink + 1;
1255 uschar *bra = code - offset;
1256 oldlinkoffset = (bra[1] << 8) + bra[2];
1257 bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
1258 *code++ = OP_KET;
1259 *code++ = bra[1] = offset >> 8;
1260 *code++ = bra[2] = (offset & 255);
1261 }
1262 }
1263
1264 /* If the maximum is unlimited, set a repeater in the final copy. We
1265 can't just offset backwards from the current code point, because we
1266 don't know if there's been an options resetting after the ket. The
1267 correct offset was computed above. */
1268
1269 else code[-ketoffset] = OP_KETRMAX + repeat_type;
1270 }
1271
1272 /* Else there's some kind of shambles */
1273
1274 else
1275 {
1276 *errorptr = ERR11;
1277 goto FAILED;
1278 }
1279
1280 /* In all cases we no longer have a previous item. */
1281
1282 END_REPEAT:
1283 previous = NULL;
1284 break;
1285
1286
1287 /* Start of nested bracket sub-expression, or comment or lookahead or
1288 lookbehind or option setting or condition. First deal with special things
1289 that can come after a bracket; all are introduced by ?, and the appearance
1290 of any of them means that this is not a referencing group. They were
1291 checked for validity in the first pass over the string, so we don't have to
1292 check for syntax errors here. */
1293
1294 case '(':
1295 newoptions = options;
1296 condref = -1;
1297
1298 if (*(++ptr) == '?')
1299 {
1300 int set, unset;
1301 int *optset;
1302
1303 switch (*(++ptr))
1304 {
1305 case '#': /* Comment; skip to ket */
1306 ptr++;
1307 while (*ptr != ')') ptr++;
1308 continue;
1309
1310 case ':': /* Non-extracting bracket */
1311 bravalue = OP_BRA;
1312 ptr++;
1313 break;
1314
1315 case '(':
1316 bravalue = OP_COND; /* Conditional group */
1317 if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1318 {
1319 condref = *ptr - '0';
1320 while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1321 ptr++;
1322 }
1323 else ptr--;
1324 break;
1325
1326 case '=': /* Positive lookahead */
1327 bravalue = OP_ASSERT;
1328 ptr++;
1329 break;
1330
1331 case '!': /* Negative lookahead */
1332 bravalue = OP_ASSERT_NOT;
1333 ptr++;
1334 break;
1335
1336 case '<': /* Lookbehinds */
1337 switch (*(++ptr))
1338 {
1339 case '=': /* Positive lookbehind */
1340 bravalue = OP_ASSERTBACK;
1341 ptr++;
1342 break;
1343
1344 case '!': /* Negative lookbehind */
1345 bravalue = OP_ASSERTBACK_NOT;
1346 ptr++;
1347 break;
1348
1349 default: /* Syntax error */
1350 *errorptr = ERR24;
1351 goto FAILED;
1352 }
1353 break;
1354
1355 case '>': /* One-time brackets */
1356 bravalue = OP_ONCE;
1357 ptr++;
1358 break;
1359
1360 default: /* Option setting */
1361 set = unset = 0;
1362 optset = &set;
1363
1364 while (*ptr != ')' && *ptr != ':')
1365 {
1366 switch (*ptr++)
1367 {
1368 case '-': optset = &unset; break;
1369
1370 case 'i': *optset |= PCRE_CASELESS; break;
1371 case 'm': *optset |= PCRE_MULTILINE; break;
1372 case 's': *optset |= PCRE_DOTALL; break;
1373 case 'x': *optset |= PCRE_EXTENDED; break;
1374 case 'U': *optset |= PCRE_UNGREEDY; break;
1375 case 'X': *optset |= PCRE_EXTRA; break;
1376
1377 default:
1378 *errorptr = ERR12;
1379 goto FAILED;
1380 }
1381 }
1382
1383 /* Set up the changed option bits, but don't change anything yet. */
1384
1385 newoptions = (options | set) & (~unset);
1386
1387 /* If the options ended with ')' this is not the start of a nested
1388 group with option changes, so the options change at this level. At top
1389 level there is nothing else to be done (the options will in fact have
1390 been set from the start of compiling as a result of the first pass) but
1391 at an inner level we must compile code to change the ims options if
1392 necessary, and pass the new setting back so that it can be put at the
1393 start of any following branches, and when this group ends, a resetting
1394 item can be compiled. */
1395
1396 if (*ptr == ')')
1397 {
1398 if ((options & PCRE_INGROUP) != 0 &&
1399 (options & PCRE_IMS) != (newoptions & PCRE_IMS))
1400 {
1401 *code++ = OP_OPT;
1402 *code++ = *optchanged = newoptions & PCRE_IMS;
1403 }
1404 options = newoptions; /* Change options at this level */
1405 previous = NULL; /* This item can't be repeated */
1406 continue; /* It is complete */
1407 }
1408
1409 /* If the options ended with ':' we are heading into a nested group
1410 with possible change of options. Such groups are non-capturing and are
1411 not assertions of any kind. All we need to do is skip over the ':';
1412 the newoptions value is handled below. */
1413
1414 bravalue = OP_BRA;
1415 ptr++;
1416 }
1417 }
1418
1419 /* Else we have a referencing group; adjust the opcode. */
1420
1421 else
1422 {
1423 if (++(*brackets) > EXTRACT_MAX)
1424 {
1425 *errorptr = ERR13;
1426 goto FAILED;
1427 }
1428 bravalue = OP_BRA + *brackets;
1429 }
1430
1431 /* Process nested bracketed re. Assertions may not be repeated, but other
1432 kinds can be. We copy code into a non-register variable in order to be able
1433 to pass its address because some compilers complain otherwise. Pass in a
1434 new setting for the ims options if they have changed. */
1435
1436 previous = (bravalue >= OP_ONCE)? code : NULL;
1437 *code = bravalue;
1438 tempcode = code;
1439
1440 if (!compile_regex(
1441 options | PCRE_INGROUP, /* Set for all nested groups */
1442 ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1443 newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1444 brackets, /* Bracket level */
1445 &tempcode, /* Where to put code (updated) */
1446 &ptr, /* Input pointer (updated) */
1447 errorptr, /* Where to put an error message */
1448 (bravalue == OP_ASSERTBACK ||
1449 bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1450 condref, /* Condition reference number */
1451 &subreqchar, /* For possible last char */
1452 &subcountlits, /* For literal count */
1453 cd)) /* Tables block */
1454 goto FAILED;
1455
1456 /* At the end of compiling, code is still pointing to the start of the
1457 group, while tempcode has been updated to point past the end of the group
1458 and any option resetting that may follow it. The pattern pointer (ptr)
1459 is on the bracket. */
1460
1461 /* If this is a conditional bracket, check that there are no more than
1462 two branches in the group. */
1463
1464 if (bravalue == OP_COND)
1465 {
1466 uschar *tc = code;
1467 condcount = 0;
1468
1469 do {
1470 condcount++;
1471 tc += (tc[1] << 8) | tc[2];
1472 }
1473 while (*tc != OP_KET);
1474
1475 if (condcount > 2)
1476 {
1477 *errorptr = ERR27;
1478 goto FAILED;
1479 }
1480 }
1481
1482 /* Handle updating of the required character. If the subpattern didn't
1483 set one, leave it as it was. Otherwise, update it for normal brackets of
1484 all kinds, forward assertions, and conditions with two branches. Don't
1485 update the literal count for forward assertions, however. If the bracket
1486 is followed by a quantifier with zero repeat, we have to back off. Hence
1487 the definition of prevreqchar and subcountlits outside the main loop so
1488 that they can be accessed for the back off. */
1489
1490 if (subreqchar > 0 &&
1491 (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT ||
1492 (bravalue == OP_COND && condcount == 2)))
1493 {
1494 prevreqchar = *reqchar;
1495 *reqchar = subreqchar;
1496 if (bravalue != OP_ASSERT) *countlits += subcountlits;
1497 }
1498
1499 /* Now update the main code pointer to the end of the group. */
1500
1501 code = tempcode;
1502
1503 /* Error if hit end of pattern */
1504
1505 if (*ptr != ')')
1506 {
1507 *errorptr = ERR14;
1508 goto FAILED;
1509 }
1510 break;
1511
1512 /* Check \ for being a real metacharacter; if not, fall through and handle
1513 it as a data character at the start of a string. Escape items are checked
1514 for validity in the pre-compiling pass. */
1515
1516 case '\\':
1517 tempptr = ptr;
1518 c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1519
1520 /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1521 are arranged to be the negation of the corresponding OP_values. For the
1522 back references, the values are ESC_REF plus the reference number. Only
1523 back references and those types that consume a character may be repeated.
1524 We can test for values between ESC_b and ESC_Z for the latter; this may
1525 have to change if any new ones are ever created. */
1526
1527 if (c < 0)
1528 {
1529 if (-c >= ESC_REF)
1530 {
1531 previous = code;
1532 *code++ = OP_REF;
1533 *code++ = -c - ESC_REF;
1534 }
1535 else
1536 {
1537 previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
1538 *code++ = -c;
1539 }
1540 continue;
1541 }
1542
1543 /* Data character: reset and fall through */
1544
1545 ptr = tempptr;
1546 c = '\\';
1547
1548 /* Handle a run of data characters until a metacharacter is encountered.
1549 The first character is guaranteed not to be whitespace or # when the
1550 extended flag is set. */
1551
1552 NORMAL_CHAR:
1553 default:
1554 previous = code;
1555 *code = OP_CHARS;
1556 code += 2;
1557 length = 0;
1558
1559 do
1560 {
1561 if ((options & PCRE_EXTENDED) != 0)
1562 {
1563 if ((cd->ctypes[c] & ctype_space) != 0) continue;
1564 if (c == '#')
1565 {
1566 while ((c = *(++ptr)) != 0 && c != '\n');
1567 if (c == 0) break;
1568 continue;
1569 }
1570 }
1571
1572 /* Backslash may introduce a data char or a metacharacter. Escaped items
1573 are checked for validity in the pre-compiling pass. Stop the string
1574 before a metaitem. */
1575
1576 if (c == '\\')
1577 {
1578 tempptr = ptr;
1579 c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1580 if (c < 0) { ptr = tempptr; break; }
1581 }
1582
1583 /* Ordinary character or single-char escape */
1584
1585 *code++ = c;
1586 length++;
1587 }
1588
1589 /* This "while" is the end of the "do" above. */
1590
1591 while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1592
1593 /* Update the last character and the count of literals */
1594
1595 prevreqchar = (length > 1)? code[-2] : *reqchar;
1596 *reqchar = code[-1];
1597 *countlits += length;
1598
1599 /* Compute the length and set it in the data vector, and advance to
1600 the next state. */
1601
1602 previous[1] = length;
1603 if (length < 255) ptr--;
1604 break;
1605 }
1606 } /* end of big loop */
1607
1608 /* Control never reaches here by falling through, only by a goto for all the
1609 error states. Pass back the position in the pattern so that it can be displayed
1610 to the user for diagnosing the error. */
1611
1612 FAILED:
1613 *ptrptr = ptr;
1614 return FALSE;
1615 }
1616
1617
1618
1619
1620 /*************************************************
1621 * Compile sequence of alternatives *
1622 *************************************************/
1623
1624 /* On entry, ptr is pointing past the bracket character, but on return
1625 it points to the closing bracket, or vertical bar, or end of string.
1626 The code variable is pointing at the byte into which the BRA operator has been
1627 stored. If the ims options are changed at the start (for a (?ims: group) or
1628 during any branch, we need to insert an OP_OPT item at the start of every
1629 following branch to ensure they get set correctly at run time, and also pass
1630 the new options into every subsequent branch compile.
1631
1632 Argument:
1633 options the option bits
1634 optchanged new ims options to set as if (?ims) were at the start, or -1
1635 for no change
1636 brackets -> int containing the number of extracting brackets used
1637 codeptr -> the address of the current code pointer
1638 ptrptr -> the address of the current pattern pointer
1639 errorptr -> pointer to error message
1640 lookbehind TRUE if this is a lookbehind assertion
1641 condref > 0 for OPT_CREF setting at start of conditional group
1642 reqchar -> place to put the last required character, or a negative number
1643 countlits -> place to put the shortest literal count of any branch
1644 cd points to the data block with tables pointers
1645
1646 Returns: TRUE on success
1647 */
1648
1649 static BOOL
1650 compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1651 const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1652 int *reqchar, int *countlits, compile_data *cd)
1653 {
1654 const uschar *ptr = *ptrptr;
1655 uschar *code = *codeptr;
1656 uschar *last_branch = code;
1657 uschar *start_bracket = code;
1658 uschar *reverse_count = NULL;
1659 int oldoptions = options & PCRE_IMS;
1660 int branchreqchar, branchcountlits;
1661
1662 *reqchar = -1;
1663 *countlits = INT_MAX;
1664 code += 3;
1665
1666 /* At the start of a reference-based conditional group, insert the reference
1667 number as an OP_CREF item. */
1668
1669 if (condref > 0)
1670 {
1671 *code++ = OP_CREF;
1672 *code++ = condref;
1673 }
1674
1675 /* Loop for each alternative branch */
1676
1677 for (;;)
1678 {
1679 int length;
1680
1681 /* Handle change of options */
1682
1683 if (optchanged >= 0)
1684 {
1685 *code++ = OP_OPT;
1686 *code++ = optchanged;
1687 options = (options & ~PCRE_IMS) | optchanged;
1688 }
1689
1690 /* Set up dummy OP_REVERSE if lookbehind assertion */
1691
1692 if (lookbehind)
1693 {
1694 *code++ = OP_REVERSE;
1695 reverse_count = code;
1696 *code++ = 0;
1697 *code++ = 0;
1698 }
1699
1700 /* Now compile the branch */
1701
1702 if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged,
1703 &branchreqchar, &branchcountlits, cd))
1704 {
1705 *ptrptr = ptr;
1706 return FALSE;
1707 }
1708
1709 /* Fill in the length of the last branch */
1710
1711 length = code - last_branch;
1712 last_branch[1] = length >> 8;
1713 last_branch[2] = length & 255;
1714
1715 /* Save the last required character if all branches have the same; a current
1716 value of -1 means unset, while -2 means "previous branch had no last required
1717 char". */
1718
1719 if (*reqchar != -2)
1720 {
1721 if (branchreqchar >= 0)
1722 {
1723 if (*reqchar == -1) *reqchar = branchreqchar;
1724 else if (*reqchar != branchreqchar) *reqchar = -2;
1725 }
1726 else *reqchar = -2;
1727 }
1728
1729 /* Keep the shortest literal count */
1730
1731 if (branchcountlits < *countlits) *countlits = branchcountlits;
1732 DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits));
1733
1734 /* If lookbehind, check that this branch matches a fixed-length string,
1735 and put the length into the OP_REVERSE item. Temporarily mark the end of
1736 the branch with OP_END. */
1737
1738 if (lookbehind)
1739 {
1740 *code = OP_END;
1741 length = find_fixedlength(last_branch);
1742 DPRINTF(("fixed length = %d\n", length));
1743 if (length < 0)
1744 {
1745 *errorptr = ERR25;
1746 *ptrptr = ptr;
1747 return FALSE;
1748 }
1749 reverse_count[0] = (length >> 8);
1750 reverse_count[1] = length & 255;
1751 }
1752
1753 /* Reached end of expression, either ')' or end of pattern. Insert a
1754 terminating ket and the length of the whole bracketed item, and return,
1755 leaving the pointer at the terminating char. If any of the ims options
1756 were changed inside the group, compile a resetting op-code following. */
1757
1758 if (*ptr != '|')
1759 {
1760 length = code - start_bracket;
1761 *code++ = OP_KET;
1762 *code++ = length >> 8;
1763 *code++ = length & 255;
1764 if (optchanged >= 0)
1765 {
1766 *code++ = OP_OPT;
1767 *code++ = oldoptions;
1768 }
1769 *codeptr = code;
1770 *ptrptr = ptr;
1771 return TRUE;
1772 }
1773
1774 /* Another branch follows; insert an "or" node and advance the pointer. */
1775
1776 *code = OP_ALT;
1777 last_branch = code;
1778 code += 3;
1779 ptr++;
1780 }
1781 /* Control never reaches here */
1782 }
1783
1784
1785
1786
1787 /*************************************************
1788 * Find first significant op code *
1789 *************************************************/
1790
1791 /* This is called by several functions that scan a compiled expression looking
1792 for a fixed first character, or an anchoring op code etc. It skips over things
1793 that do not influence this. For one application, a change of caseless option is
1794 important.
1795
1796 Arguments:
1797 code pointer to the start of the group
1798 options pointer to external options
1799 optbit the option bit whose changing is significant, or
1800 zero if none are
1801 optstop TRUE to return on option change, otherwise change the options
1802 value and continue
1803
1804 Returns: pointer to the first significant opcode
1805 */
1806
1807 static const uschar*
1808 first_significant_code(const uschar *code, int *options, int optbit,
1809 BOOL optstop)
1810 {
1811 for (;;)
1812 {
1813 switch ((int)*code)
1814 {
1815 case OP_OPT:
1816 if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
1817 {
1818 if (optstop) return code;
1819 *options = (int)code[1];
1820 }
1821 code += 2;
1822 break;
1823
1824 case OP_CREF:
1825 code += 2;
1826 break;
1827
1828 case OP_WORD_BOUNDARY:
1829 case OP_NOT_WORD_BOUNDARY:
1830 code++;
1831 break;
1832
1833 case OP_ASSERT_NOT:
1834 case OP_ASSERTBACK:
1835 case OP_ASSERTBACK_NOT:
1836 do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
1837 code += 3;
1838 break;
1839
1840 default:
1841 return code;
1842 }
1843 }
1844 /* Control never reaches here */
1845 }
1846
1847
1848
1849
1850 /*************************************************
1851 * Check for anchored expression *
1852 *************************************************/
1853
1854 /* Try to find out if this is an anchored regular expression. Consider each
1855 alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
1856 all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
1857 it's anchored. However, if this is a multiline pattern, then only OP_SOD
1858 counts, since OP_CIRC can match in the middle.
1859
1860 A branch is also implicitly anchored if it starts with .* and DOTALL is set,
1861 because that will try the rest of the pattern at all possible matching points,
1862 so there is no point trying them again.
1863
1864 Arguments:
1865 code points to start of expression (the bracket)
1866 options points to the options setting
1867
1868 Returns: TRUE or FALSE
1869 */
1870
1871 static BOOL
1872 is_anchored(register const uschar *code, int *options)
1873 {
1874 do {
1875 const uschar *scode = first_significant_code(code + 3, options,
1876 PCRE_MULTILINE, FALSE);
1877 register int op = *scode;
1878 if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1879 { if (!is_anchored(scode, options)) return FALSE; }
1880 else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
1881 (*options & PCRE_DOTALL) != 0)
1882 { if (scode[1] != OP_ANY) return FALSE; }
1883 else if (op != OP_SOD &&
1884 ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
1885 return FALSE;
1886 code += (code[1] << 8) + code[2];
1887 }
1888 while (*code == OP_ALT);
1889 return TRUE;
1890 }
1891
1892
1893
1894 /*************************************************
1895 * Check for starting with ^ or .* *
1896 *************************************************/
1897
1898 /* This is called to find out if every branch starts with ^ or .* so that
1899 "first char" processing can be done to speed things up in multiline
1900 matching and for non-DOTALL patterns that start with .* (which must start at
1901 the beginning or after \n).
1902
1903 Argument: points to start of expression (the bracket)
1904 Returns: TRUE or FALSE
1905 */
1906
1907 static BOOL
1908 is_startline(const uschar *code)
1909 {
1910 do {
1911 const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
1912 register int op = *scode;
1913 if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1914 { if (!is_startline(scode)) return FALSE; }
1915 else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1916 { if (scode[1] != OP_ANY) return FALSE; }
1917 else if (op != OP_CIRC) return FALSE;
1918 code += (code[1] << 8) + code[2];
1919 }
1920 while (*code == OP_ALT);
1921 return TRUE;
1922 }
1923
1924
1925
1926 /*************************************************
1927 * Check for fixed first char *
1928 *************************************************/
1929
1930 /* Try to find out if there is a fixed first character. This is called for
1931 unanchored expressions, as it speeds up their processing quite considerably.
1932 Consider each alternative branch. If they all start with the same char, or with
1933 a bracket all of whose alternatives start with the same char (recurse ad lib),
1934 then we return that char, otherwise -1.
1935
1936 Arguments:
1937 code points to start of expression (the bracket)
1938 options pointer to the options (used to check casing changes)
1939
1940 Returns: -1 or the fixed first char
1941 */
1942
1943 static int
1944 find_firstchar(const uschar *code, int *options)
1945 {
1946 register int c = -1;
1947 do {
1948 int d;
1949 const uschar *scode = first_significant_code(code + 3, options,
1950 PCRE_CASELESS, TRUE);
1951 register int op = *scode;
1952
1953 if (op >= OP_BRA) op = OP_BRA;
1954
1955 switch(op)
1956 {
1957 default:
1958 return -1;
1959
1960 case OP_BRA:
1961 case OP_ASSERT:
1962 case OP_ONCE:
1963 case OP_COND:
1964 if ((d = find_firstchar(scode, options)) < 0) return -1;
1965 if (c < 0) c = d; else if (c != d) return -1;
1966 break;
1967
1968 case OP_EXACT: /* Fall through */
1969 scode++;
1970
1971 case OP_CHARS: /* Fall through */
1972 scode++;
1973
1974 case OP_PLUS:
1975 case OP_MINPLUS:
1976 if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
1977 break;
1978 }
1979
1980 code += (code[1] << 8) + code[2];
1981 }
1982 while (*code == OP_ALT);
1983 return c;
1984 }
1985
1986
1987
1988
1989
1990 /*************************************************
1991 * Compile a Regular Expression *
1992 *************************************************/
1993
1994 /* This function takes a string and returns a pointer to a block of store
1995 holding a compiled version of the expression.
1996
1997 Arguments:
1998 pattern the regular expression
1999 options various option bits
2000 errorptr pointer to pointer to error text
2001 erroroffset ptr offset in pattern where error was detected
2002 tables pointer to character tables or NULL
2003
2004 Returns: pointer to compiled data block, or NULL on error,
2005 with errorptr and erroroffset set
2006 */
2007
2008 pcre *
2009 pcre_compile(const char *pattern, int options, const char **errorptr,
2010 int *erroroffset, const unsigned char *tables)
2011 {
2012 real_pcre *re;
2013 int length = 3; /* For initial BRA plus length */
2014 int runlength;
2015 int c, size, reqchar, countlits;
2016 int bracount = 0;
2017 int top_backref = 0;
2018 int branch_extra = 0;
2019 int branch_newextra;
2020 unsigned int brastackptr = 0;
2021 uschar *code;
2022 const uschar *ptr;
2023 compile_data compile_block;
2024 int brastack[BRASTACK_SIZE];
2025 uschar bralenstack[BRASTACK_SIZE];
2026
2027 #ifdef DEBUG
2028 uschar *code_base, *code_end;
2029 #endif
2030
2031 /* We can't pass back an error message if errorptr is NULL; I guess the best we
2032 can do is just return NULL. */
2033
2034 if (errorptr == NULL) return NULL;
2035 *errorptr = NULL;
2036
2037 /* However, we can give a message for this error */
2038
2039 if (erroroffset == NULL)
2040 {
2041 *errorptr = ERR16;
2042 return NULL;
2043 }
2044 *erroroffset = 0;
2045
2046 if ((options & ~PUBLIC_OPTIONS) != 0)
2047 {
2048 *errorptr = ERR17;
2049 return NULL;
2050 }
2051
2052 /* Set up pointers to the individual character tables */
2053
2054 if (tables == NULL) tables = pcre_default_tables;
2055 compile_block.lcc = tables + lcc_offset;
2056 compile_block.fcc = tables + fcc_offset;
2057 compile_block.cbits = tables + cbits_offset;
2058 compile_block.ctypes = tables + ctypes_offset;
2059
2060 /* Reflect pattern for debugging output */
2061
2062 DPRINTF(("------------------------------------------------------------------\n"));
2063 DPRINTF(("%s\n", pattern));
2064
2065 /* The first thing to do is to make a pass over the pattern to compute the
2066 amount of store required to hold the compiled code. This does not have to be
2067 perfect as long as errors are overestimates. At the same time we can detect any
2068 internal flag settings. Make an attempt to correct for any counted white space
2069 if an "extended" flag setting appears late in the pattern. We can't be so
2070 clever for #-comments. */
2071
2072 ptr = (const uschar *)(pattern - 1);
2073 while ((c = *(++ptr)) != 0)
2074 {
2075 int min, max;
2076 int class_charcount;
2077
2078 if ((options & PCRE_EXTENDED) != 0)
2079 {
2080 if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2081 if (c == '#')
2082 {
2083 while ((c = *(++ptr)) != 0 && c != '\n');
2084 continue;
2085 }
2086 }
2087
2088 switch(c)
2089 {
2090 /* A backslashed item may be an escaped "normal" character or a
2091 character type. For a "normal" character, put the pointers and
2092 character back so that tests for whitespace etc. in the input
2093 are done correctly. */
2094
2095 case '\\':
2096 {
2097 const uschar *save_ptr = ptr;
2098 c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
2099 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2100 if (c >= 0)
2101 {
2102 ptr = save_ptr;
2103 c = '\\';
2104 goto NORMAL_CHAR;
2105 }
2106 }
2107 length++;
2108
2109 /* A back reference needs an additional char, plus either one or 5
2110 bytes for a repeat. We also need to keep the value of the highest
2111 back reference. */
2112
2113 if (c <= -ESC_REF)
2114 {
2115 int refnum = -c - ESC_REF;
2116 if (refnum > top_backref) top_backref = refnum;
2117 length++; /* For single back reference */
2118 if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2119 {
2120 ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2121 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2122 if ((min == 0 && (max == 1 || max == -1)) ||
2123 (min == 1 && max == -1))
2124 length++;
2125 else length += 5;
2126 if (ptr[1] == '?') ptr++;
2127 }
2128 }
2129 continue;
2130
2131 case '^':
2132 case '.':
2133 case '$':
2134 case '*': /* These repeats won't be after brackets; */
2135 case '+': /* those are handled separately */
2136 case '?':
2137 length++;
2138 continue;
2139
2140 /* This covers the cases of repeats after a single char, metachar, class,
2141 or back reference. */
2142
2143 case '{':
2144 if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
2145 ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
2146 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2147 if ((min == 0 && (max == 1 || max == -1)) ||
2148 (min == 1 && max == -1))
2149 length++;
2150 else
2151 {
2152 length--; /* Uncount the original char or metachar */
2153 if (min == 1) length++; else if (min > 0) length += 4;
2154 if (max > 0) length += 4; else length += 2;
2155 }
2156 if (ptr[1] == '?') ptr++;
2157 continue;
2158
2159 /* An alternation contains an offset to the next branch or ket. If any ims
2160 options changed in the previous branch(es), and/or if we are in a
2161 lookbehind assertion, extra space will be needed at the start of the
2162 branch. This is handled by branch_extra. */
2163
2164 case '|':
2165 length += 3 + branch_extra;
2166 continue;
2167
2168 /* A character class uses 33 characters. Don't worry about character types
2169 that aren't allowed in classes - they'll get picked up during the compile.
2170 A character class that contains only one character uses 2 or 3 bytes,
2171 depending on whether it is negated or not. Notice this where we can. */
2172
2173 case '[':
2174 class_charcount = 0;
2175 if (*(++ptr) == '^') ptr++;
2176 do
2177 {
2178 if (*ptr == '\\')
2179 {
2180 int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2181 &compile_block);
2182 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2183 if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2184 }
2185 else class_charcount++;
2186 ptr++;
2187 }
2188 while (*ptr != 0 && *ptr != ']');
2189
2190 /* Repeats for negated single chars are handled by the general code */
2191
2192 if (class_charcount == 1) length += 3; else
2193 {
2194 length += 33;
2195
2196 /* A repeat needs either 1 or 5 bytes. */
2197
2198 if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2199 {
2200 ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2201 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2202 if ((min == 0 && (max == 1 || max == -1)) ||
2203 (min == 1 && max == -1))
2204 length++;
2205 else length += 5;
2206 if (ptr[1] == '?') ptr++;
2207 }
2208 }
2209 continue;
2210
2211 /* Brackets may be genuine groups or special things */
2212
2213 case '(':
2214 branch_newextra = 0;
2215
2216 /* Handle special forms of bracket, which all start (? */
2217
2218 if (ptr[1] == '?')
2219 {
2220 int set, unset;
2221 int *optset;
2222
2223 switch (c = ptr[2])
2224 {
2225 /* Skip over comments entirely */
2226 case '#':
2227 ptr += 3;
2228 while (*ptr != 0 && *ptr != ')') ptr++;
2229 if (*ptr == 0)
2230 {
2231 *errorptr = ERR18;
2232 goto PCRE_ERROR_RETURN;
2233 }
2234 continue;
2235
2236 /* Non-referencing groups and lookaheads just move the pointer on, and
2237 then behave like a non-special bracket, except that they don't increment
2238 the count of extracting brackets. Ditto for the "once only" bracket,
2239 which is in Perl from version 5.005. */
2240
2241 case ':':
2242 case '=':
2243 case '!':
2244 case '>':
2245 ptr += 2;
2246 break;
2247
2248 /* Lookbehinds are in Perl from version 5.005 */
2249
2250 case '<':
2251 if (ptr[3] == '=' || ptr[3] == '!')
2252 {
2253 ptr += 3;
2254 branch_newextra = 3;
2255 length += 3; /* For the first branch */
2256 break;
2257 }
2258 *errorptr = ERR24;
2259 goto PCRE_ERROR_RETURN;
2260
2261 /* Conditionals are in Perl from version 5.005. The bracket must either
2262 be followed by a number (for bracket reference) or by an assertion
2263 group. */
2264
2265 case '(':
2266 if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2267 {
2268 ptr += 4;
2269 length += 2;
2270 while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2271 if (*ptr != ')')
2272 {
2273 *errorptr = ERR26;
2274 goto PCRE_ERROR_RETURN;
2275 }
2276 }
2277 else /* An assertion must follow */
2278 {
2279 ptr++; /* Can treat like ':' as far as spacing is concerned */
2280
2281 if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL)
2282 {
2283 ptr += 2; /* To get right offset in message */
2284 *errorptr = ERR28;
2285 goto PCRE_ERROR_RETURN;
2286 }
2287 }
2288 break;
2289
2290 /* Else loop checking valid options until ) is met. Anything else is an
2291 error. If we are without any brackets, i.e. at top level, the settings
2292 act as if specified in the options, so massage the options immediately.
2293 This is for backward compatibility with Perl 5.004. */
2294
2295 default:
2296 set = unset = 0;
2297 optset = &set;
2298 ptr += 2;
2299
2300 for (;; ptr++)
2301 {
2302 c = *ptr;
2303 switch (c)
2304 {
2305 case 'i':
2306 *optset |= PCRE_CASELESS;
2307 continue;
2308
2309 case 'm':
2310 *optset |= PCRE_MULTILINE;
2311 continue;
2312
2313 case 's':
2314 *optset |= PCRE_DOTALL;
2315 continue;
2316
2317 case 'x':
2318 *optset |= PCRE_EXTENDED;
2319 continue;
2320
2321 case 'X':
2322 *optset |= PCRE_EXTRA;
2323 continue;
2324
2325 case 'U':
2326 *optset |= PCRE_UNGREEDY;
2327 continue;
2328
2329 case '-':
2330 optset = &unset;
2331 continue;
2332
2333 /* A termination by ')' indicates an options-setting-only item;
2334 this is global at top level; otherwise nothing is done here and
2335 it is handled during the compiling process on a per-bracket-group
2336 basis. */
2337
2338 case ')':
2339 if (brastackptr == 0)
2340 {
2341 options = (options | set) & (~unset);
2342 set = unset = 0; /* To save length */
2343 }
2344 /* Fall through */
2345
2346 /* A termination by ':' indicates the start of a nested group with
2347 the given options set. This is again handled at compile time, but
2348 we must allow for compiled space if any of the ims options are
2349 set. We also have to allow for resetting space at the end of
2350 the group, which is why 4 is added to the length and not just 2.
2351 If there are several changes of options within the same group, this
2352 will lead to an over-estimate on the length, but this shouldn't
2353 matter very much. We also have to allow for resetting options at
2354 the start of any alternations, which we do by setting
2355 branch_newextra to 2. Finally, we record whether the case-dependent
2356 flag ever changes within the regex. This is used by the "required
2357 character" code. */
2358
2359 case ':':
2360 if (((set|unset) & PCRE_IMS) != 0)
2361 {
2362 length += 4;
2363 branch_newextra = 2;
2364 if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
2365 }
2366 goto END_OPTIONS;
2367
2368 /* Unrecognized option character */
2369
2370 default:
2371 *errorptr = ERR12;
2372 goto PCRE_ERROR_RETURN;
2373 }
2374 }
2375
2376 /* If we hit a closing bracket, that's it - this is a freestanding
2377 option-setting. We need to ensure that branch_extra is updated if
2378 necessary. The only values branch_newextra can have here are 0 or 2.
2379 If the value is 2, then branch_extra must either be 2 or 5, depending
2380 on whether this is a lookbehind group or not. */
2381
2382 END_OPTIONS:
2383 if (c == ')')
2384 {
2385 if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
2386 branch_extra += branch_newextra;
2387 continue;
2388 }
2389
2390 /* If options were terminated by ':' control comes here. Fall through
2391 to handle the group below. */
2392 }
2393 }
2394
2395 /* Extracting brackets must be counted so we can process escapes in a
2396 Perlish way. */
2397
2398 else bracount++;
2399
2400 /* Non-special forms of bracket. Save length for computing whole length
2401 at end if there's a repeat that requires duplication of the group. Also
2402 save the current value of branch_extra, and start the new group with
2403 the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
2404 for a lookbehind assertion. */
2405
2406 if (brastackptr >= sizeof(brastack)/sizeof(int))
2407 {
2408 *errorptr = ERR19;
2409 goto PCRE_ERROR_RETURN;
2410 }
2411
2412 bralenstack[brastackptr] = branch_extra;
2413 branch_extra = branch_newextra;
2414
2415 brastack[brastackptr++] = length;
2416 length += 3;
2417 continue;
2418
2419 /* Handle ket. Look for subsequent max/min; for certain sets of values we
2420 have to replicate this bracket up to that many times. If brastackptr is
2421 0 this is an unmatched bracket which will generate an error, but take care
2422 not to try to access brastack[-1] when computing the length and restoring
2423 the branch_extra value. */
2424
2425 case ')':
2426 length += 3;
2427 {
2428 int minval = 1;
2429 int maxval = 1;
2430 int duplength;
2431
2432 if (brastackptr > 0)
2433 {
2434 duplength = length - brastack[--brastackptr];
2435 branch_extra = bralenstack[brastackptr];
2436 }
2437 else duplength = 0;
2438
2439 /* Leave ptr at the final char; for read_repeat_counts this happens
2440 automatically; for the others we need an increment. */
2441
2442 if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2443 {
2444 ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2445 &compile_block);
2446 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2447 }
2448 else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2449 else if (c == '+') { maxval = -1; ptr++; }
2450 else if (c == '?') { minval = 0; ptr++; }
2451
2452 /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2453 group, and if the maximum is greater than zero, we have to replicate
2454 maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2455 bracket set - hence the 7. */
2456
2457 if (minval == 0)
2458 {
2459 length++;
2460 if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2461 }
2462
2463 /* When the minimum is greater than zero, we have to replicate up to
2464 minval-1 times, with no additions required in the copies. Then, if
2465 there is a limited maximum we have to replicate up to maxval-1 times
2466 allowing for a BRAZERO item before each optional copy and nesting
2467 brackets for all but one of the optional copies. */
2468
2469 else
2470 {
2471 length += (minval - 1) * duplength;
2472 if (maxval > minval) /* Need this test as maxval=-1 means no limit */
2473 length += (maxval - minval) * (duplength + 7) - 6;
2474 }
2475 }
2476 continue;
2477
2478 /* Non-special character. For a run of such characters the length required
2479 is the number of characters + 2, except that the maximum run length is 255.
2480 We won't get a skipped space or a non-data escape or the start of a #
2481 comment as the first character, so the length can't be zero. */
2482
2483 NORMAL_CHAR:
2484 default:
2485 length += 2;
2486 runlength = 0;
2487 do
2488 {
2489 if ((options & PCRE_EXTENDED) != 0)
2490 {
2491 if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2492 if (c == '#')
2493 {
2494 while ((c = *(++ptr)) != 0 && c != '\n');
2495 continue;
2496 }
2497 }
2498
2499 /* Backslash may introduce a data char or a metacharacter; stop the
2500 string before the latter. */
2501
2502 if (c == '\\')
2503 {
2504 const uschar *saveptr = ptr;
2505 c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2506 &compile_block);
2507 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2508 if (c < 0) { ptr = saveptr; break; }
2509 }
2510
2511 /* Ordinary character or single-char escape */
2512
2513 runlength++;
2514 }
2515
2516 /* This "while" is the end of the "do" above. */
2517
2518 while (runlength < 255 &&
2519 (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2520
2521 ptr--;
2522 length += runlength;
2523 continue;
2524 }
2525 }
2526
2527 length += 4; /* For final KET and END */
2528
2529 if (length > 65539)
2530 {
2531 *errorptr = ERR20;
2532 return NULL;
2533 }
2534
2535 /* Compute the size of data block needed and get it, either from malloc or
2536 externally provided function. We specify "code[0]" in the offsetof() expression
2537 rather than just "code", because it has been reported that one broken compiler
2538 fails on "code" because it is also an independent variable. It should make no
2539 difference to the value of the offsetof(). */
2540
2541 size = length + offsetof(real_pcre, code[0]);
2542 re = (real_pcre *)(pcre_malloc)(size);
2543
2544 if (re == NULL)
2545 {
2546 *errorptr = ERR21;
2547 return NULL;
2548 }
2549
2550 /* Put in the magic number and the options. */
2551
2552 re->magic_number = MAGIC_NUMBER;
2553 re->options = options;
2554 re->tables = tables;
2555
2556 /* Set up a starting, non-extracting bracket, then compile the expression. On
2557 error, *errorptr will be set non-NULL, so we don't need to look at the result
2558 of the function here. */
2559
2560 ptr = (const uschar *)pattern;
2561 code = re->code;
2562 *code = OP_BRA;
2563 bracount = 0;
2564 (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2565 &reqchar, &countlits, &compile_block);
2566 re->top_bracket = bracount;
2567 re->top_backref = top_backref;
2568
2569 /* If not reached end of pattern on success, there's an excess bracket. */
2570
2571 if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22;
2572
2573 /* Fill in the terminating state and check for disastrous overflow, but
2574 if debugging, leave the test till after things are printed out. */
2575
2576 *code++ = OP_END;
2577
2578 #ifndef DEBUG
2579 if (code - re->code > length) *errorptr = ERR23;
2580 #endif
2581
2582 /* Give an error if there's back reference to a non-existent capturing
2583 subpattern. */
2584
2585 if (top_backref > re->top_bracket) *errorptr = ERR15;
2586
2587 /* Failed to compile */
2588
2589 if (*errorptr != NULL)
2590 {
2591 (pcre_free)(re);
2592 PCRE_ERROR_RETURN:
2593 *erroroffset = ptr - (const uschar *)pattern;
2594 return NULL;
2595 }
2596
2597 /* If the anchored option was not passed, set flag if we can determine that the
2598 pattern is anchored by virtue of ^ characters or \A or anything else (such as
2599 starting with .* when DOTALL is set).
2600
2601 Otherwise, see if we can determine what the first character has to be, because
2602 that speeds up unanchored matches no end. If not, see if we can set the
2603 PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2604 start with ^. and also when all branches start with .* for non-DOTALL matches.
2605 */
2606
2607 if ((options & PCRE_ANCHORED) == 0)
2608 {
2609 int temp_options = options;
2610 if (is_anchored(re->code, &temp_options))
2611 re->options |= PCRE_ANCHORED;
2612 else
2613 {
2614 int ch = find_firstchar(re->code, &temp_options);
2615 if (ch >= 0)
2616 {
2617 re->first_char = ch;
2618 re->options |= PCRE_FIRSTSET;
2619 }
2620 else if (is_startline(re->code))
2621 re->options |= PCRE_STARTLINE;
2622 }
2623 }
2624
2625 /* Save the last required character if there are at least two literal
2626 characters on all paths, or if there is no first character setting. */
2627
2628 if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0))
2629 {
2630 re->req_char = reqchar;
2631 re->options |= PCRE_REQCHSET;
2632 }
2633
2634 /* Print out the compiled data for debugging */
2635
2636 #ifdef DEBUG
2637
2638 printf("Length = %d top_bracket = %d top_backref = %d\n",
2639 length, re->top_bracket, re->top_backref);
2640
2641 if (re->options != 0)
2642 {
2643 printf("%s%s%s%s%s%s%s%s%s\n",
2644 ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2645 ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2646 ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
2647 ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2648 ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2649 ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2650 ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2651 ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2652 ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2653 }
2654
2655 if ((re->options & PCRE_FIRSTSET) != 0)
2656 {
2657 if (isprint(re->first_char)) printf("First char = %c\n", re->first_char);
2658 else printf("First char = \\x%02x\n", re->first_char);
2659 }
2660
2661 if ((re->options & PCRE_REQCHSET) != 0)
2662 {
2663 if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char);
2664 else printf("Req char = \\x%02x\n", re->req_char);
2665 }
2666
2667 code_end = code;
2668 code_base = code = re->code;
2669
2670 while (code < code_end)
2671 {
2672 int charlength;
2673
2674 printf("%3d ", code - code_base);
2675
2676 if (*code >= OP_BRA)
2677 {
2678 printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
2679 code += 2;
2680 }
2681
2682 else switch(*code)
2683 {
2684 case OP_OPT:
2685 printf(" %.2x %s", code[1], OP_names[*code]);
2686 code++;
2687 break;
2688
2689 case OP_COND:
2690 printf("%3d Cond", (code[1] << 8) + code[2]);
2691 code += 2;
2692 break;
2693
2694 case OP_CREF:
2695 printf(" %.2d %s", code[1], OP_names[*code]);
2696 code++;
2697 break;
2698
2699 case OP_CHARS:
2700 charlength = *(++code);
2701 printf("%3d ", charlength);
2702 while (charlength-- > 0)
2703 if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
2704 break;
2705
2706 case OP_KETRMAX:
2707 case OP_KETRMIN:
2708 case OP_ALT:
2709 case OP_KET:
2710 case OP_ASSERT:
2711 case OP_ASSERT_NOT:
2712 case OP_ASSERTBACK:
2713 case OP_ASSERTBACK_NOT:
2714 case OP_ONCE:
2715 printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2716 code += 2;
2717 break;
2718
2719 case OP_REVERSE:
2720 printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2721 code += 2;
2722 break;
2723
2724 case OP_STAR:
2725 case OP_MINSTAR:
2726 case OP_PLUS:
2727 case OP_MINPLUS:
2728 case OP_QUERY:
2729 case OP_MINQUERY:
2730 case OP_TYPESTAR:
2731 case OP_TYPEMINSTAR:
2732 case OP_TYPEPLUS:
2733 case OP_TYPEMINPLUS:
2734 case OP_TYPEQUERY:
2735 case OP_TYPEMINQUERY:
2736 if (*code >= OP_TYPESTAR)
2737 printf(" %s", OP_names[code[1]]);
2738 else if (isprint(c = code[1])) printf(" %c", c);
2739 else printf(" \\x%02x", c);
2740 printf("%s", OP_names[*code++]);
2741 break;
2742
2743 case OP_EXACT:
2744 case OP_UPTO:
2745 case OP_MINUPTO:
2746 if (isprint(c = code[3])) printf(" %c{", c);
2747 else printf(" \\x%02x{", c);
2748 if (*code != OP_EXACT) printf("0,");
2749 printf("%d}", (code[1] << 8) + code[2]);
2750 if (*code == OP_MINUPTO) printf("?");
2751 code += 3;
2752 break;
2753
2754 case OP_TYPEEXACT:
2755 case OP_TYPEUPTO:
2756 case OP_TYPEMINUPTO:
2757 printf(" %s{", OP_names[code[3]]);
2758 if (*code != OP_TYPEEXACT) printf(",");
2759 printf("%d}", (code[1] << 8) + code[2]);
2760 if (*code == OP_TYPEMINUPTO) printf("?");
2761 code += 3;
2762 break;
2763
2764 case OP_NOT:
2765 if (isprint(c = *(++code))) printf(" [^%c]", c);
2766 else printf(" [^\\x%02x]", c);
2767 break;
2768
2769 case OP_NOTSTAR:
2770 case OP_NOTMINSTAR:
2771 case OP_NOTPLUS:
2772 case OP_NOTMINPLUS:
2773 case OP_NOTQUERY:
2774 case OP_NOTMINQUERY:
2775 if (isprint(c = code[1])) printf(" [^%c]", c);
2776 else printf(" [^\\x%02x]", c);
2777 printf("%s", OP_names[*code++]);
2778 break;
2779
2780 case OP_NOTEXACT:
2781 case OP_NOTUPTO:
2782 case OP_NOTMINUPTO:
2783 if (isprint(c = code[3])) printf(" [^%c]{", c);
2784 else printf(" [^\\x%02x]{", c);
2785 if (*code != OP_NOTEXACT) printf(",");
2786 printf("%d}", (code[1] << 8) + code[2]);
2787 if (*code == OP_NOTMINUPTO) printf("?");
2788 code += 3;
2789 break;
2790
2791 case OP_REF:
2792 printf(" \\%d", *(++code));
2793 code ++;
2794 goto CLASS_REF_REPEAT;
2795
2796 case OP_CLASS:
2797 {
2798 int i, min, max;
2799 code++;
2800 printf(" [");
2801
2802 for (i = 0; i < 256; i++)
2803 {
2804 if ((code[i/8] & (1 << (i&7))) != 0)
2805 {
2806 int j;
2807 for (j = i+1; j < 256; j++)
2808 if ((code[j/8] & (1 << (j&7))) == 0) break;
2809 if (i == '-' || i == ']') printf("\\");
2810 if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
2811 if (--j > i)
2812 {
2813 printf("-");
2814 if (j == '-' || j == ']') printf("\\");
2815 if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
2816 }
2817 i = j;
2818 }
2819 }
2820 printf("]");
2821 code += 32;
2822
2823 CLASS_REF_REPEAT:
2824
2825 switch(*code)
2826 {
2827 case OP_CRSTAR:
2828 case OP_CRMINSTAR:
2829 case OP_CRPLUS:
2830 case OP_CRMINPLUS:
2831 case OP_CRQUERY:
2832 case OP_CRMINQUERY:
2833 printf("%s", OP_names[*code]);
2834 break;
2835
2836 case OP_CRRANGE:
2837 case OP_CRMINRANGE:
2838 min = (code[1] << 8) + code[2];
2839 max = (code[3] << 8) + code[4];
2840 if (max == 0) printf("{%d,}", min);
2841 else printf("{%d,%d}", min, max);
2842 if (*code == OP_CRMINRANGE) printf("?");
2843 code += 4;
2844 break;
2845
2846 default:
2847 code--;
2848 }
2849 }
2850 break;
2851
2852 /* Anything else is just a one-node item */
2853
2854 default:
2855 printf(" %s", OP_names[*code]);
2856 break;
2857 }
2858
2859 code++;
2860 printf("\n");
2861 }
2862 printf("------------------------------------------------------------------\n");
2863
2864 /* This check is done here in the debugging case so that the code that
2865 was compiled can be seen. */
2866
2867 if (code - re->code > length)
2868 {
2869 *errorptr = ERR23;
2870 (pcre_free)(re);
2871 *erroroffset = ptr - (uschar *)pattern;
2872 return NULL;
2873 }
2874 #endif
2875
2876 return (pcre *)re;
2877 }
2878
2879
2880
2881 /*************************************************
2882 * Match a back-reference *
2883 *************************************************/
2884
2885 /* If a back reference hasn't been set, the length that is passed is greater
2886 than the number of characters left in the string, so the match fails.
2887
2888 Arguments:
2889 offset index into the offset vector
2890 eptr points into the subject
2891 length length to be matched
2892 md points to match data block
2893 ims the ims flags
2894
2895 Returns: TRUE if matched
2896 */
2897
2898 static BOOL
2899 match_ref(int offset, register const uschar *eptr, int length, match_data *md,
2900 unsigned long int ims)
2901 {
2902 const uschar *p = md->start_subject + md->offset_vector[offset];
2903
2904 #ifdef DEBUG
2905 if (eptr >= md->end_subject)
2906 printf("matching subject <null>");
2907 else
2908 {
2909 printf("matching subject ");
2910 pchars(eptr, length, TRUE, md);
2911 }
2912 printf(" against backref ");
2913 pchars(p, length, FALSE, md);
2914 printf("\n");
2915 #endif
2916
2917 /* Always fail if not enough characters left */
2918
2919 if (length > md->end_subject - eptr) return FALSE;
2920
2921 /* Separate the caselesss case for speed */
2922
2923 if ((ims & PCRE_CASELESS) != 0)
2924 {
2925 while (length-- > 0)
2926 if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
2927 }
2928 else
2929 { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2930
2931 return TRUE;
2932 }
2933
2934
2935
2936 /*************************************************
2937 * Match from current position *
2938 *************************************************/
2939
2940 /* On entry ecode points to the first opcode, and eptr to the first character
2941 in the subject string, while eptrb holds the value of eptr at the start of the
2942 last bracketed group - used for breaking infinite loops matching zero-length
2943 strings.
2944
2945 Arguments:
2946 eptr pointer in subject
2947 ecode position in code
2948 offset_top current top pointer
2949 md pointer to "static" info for the match
2950 ims current /i, /m, and /s options
2951 condassert TRUE if called to check a condition assertion
2952 eptrb eptr at start of last bracket
2953
2954 Returns: TRUE if matched
2955 */
2956
2957 static BOOL
2958 match(register const uschar *eptr, register const uschar *ecode,
2959 int offset_top, match_data *md, unsigned long int ims, BOOL condassert,
2960 const uschar *eptrb)
2961 {
2962 unsigned long int original_ims = ims; /* Save for resetting on ')' */
2963
2964 for (;;)
2965 {
2966 int op = (int)*ecode;
2967 int min, max, ctype;
2968 register int i;
2969 register int c;
2970 BOOL minimize = FALSE;
2971
2972 /* Opening capturing bracket. If there is space in the offset vector, save
2973 the current subject position in the working slot at the top of the vector. We
2974 mustn't change the current values of the data slot, because they may be set
2975 from a previous iteration of this group, and be referred to by a reference
2976 inside the group.
2977
2978 If the bracket fails to match, we need to restore this value and also the
2979 values of the final offsets, in case they were set by a previous iteration of
2980 the same bracket.
2981
2982 If there isn't enough space in the offset vector, treat this as if it were a
2983 non-capturing bracket. Don't worry about setting the flag for the error case
2984 here; that is handled in the code for KET. */
2985
2986 if (op > OP_BRA)
2987 {
2988 int number = op - OP_BRA;
2989 int offset = number << 1;
2990
2991 #ifdef DEBUG
2992 printf("start bracket %d subject=", number);
2993 pchars(eptr, 16, TRUE, md);
2994 printf("\n");
2995 #endif
2996
2997 if (offset < md->offset_max)
2998 {
2999 int save_offset1 = md->offset_vector[offset];
3000 int save_offset2 = md->offset_vector[offset+1];
3001 int save_offset3 = md->offset_vector[md->offset_end - number];
3002
3003 DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
3004 md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
3005
3006 do
3007 {
3008 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3009 ecode += (ecode[1] << 8) + ecode[2];
3010 }
3011 while (*ecode == OP_ALT);
3012
3013 DPRINTF(("bracket %d failed\n", number));
3014
3015 md->offset_vector[offset] = save_offset1;
3016 md->offset_vector[offset+1] = save_offset2;
3017 md->offset_vector[md->offset_end - number] = save_offset3;
3018 return FALSE;
3019 }
3020
3021 /* Insufficient room for saving captured contents */
3022
3023 else op = OP_BRA;
3024 }
3025
3026 /* Other types of node can be handled by a switch */
3027
3028 switch(op)
3029 {
3030 case OP_BRA: /* Non-capturing bracket: optimized */
3031 DPRINTF(("start bracket 0\n"));
3032 do
3033 {
3034 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3035 ecode += (ecode[1] << 8) + ecode[2];
3036 }
3037 while (*ecode == OP_ALT);
3038 DPRINTF(("bracket 0 failed\n"));
3039 return FALSE;
3040
3041 /* Conditional group: compilation checked that there are no more than
3042 two branches. If the condition is false, skipping the first branch takes us
3043 past the end if there is only one branch, but that's OK because that is
3044 exactly what going to the ket would do. */
3045
3046 case OP_COND:
3047 if (ecode[3] == OP_CREF) /* Condition is extraction test */
3048 {
3049 int offset = ecode[4] << 1; /* Doubled reference number */
3050 return match(eptr,
3051 ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
3052 5 : 3 + (ecode[1] << 8) + ecode[2]),
3053 offset_top, md, ims, FALSE, eptr);
3054 }
3055
3056 /* The condition is an assertion. Call match() to evaluate it - setting
3057 the final argument TRUE causes it to stop at the end of an assertion. */
3058
3059 else
3060 {
3061 if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL))
3062 {
3063 ecode += 3 + (ecode[4] << 8) + ecode[5];
3064 while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
3065 }
3066 else ecode += (ecode[1] << 8) + ecode[2];
3067 return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr);
3068 }
3069 /* Control never reaches here */
3070
3071 /* Skip over conditional reference data if encountered (should not be) */
3072
3073 case OP_CREF:
3074 ecode += 2;
3075 break;
3076
3077 /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
3078 an empty string - recursion will then try other alternatives, if any. */
3079
3080 case OP_END:
3081 if (md->notempty && eptr == md->start_match) return FALSE;
3082 md->end_match_ptr = eptr; /* Record where we ended */
3083 md->end_offset_top = offset_top; /* and how many extracts were taken */
3084 return TRUE;
3085
3086 /* Change option settings */
3087
3088 case OP_OPT:
3089 ims = ecode[1];
3090 ecode += 2;
3091 DPRINTF(("ims set to %02x\n", ims));
3092 break;
3093
3094 /* Assertion brackets. Check the alternative branches in turn - the
3095 matching won't pass the KET for an assertion. If any one branch matches,
3096 the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
3097 start of each branch to move the current point backwards, so the code at
3098 this level is identical to the lookahead case. */
3099
3100 case OP_ASSERT:
3101 case OP_ASSERTBACK:
3102 do
3103 {
3104 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break;
3105 ecode += (ecode[1] << 8) + ecode[2];
3106 }
3107 while (*ecode == OP_ALT);
3108 if (*ecode == OP_KET) return FALSE;
3109
3110 /* If checking an assertion for a condition, return TRUE. */
3111
3112 if (condassert) return TRUE;
3113
3114 /* Continue from after the assertion, updating the offsets high water
3115 mark, since extracts may have been taken during the assertion. */
3116
3117 do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3118 ecode += 3;
3119 offset_top = md->end_offset_top;
3120 continue;
3121
3122 /* Negative assertion: all branches must fail to match */
3123
3124 case OP_ASSERT_NOT:
3125 case OP_ASSERTBACK_NOT:
3126 do
3127 {
3128 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE;
3129 ecode += (ecode[1] << 8) + ecode[2];
3130 }
3131 while (*ecode == OP_ALT);
3132
3133 if (condassert) return TRUE;
3134 ecode += 3;
3135 continue;
3136
3137 /* Move the subject pointer back. This occurs only at the start of
3138 each branch of a lookbehind assertion. If we are too close to the start to
3139 move back, this match function fails. */
3140
3141 case OP_REVERSE:
3142 eptr -= (ecode[1] << 8) + ecode[2];
3143 if (eptr < md->start_subject) return FALSE;
3144 ecode += 3;
3145 break;
3146
3147
3148 /* "Once" brackets are like assertion brackets except that after a match,
3149 the point in the subject string is not moved back. Thus there can never be
3150 a move back into the brackets. Check the alternative branches in turn - the
3151 matching won't pass the KET for this kind of subpattern. If any one branch
3152 matches, we carry on as at the end of a normal bracket, leaving the subject
3153 pointer. */
3154
3155 case OP_ONCE:
3156 {
3157 const uschar *prev = ecode;
3158
3159 do
3160 {
3161 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break;
3162 ecode += (ecode[1] << 8) + ecode[2];
3163 }
3164 while (*ecode == OP_ALT);
3165
3166 /* If hit the end of the group (which could be repeated), fail */
3167
3168 if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
3169
3170 /* Continue as from after the assertion, updating the offsets high water
3171 mark, since extracts may have been taken. */
3172
3173 do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3174
3175 offset_top = md->end_offset_top;
3176 eptr = md->end_match_ptr;
3177
3178 /* For a non-repeating ket, just continue at this level. This also
3179 happens for a repeating ket if no characters were matched in the group.
3180 This is the forcible breaking of infinite loops as implemented in Perl
3181 5.005. If there is an options reset, it will get obeyed in the normal
3182 course of events. */
3183
3184 if (*ecode == OP_KET || eptr == eptrb)
3185 {
3186 ecode += 3;
3187 break;
3188 }
3189
3190 /* The repeating kets try the rest of the pattern or restart from the
3191 preceding bracket, in the appropriate order. We need to reset any options
3192 that changed within the bracket before re-running it, so check the next
3193 opcode. */
3194
3195 if (ecode[3] == OP_OPT)
3196 {
3197 ims = (ims & ~PCRE_IMS) | ecode[4];
3198 DPRINTF(("ims set to %02x at group repeat\n", ims));
3199 }
3200
3201 if (*ecode == OP_KETRMIN)
3202 {
3203 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3204 match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3205 }
3206 else /* OP_KETRMAX */
3207 {
3208 if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3209 match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3210 }
3211 }
3212 return FALSE;
3213
3214 /* An alternation is the end of a branch; scan along to find the end of the
3215 bracketed group and go to there. */
3216
3217 case OP_ALT:
3218 do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3219 break;
3220
3221 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
3222 that it may occur zero times. It may repeat infinitely, or not at all -
3223 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
3224 repeat limits are compiled as a number of copies, with the optional ones
3225 preceded by BRAZERO or BRAMINZERO. */
3226
3227 case OP_BRAZERO:
3228 {
3229 const uschar *next = ecode+1;
3230 if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE;
3231 do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3232 ecode = next + 3;
3233 }
3234 break;
3235
3236 case OP_BRAMINZERO:
3237 {
3238 const uschar *next = ecode+1;
3239 do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3240 if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3241 ecode++;
3242 }
3243 break;
3244
3245 /* End of a group, repeated or non-repeating. If we are at the end of
3246 an assertion "group", stop matching and return TRUE, but record the
3247 current high water mark for use by positive assertions. Do this also
3248 for the "once" (not backing up) groups. */
3249
3250 case OP_KET:
3251 case OP_KETRMIN:
3252 case OP_KETRMAX:
3253 {
3254 const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
3255
3256 if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3257 *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
3258 *prev == OP_ONCE)
3259 {
3260 md->end_match_ptr = eptr; /* For ONCE */
3261 md->end_offset_top = offset_top;
3262 return TRUE;
3263 }
3264
3265 /* In all other cases except a conditional group we have to check the
3266 group number back at the start and if necessary complete handling an
3267 extraction by setting the offsets and bumping the high water mark. */
3268
3269 if (*prev != OP_COND)
3270 {
3271 int number = *prev - OP_BRA;
3272 int offset = number << 1;
3273
3274 DPRINTF(("end bracket %d\n", number));
3275
3276 if (number > 0)
3277 {
3278 if (offset >= md->offset_max) md->offset_overflow = TRUE; else
3279 {
3280 md->offset_vector[offset] =
3281 md->offset_vector[md->offset_end - number];
3282 md->offset_vector[offset+1] = eptr - md->start_subject;
3283 if (offset_top <= offset) offset_top = offset + 2;
3284 }
3285 }
3286 }
3287
3288 /* Reset the value of the ims flags, in case they got changed during
3289 the group. */
3290
3291 ims = original_ims;
3292 DPRINTF(("ims reset to %02x\n", ims));
3293
3294 /* For a non-repeating ket, just continue at this level. This also
3295 happens for a repeating ket if no characters were matched in the group.
3296 This is the forcible breaking of infinite loops as implemented in Perl
3297 5.005. If there is an options reset, it will get obeyed in the normal
3298 course of events. */
3299
3300 if (*ecode == OP_KET || eptr == eptrb)
3301 {
3302 ecode += 3;
3303 break;
3304 }
3305
3306 /* The repeating kets try the rest of the pattern or restart from the
3307 preceding bracket, in the appropriate order. */
3308
3309 if (*ecode == OP_KETRMIN)
3310 {
3311 if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3312 match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3313 }
3314 else /* OP_KETRMAX */
3315 {
3316 if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3317 match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3318 }
3319 }
3320 return FALSE;
3321
3322 /* Start of subject unless notbol, or after internal newline if multiline */
3323
3324 case OP_CIRC:
3325 if (md->notbol && eptr == md->start_subject) return FALSE;
3326 if ((ims & PCRE_MULTILINE) != 0)
3327 {
3328 if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
3329 ecode++;
3330 break;
3331 }
3332 /* ... else fall through */
3333
3334 /* Start of subject assertion */
3335
3336 case OP_SOD:
3337 if (eptr != md->start_subject) return FALSE;
3338 ecode++;
3339 break;
3340
3341 /* Assert before internal newline if multiline, or before a terminating
3342 newline unless endonly is set, else end of subject unless noteol is set. */
3343
3344 case OP_DOLL:
3345 if ((ims & PCRE_MULTILINE) != 0)
3346 {
3347 if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
3348 else { if (md->noteol) return FALSE; }
3349 ecode++;
3350 break;
3351 }
3352 else
3353 {
3354 if (md->noteol) return FALSE;
3355 if (!md->endonly)
3356 {
3357 if (eptr < md->end_subject - 1 ||
3358 (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3359
3360 ecode++;
3361 break;
3362 }
3363 }
3364 /* ... else fall through */
3365
3366 /* End of subject assertion (\z) */
3367
3368 case OP_EOD:
3369 if (eptr < md->end_subject) return FALSE;
3370 ecode++;
3371 break;
3372
3373 /* End of subject or ending \n assertion (\Z) */
3374
3375 case OP_EODN:
3376 if (eptr < md->end_subject - 1 ||
3377 (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3378 ecode++;
3379 break;
3380
3381 /* Word boundary assertions */
3382
3383 case OP_NOT_WORD_BOUNDARY:
3384 case OP_WORD_BOUNDARY:
3385 {
3386 BOOL prev_is_word = (eptr != md->start_subject) &&
3387 ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3388 BOOL cur_is_word = (eptr < md->end_subject) &&
3389 ((md->ctypes[*eptr] & ctype_word) != 0);
3390 if ((*ecode++ == OP_WORD_BOUNDARY)?
3391 cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3392 return FALSE;
3393 }
3394 break;
3395
3396 /* Match a single character type; inline for speed */
3397
3398 case OP_ANY:
3399 if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
3400 return FALSE;
3401 if (eptr++ >= md->end_subject) return FALSE;
3402 ecode++;
3403 break;
3404
3405 case OP_NOT_DIGIT:
3406 if (eptr >= md->end_subject ||
3407 (md->ctypes[*eptr++] & ctype_digit) != 0)
3408 return FALSE;
3409 ecode++;
3410 break;
3411
3412 case OP_DIGIT:
3413 if (eptr >= md->end_subject ||
3414 (md->ctypes[*eptr++] & ctype_digit) == 0)
3415 return FALSE;
3416 ecode++;
3417 break;
3418
3419 case OP_NOT_WHITESPACE:
3420 if (eptr >= md->end_subject ||
3421 (md->ctypes[*eptr++] & ctype_space) != 0)
3422 return FALSE;
3423 ecode++;
3424 break;
3425
3426 case OP_WHITESPACE:
3427 if (eptr >= md->end_subject ||
3428 (md->ctypes[*eptr++] & ctype_space) == 0)
3429 return FALSE;
3430 ecode++;
3431 break;
3432
3433 case OP_NOT_WORDCHAR:
3434 if (eptr >= md->end_subject ||
3435 (md->ctypes[*eptr++] & ctype_word) != 0)
3436 return FALSE;
3437 ecode++;
3438 break;
3439
3440 case OP_WORDCHAR:
3441 if (eptr >= md->end_subject ||
3442 (md->ctypes[*eptr++] & ctype_word) == 0)
3443 return FALSE;
3444 ecode++;
3445 break;
3446
3447 /* Match a back reference, possibly repeatedly. Look past the end of the
3448 item to see if there is repeat information following. The code is similar
3449 to that for character classes, but repeated for efficiency. Then obey
3450 similar code to character type repeats - written out again for speed.
3451 However, if the referenced string is the empty string, always treat
3452 it as matched, any number of times (otherwise there could be infinite
3453 loops). */
3454
3455 case OP_REF:
3456 {
3457 int length;
3458 int offset = ecode[1] << 1; /* Doubled reference number */
3459 ecode += 2; /* Advance past the item */
3460
3461 /* If the reference is unset, set the length to be longer than the amount
3462 of subject left; this ensures that every attempt at a match fails. We
3463 can't just fail here, because of the possibility of quantifiers with zero
3464 minima. */
3465
3466 length = (offset >= offset_top || md->offset_vector[offset] < 0)?
3467 md->end_subject - eptr + 1 :
3468 md->offset_vector[offset+1] - md->offset_vector[offset];
3469
3470 /* Set up for repetition, or handle the non-repeated case */
3471
3472 switch (*ecode)
3473 {
3474 case OP_CRSTAR:
3475 case OP_CRMINSTAR:
3476 case OP_CRPLUS:
3477 case OP_CRMINPLUS:
3478 case OP_CRQUERY:
3479 case OP_CRMINQUERY:
3480 c = *ecode++ - OP_CRSTAR;
3481 minimize = (c & 1) != 0;
3482 min = rep_min[c]; /* Pick up values from tables; */
3483 max = rep_max[c]; /* zero for max => infinity */
3484 if (max == 0) max = INT_MAX;
3485 break;
3486
3487 case OP_CRRANGE:
3488 case OP_CRMINRANGE:
3489 minimize = (*ecode == OP_CRMINRANGE);
3490 min = (ecode[1] << 8) + ecode[2];
3491 max = (ecode[3] << 8) + ecode[4];
3492 if (max == 0) max = INT_MAX;
3493 ecode += 5;
3494 break;
3495
3496 default: /* No repeat follows */
3497 if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3498 eptr += length;
3499 continue; /* With the main loop */
3500 }
3501
3502 /* If the length of the reference is zero, just continue with the
3503 main loop. */
3504
3505 if (length == 0) continue;
3506
3507 /* First, ensure the minimum number of matches are present. We get back
3508 the length of the reference string explicitly rather than passing the
3509 address of eptr, so that eptr can be a register variable. */
3510
3511 for (i = 1; i <= min; i++)
3512 {
3513 if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3514 eptr += length;
3515 }
3516
3517 /* If min = max, continue at the same level without recursion.
3518 They are not both allowed to be zero. */
3519
3520 if (min == max) continue;
3521
3522 /* If minimizing, keep trying and advancing the pointer */
3523
3524 if (minimize)
3525 {
3526 for (i = min;; i++)
3527 {
3528 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3529 return TRUE;
3530 if (i >= max || !match_ref(offset, eptr, length, md, ims))
3531 return FALSE;
3532 eptr += length;
3533 }
3534 /* Control never gets here */
3535 }
3536
3537 /* If maximizing, find the longest string and work backwards */
3538
3539 else
3540 {
3541 const uschar *pp = eptr;
3542 for (i = min; i < max; i++)
3543 {
3544 if (!match_ref(offset, eptr, length, md, ims)) break;
3545 eptr += length;
3546 }
3547 while (eptr >= pp)
3548 {
3549 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3550 return TRUE;
3551 eptr -= length;
3552 }
3553 return FALSE;
3554 }
3555 }
3556 /* Control never gets here */
3557
3558
3559
3560 /* Match a character class, possibly repeatedly. Look past the end of the
3561 item to see if there is repeat information following. Then obey similar
3562 code to character type repeats - written out again for speed. */
3563
3564 case OP_CLASS:
3565 {
3566 const uschar *data = ecode + 1; /* Save for matching */
3567 ecode += 33; /* Advance past the item */
3568
3569 switch (*ecode)
3570 {
3571 case OP_CRSTAR:
3572 case OP_CRMINSTAR:
3573 case OP_CRPLUS:
3574 case OP_CRMINPLUS:
3575 case OP_CRQUERY:
3576 case OP_CRMINQUERY:
3577 c = *ecode++ - OP_CRSTAR;
3578 minimize = (c & 1) != 0;
3579 min = rep_min[c]; /* Pick up values from tables; */
3580 max = rep_max[c]; /* zero for max => infinity */
3581 if (max == 0) max = INT_MAX;
3582 break;
3583
3584 case OP_CRRANGE:
3585 case OP_CRMINRANGE:
3586 minimize = (*ecode == OP_CRMINRANGE);
3587 min = (ecode[1] << 8) + ecode[2];
3588 max = (ecode[3] << 8) + ecode[4];
3589 if (max == 0) max = INT_MAX;
3590 ecode += 5;
3591 break;
3592
3593 default: /* No repeat follows */
3594 min = max = 1;
3595 break;
3596 }
3597
3598 /* First, ensure the minimum number of matches are present. */
3599
3600 for (i = 1; i <= min; i++)
3601 {
3602 if (eptr >= md->end_subject) return FALSE;
3603 c = *eptr++;
3604 if ((data[c/8] & (1 << (c&7))) != 0) continue;
3605 return FALSE;
3606 }
3607
3608 /* If max == min we can continue with the main loop without the
3609 need to recurse. */
3610
3611 if (min == max) continue;
3612
3613 /* If minimizing, keep testing the rest of the expression and advancing
3614 the pointer while it matches the class. */
3615
3616 if (minimize)
3617 {
3618 for (i = min;; i++)
3619 {
3620 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3621 return TRUE;
3622 if (i >= max || eptr >= md->end_subject) return FALSE;
3623 c = *eptr++;
3624 if ((data[c/8] & (1 << (c&7))) != 0) continue;
3625 return FALSE;
3626 }
3627 /* Control never gets here */
3628 }
3629
3630 /* If maximizing, find the longest possible run, then work backwards. */
3631
3632 else
3633 {
3634 const uschar *pp = eptr;
3635 for (i = min; i < max; eptr++, i++)
3636 {
3637 if (eptr >= md->end_subject) break;
3638 c = *eptr;
3639 if ((data[c/8] & (1 << (c&7))) != 0) continue;
3640 break;
3641 }
3642
3643 while (eptr >= pp)
3644 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3645 return TRUE;
3646 return FALSE;
3647 }
3648 }
3649 /* Control never gets here */
3650
3651 /* Match a run of characters */
3652
3653 case OP_CHARS:
3654 {
3655 register int length = ecode[1];
3656 ecode += 2;
3657
3658 #ifdef DEBUG /* Sigh. Some compilers never learn. */
3659 if (eptr >= md->end_subject)
3660 printf("matching subject <null> against pattern ");
3661 else
3662 {
3663 printf("matching subject ");
3664 pchars(eptr, length, TRUE, md);
3665 printf(" against pattern ");
3666 }
3667 pchars(ecode, length, FALSE, md);
3668 printf("\n");
3669 #endif
3670
3671 if (length > md->end_subject - eptr) return FALSE;
3672 if ((ims & PCRE_CASELESS) != 0)
3673 {
3674 while (length-- > 0)
3675 if (md->lcc[*ecode++] != md->lcc[*eptr++])
3676 return FALSE;
3677 }
3678 else
3679 {
3680 while (length-- > 0) if (*ecode++ != *eptr++) return FALSE;
3681 }
3682 }
3683 break;
3684
3685 /* Match a single character repeatedly; different opcodes share code. */
3686
3687 case OP_EXACT:
3688 min = max = (ecode[1] << 8) + ecode[2];
3689 ecode += 3;
3690 goto REPEATCHAR;
3691
3692 case OP_UPTO:
3693 case OP_MINUPTO:
3694 min = 0;
3695 max = (ecode[1] << 8) + ecode[2];
3696 minimize = *ecode == OP_MINUPTO;
3697 ecode += 3;
3698 goto REPEATCHAR;
3699
3700 case OP_STAR:
3701 case OP_MINSTAR:
3702 case OP_PLUS:
3703 case OP_MINPLUS:
3704 case OP_QUERY:
3705 case OP_MINQUERY:
3706 c = *ecode++ - OP_STAR;
3707 minimize = (c & 1) != 0;
3708 min = rep_min[c]; /* Pick up values from tables; */
3709 max = rep_max[c]; /* zero for max => infinity */
3710 if (max == 0) max = INT_MAX;
3711
3712 /* Common code for all repeated single-character matches. We can give
3713 up quickly if there are fewer than the minimum number of characters left in
3714 the subject. */
3715
3716 REPEATCHAR:
3717 if (min > md->end_subject - eptr) return FALSE;
3718 c = *ecode++;
3719
3720 /* The code is duplicated for the caseless and caseful cases, for speed,
3721 since matching characters is likely to be quite common. First, ensure the
3722 minimum number of matches are present. If min = max, continue at the same
3723 level without recursing. Otherwise, if minimizing, keep trying the rest of
3724 the expression and advancing one matching character if failing, up to the
3725 maximum. Alternatively, if maximizing, find the maximum number of
3726 characters and work backwards. */
3727
3728 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
3729 max, eptr));
3730
3731 if ((ims & PCRE_CASELESS) != 0)
3732 {
3733 c = md->lcc[c];
3734 for (i = 1; i <= min; i++)
3735 if (c != md->lcc[*eptr++]) return FALSE;
3736 if (min == max) continue;
3737 if (minimize)
3738 {
3739 for (i = min;; i++)
3740 {
3741 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3742 return TRUE;
3743 if (i >= max || eptr >= md->end_subject ||
3744 c != md->lcc[*eptr++])
3745 return FALSE;
3746 }
3747 /* Control never gets here */
3748 }
3749 else
3750 {
3751 const uschar *pp = eptr;
3752 for (i = min; i < max; i++)
3753 {
3754 if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
3755 eptr++;
3756 }
3757 while (eptr >= pp)
3758 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3759 return TRUE;
3760 return FALSE;
3761 }
3762 /* Control never gets here */
3763 }
3764
3765 /* Caseful comparisons */
3766
3767 else
3768 {
3769 for (i = 1; i <= min; i++) if (c != *eptr++) return FALSE;
3770 if (min == max) continue;
3771 if (minimize)
3772 {
3773 for (i = min;; i++)
3774 {
3775 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3776 return TRUE;
3777 if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3778 }
3779 /* Control never gets here */
3780 }
3781 else
3782 {
3783 const uschar *pp = eptr;
3784 for (i = min; i < max; i++)
3785 {
3786 if (eptr >= md->end_subject || c != *eptr) break;
3787 eptr++;
3788 }
3789 while (eptr >= pp)
3790 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3791 return TRUE;
3792 return FALSE;
3793 }
3794 }
3795 /* Control never gets here */
3796
3797 /* Match a negated single character */
3798
3799 case OP_NOT:
3800 if (eptr >= md->end_subject) return FALSE;
3801 ecode++;
3802 if ((ims & PCRE_CASELESS) != 0)
3803 {
3804 if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
3805 }
3806 else
3807 {
3808 if (*ecode++ == *eptr++) return FALSE;
3809 }
3810 break;
3811
3812 /* Match a negated single character repeatedly. This is almost a repeat of
3813 the code for a repeated single character, but I haven't found a nice way of
3814 commoning these up that doesn't require a test of the positive/negative
3815 option for each character match. Maybe that wouldn't add very much to the
3816 time taken, but character matching *is* what this is all about... */
3817
3818 case OP_NOTEXACT:
3819 min = max = (ecode[1] << 8) + ecode[2];
3820 ecode += 3;
3821 goto REPEATNOTCHAR;
3822
3823 case OP_NOTUPTO:
3824 case OP_NOTMINUPTO:
3825 min = 0;
3826 max = (ecode[1] << 8) + ecode[2];
3827 minimize = *ecode == OP_NOTMINUPTO;
3828 ecode += 3;
3829 goto REPEATNOTCHAR;
3830
3831 case OP_NOTSTAR:
3832 case OP_NOTMINSTAR:
3833 case OP_NOTPLUS:
3834 case OP_NOTMINPLUS:
3835 case OP_NOTQUERY:
3836 case OP_NOTMINQUERY:
3837 c = *ecode++ - OP_NOTSTAR;
3838 minimize = (c & 1) != 0;
3839 min = rep_min[c]; /* Pick up values from tables; */
3840 max = rep_max[c]; /* zero for max => infinity */
3841 if (max == 0) max = INT_MAX;
3842
3843 /* Common code for all repeated negated single-character matches. We can
3844 give up quickly if there are fewer than the minimum number of characters left
3845 in the subject. */
3846
3847 REPEATNOTCHAR:
3848 if (min > md->end_subject - eptr) return FALSE;
3849 c = *ecode++;
3850
3851 /* The code is duplicated for the caseless and caseful cases, for speed,
3852 since matching characters is likely to be quite common. First, ensure the
3853 minimum number of matches are present. If min = max, continue at the same
3854 level without recursing. Otherwise, if minimizing, keep trying the rest of
3855 the expression and advancing one matching character if failing, up to the
3856 maximum. Alternatively, if maximizing, find the maximum number of
3857 characters and work backwards. */
3858
3859 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3860 max, eptr));
3861
3862 if ((ims & PCRE_CASELESS) != 0)
3863 {
3864 c = md->lcc[c];
3865 for (i = 1; i <= min; i++)
3866 if (c == md->lcc[*eptr++]) return FALSE;
3867 if (min == max) continue;
3868 if (minimize)
3869 {
3870 for (i = min;; i++)
3871 {
3872 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3873 return TRUE;
3874 if (i >= max || eptr >= md->end_subject ||
3875 c == md->lcc[*eptr++])
3876 return FALSE;
3877 }
3878 /* Control never gets here */
3879 }
3880 else
3881 {
3882 const uschar *pp = eptr;
3883 for (i = min; i < max; i++)
3884 {
3885 if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
3886 eptr++;
3887 }
3888 while (eptr >= pp)
3889 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3890 return TRUE;
3891 return FALSE;
3892 }
3893 /* Control never gets here */
3894 }
3895
3896 /* Caseful comparisons */
3897
3898 else
3899 {
3900 for (i = 1; i <= min; i++) if (c == *eptr++) return FALSE;
3901 if (min == max) continue;
3902 if (minimize)
3903 {
3904 for (i = min;; i++)
3905 {
3906 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3907 return TRUE;
3908 if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3909 }
3910 /* Control never gets here */
3911 }
3912 else
3913 {
3914 const uschar *pp = eptr;
3915 for (i = min; i < max; i++)
3916 {
3917 if (eptr >= md->end_subject || c == *eptr) break;
3918 eptr++;
3919 }
3920 while (eptr >= pp)
3921 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3922 return TRUE;
3923 return FALSE;
3924 }
3925 }
3926 /* Control never gets here */
3927
3928 /* Match a single character type repeatedly; several different opcodes
3929 share code. This is very similar to the code for single characters, but we
3930 repeat it in the interests of efficiency. */
3931
3932 case OP_TYPEEXACT:
3933 min = max = (ecode[1] << 8) + ecode[2];
3934 minimize = TRUE;
3935 ecode += 3;
3936 goto REPEATTYPE;
3937
3938 case OP_TYPEUPTO:
3939 case OP_TYPEMINUPTO:
3940 min = 0;
3941 max = (ecode[1] << 8) + ecode[2];
3942 minimize = *ecode == OP_TYPEMINUPTO;
3943 ecode += 3;
3944 goto REPEATTYPE;
3945
3946 case OP_TYPESTAR:
3947 case OP_TYPEMINSTAR:
3948 case OP_TYPEPLUS:
3949 case OP_TYPEMINPLUS:
3950 case OP_TYPEQUERY:
3951 case OP_TYPEMINQUERY:
3952 c = *ecode++ - OP_TYPESTAR;
3953 minimize = (c & 1) != 0;
3954 min = rep_min[c]; /* Pick up values from tables; */
3955 max = rep_max[c]; /* zero for max => infinity */
3956 if (max == 0) max = INT_MAX;
3957
3958 /* Common code for all repeated single character type matches */
3959
3960 REPEATTYPE:
3961 ctype = *ecode++; /* Code for the character type */
3962
3963 /* First, ensure the minimum number of matches are present. Use inline
3964 code for maximizing the speed, and do the type test once at the start
3965 (i.e. keep it out of the loop). Also test that there are at least the
3966 minimum number of characters before we start. */
3967
3968 if (min > md->end_subject - eptr) return FALSE;
3969 if (min > 0) switch(ctype)
3970 {
3971 case OP_ANY:
3972 if ((ims & PCRE_DOTALL) == 0)
3973 { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3974 else eptr += min;
3975 break;
3976
3977 case OP_NOT_DIGIT:
3978 for (i = 1; i <= min; i++)
3979 if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3980 break;
3981
3982 case OP_DIGIT:
3983 for (i = 1; i <= min; i++)
3984 if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3985 break;
3986
3987 case OP_NOT_WHITESPACE:
3988 for (i = 1; i <= min; i++)
3989 if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3990 break;
3991
3992 case OP_WHITESPACE:
3993 for (i = 1; i <= min; i++)
3994 if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3995 break;
3996
3997 case OP_NOT_WORDCHAR:
3998 for (i = 1; i <= min; i++)
3999 if ((md->ctypes[*eptr++] & ctype_word) != 0)
4000 return FALSE;
4001 break;
4002
4003 case OP_WORDCHAR:
4004 for (i = 1; i <= min; i++)
4005 if ((md->ctypes[*eptr++] & ctype_word) == 0)
4006 return FALSE;
4007 break;
4008 }
4009
4010 /* If min = max, continue at the same level without recursing */
4011
4012 if (min == max) continue;
4013
4014 /* If minimizing, we have to test the rest of the pattern before each
4015 subsequent match. */
4016
4017 if (minimize)
4018 {
4019 for (i = min;; i++)
4020 {
4021 if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;
4022 if (i >= max || eptr >= md->end_subject) return FALSE;
4023
4024 c = *eptr++;
4025 switch(ctype)
4026 {
4027 case OP_ANY:
4028 if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
4029 break;
4030
4031 case OP_NOT_DIGIT:
4032 if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
4033 break;
4034
4035 case OP_DIGIT:
4036 if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
4037 break;
4038
4039 case OP_NOT_WHITESPACE:
4040 if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
4041 break;
4042
4043 case OP_WHITESPACE:
4044 if ((md->ctypes[c] & ctype_space) == 0) return FALSE;
4045 break;
4046
4047 case OP_NOT_WORDCHAR:
4048 if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
4049 break;
4050
4051 case OP_WORDCHAR:
4052 if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
4053 break;
4054 }
4055 }
4056 /* Control never gets here */
4057 }
4058
4059 /* If maximizing it is worth using inline code for speed, doing the type
4060 test once at the start (i.e. keep it out of the loop). */
4061
4062 else
4063 {
4064 const uschar *pp = eptr;
4065 switch(ctype)
4066 {
4067 case OP_ANY:
4068 if ((ims & PCRE_DOTALL) == 0)
4069 {
4070 for (i = min; i < max; i++)
4071 {
4072 if (eptr >= md->end_subject || *eptr == '\n') break;
4073 eptr++;
4074 }
4075 }
4076 else
4077 {
4078 c = max - min;
4079 if (c > md->end_subject - eptr) c = md->end_subject - eptr;
4080 eptr += c;
4081 }
4082 break;
4083
4084 case OP_NOT_DIGIT:
4085 for (i = min; i < max; i++)
4086 {
4087 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4088 break;
4089 eptr++;
4090 }
4091 break;
4092
4093 case OP_DIGIT:
4094 for (i = min; i < max; i++)
4095 {
4096 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4097 break;
4098 eptr++;
4099 }
4100 break;
4101
4102 case OP_NOT_WHITESPACE:
4103 for (i = min; i < max; i++)
4104 {
4105 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4106 break;
4107 eptr++;
4108 }
4109 break;
4110
4111 case OP_WHITESPACE:
4112 for (i = min; i < max; i++)
4113 {
4114 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4115 break;
4116 eptr++;
4117 }
4118 break;
4119
4120 case OP_NOT_WORDCHAR:
4121 for (i = min; i < max; i++)
4122 {
4123 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4124 break;
4125 eptr++;
4126 }
4127 break;
4128
4129 case OP_WORDCHAR:
4130 for (i = min; i < max; i++)
4131 {
4132 if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4133 break;
4134 eptr++;
4135 }
4136 break;
4137 }
4138
4139 while (eptr >= pp)
4140 if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
4141 return TRUE;
4142 return FALSE;
4143 }
4144 /* Control never gets here */
4145
4146 /* There's been some horrible disaster. */
4147
4148 default:
4149 DPRINTF(("Unknown opcode %d\n", *ecode));
4150 md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
4151 return FALSE;
4152 }
4153
4154 /* Do not stick any code in here without much thought; it is assumed
4155 that "continue" in the code above comes out to here to repeat the main
4156 loop. */
4157
4158 } /* End of main loop */
4159 /* Control never reaches here */
4160 }
4161
4162
4163
4164
4165 /*************************************************
4166 * Execute a Regular Expression *
4167 *************************************************/
4168
4169 /* This function applies a compiled re to a subject string and picks out
4170 portions of the string if it matches. Two elements in the vector are set for
4171 each substring: the offsets to the start and end of the substring.
4172
4173 Arguments:
4174 external_re points to the compiled expression
4175 external_extra points to "hints" from pcre_study() or is NULL
4176 subject points to the subject string
4177 length length of subject string (may contain binary zeros)
4178 start_offset where to start in the subject string
4179 options option bits
4180 offsets points to a vector of ints to be filled in with offsets
4181 offsetcount the number of elements in the vector
4182
4183 Returns: > 0 => success; value is the number of elements filled in
4184 = 0 => success, but offsets is not big enough
4185 -1 => failed to match
4186 < -1 => some kind of unexpected problem
4187 */
4188
4189 int
4190 pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
4191 const char *subject, int length, int start_offset, int options, int *offsets,
4192 int offsetcount)
4193 {
4194 int resetcount, ocount;
4195 int first_char = -1;
4196 int req_char = -1;
4197 int req_char2 = -1;
4198 unsigned long int ims = 0;
4199 match_data match_block;
4200 const uschar *start_bits = NULL;
4201 const uschar *start_match = (const uschar *)subject + start_offset;
4202 const uschar *end_subject;
4203 const uschar *req_char_ptr = start_match - 1;
4204 const real_pcre *re = (const real_pcre *)external_re;
4205 const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
4206 BOOL using_temporary_offsets = FALSE;
4207 BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4208 BOOL startline = (re->options & PCRE_STARTLINE) != 0;
4209
4210 if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4211
4212 if (re == NULL || subject == NULL ||
4213 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4214 if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
4215
4216 match_block.start_subject = (const uschar *)subject;
4217 match_block.end_subject = match_block.start_subject + length;
4218 end_subject = match_block.end_subject;
4219
4220 match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4221
4222 match_block.notbol = (options & PCRE_NOTBOL) != 0;
4223 match_block.noteol = (options & PCRE_NOTEOL) != 0;
4224 match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
4225
4226 match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
4227
4228 match_block.lcc = re->tables + lcc_offset;
4229 match_block.ctypes = re->tables + ctypes_offset;
4230
4231 /* The ims options can vary during the matching as a result of the presence
4232 of (?ims) items in the pattern. They are kept in a local variable so that
4233 restoring at the exit of a group is easy. */
4234
4235 ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
4236
4237 /* If the expression has got more back references than the offsets supplied can
4238 hold, we get a temporary bit of working store to use during the matching.
4239 Otherwise, we can use the vector supplied, rounding down its size to a multiple
4240 of 3. */
4241
4242 ocount = offsetcount - (offsetcount % 3);
4243
4244 if (re->top_backref > 0 && re->top_backref >= ocount/3)
4245 {
4246 ocount = re->top_backref * 3 + 3;
4247 match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
4248 if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
4249 using_temporary_offsets = TRUE;
4250 DPRINTF(("Got memory to hold back references\n"));
4251 }
4252 else match_block.offset_vector = offsets;
4253
4254 match_block.offset_end = ocount;
4255 match_block.offset_max = (2*ocount)/3;
4256 match_block.offset_overflow = FALSE;
4257
4258 /* Compute the minimum number of offsets that we need to reset each time. Doing
4259 this makes a huge difference to execution time when there aren't many brackets
4260 in the pattern. */
4261
4262 resetcount = 2 + re->top_bracket * 2;
4263 if (resetcount > offsetcount) resetcount = ocount;
4264
4265 /* Reset the working variable associated with each extraction. These should
4266 never be used unless previously set, but they get saved and restored, and so we
4267 initialize them to avoid reading uninitialized locations. */
4268
4269 if (match_block.offset_vector != NULL)
4270 {
4271 register int *iptr = match_block.offset_vector + ocount;
4272 register int *iend = iptr - resetcount/2 + 1;
4273 while (--iptr >= iend) *iptr = -1;
4274 }
4275
4276 /* Set up the first character to match, if available. The first_char value is
4277 never set for an anchored regular expression, but the anchoring may be forced
4278 at run time, so we have to test for anchoring. The first char may be unset for
4279 an unanchored pattern, of course. If there's no first char and the pattern was
4280 studied, there may be a bitmap of possible first characters. */
4281
4282 if (!anchored)
4283 {
4284 if ((re->options & PCRE_FIRSTSET) != 0)
4285 {
4286 first_char = re->first_char;
4287 if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char];
4288 }
4289 else
4290 if (!startline && extra != NULL &&
4291 (extra->options & PCRE_STUDY_MAPPED) != 0)
4292 start_bits = extra->start_bits;
4293 }
4294
4295 /* For anchored or unanchored matches, there may be a "last known required
4296 character" set. If the PCRE_CASELESS is set, implying that the match starts
4297 caselessly, or if there are any changes of this flag within the regex, set up
4298 both cases of the character. Otherwise set the two values the same, which will
4299 avoid duplicate testing (which takes significant time). This covers the vast
4300 majority of cases. It will be suboptimal when the case flag changes in a regex
4301 and the required character in fact is caseful. */
4302
4303 if ((re->options & PCRE_REQCHSET) != 0)
4304 {
4305 req_char = re->req_char;
4306 req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0)?
4307 (re->tables + fcc_offset)[req_char] : req_char;
4308 }
4309
4310 /* Loop for handling unanchored repeated matching attempts; for anchored regexes
4311 the loop runs just once. */
4312
4313 do
4314 {
4315 int rc;
4316 register int *iptr = match_block.offset_vector;
4317 register int *iend = iptr + resetcount;
4318
4319 /* Reset the maximum number of extractions we might see. */
4320
4321 while (iptr < iend) *iptr++ = -1;
4322
4323 /* Advance to a unique first char if possible */
4324
4325 if (first_char >= 0)
4326 {
4327 if ((ims & PCRE_CASELESS) != 0)
4328 while (start_match < end_subject &&
4329 match_block.lcc[*start_match] != first_char)
4330 start_match++;
4331 else
4332 while (start_match < end_subject && *start_match != first_char)
4333 start_match++;
4334 }
4335
4336 /* Or to just after \n for a multiline match if possible */
4337
4338 else if (startline)
4339 {
4340 if (start_match > match_block.start_subject)
4341 {
4342 while (start_match < end_subject && start_match[-1] != '\n')
4343 start_match++;
4344 }
4345 }
4346
4347 /* Or to a non-unique first char after study */
4348
4349 else if (start_bits != NULL)
4350 {
4351 while (start_match < end_subject)
4352 {
4353 register int c = *start_match;
4354 if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
4355 }
4356 }
4357
4358 #ifdef DEBUG /* Sigh. Some compilers never learn. */
4359 printf(">>>> Match against: ");
4360 pchars(start_match, end_subject - start_match, TRUE, &match_block);
4361 printf("\n");
4362 #endif
4363
4364 /* If req_char is set, we know that that character must appear in the subject
4365 for the match to succeed. If the first character is set, req_char must be
4366 later in the subject; otherwise the test starts at the match point. This
4367 optimization can save a huge amount of backtracking in patterns with nested
4368 unlimited repeats that aren't going to match. We don't know what the state of
4369 case matching may be when this character is hit, so test for it in both its
4370 cases if necessary. However, the different cased versions will not be set up
4371 unless PCRE_CASELESS was given or the casing state changes within the regex.
4372 Writing separate code makes it go faster, as does using an autoincrement and
4373 backing off on a match. */
4374
4375 if (req_char >= 0)
4376 {
4377 register const uschar *p = start_match + ((first_char >= 0)? 1 : 0);
4378
4379 /* We don't need to repeat the search if we haven't yet reached the
4380 place we found it at last time. */
4381
4382 if (p > req_char_ptr)
4383 {
4384 /* Do a single test if no case difference is set up */
4385
4386 if (req_char == req_char2)
4387 {
4388 while (p < end_subject)
4389 {
4390 if (*p++ == req_char) { p--; break; }
4391 }
4392 }
4393
4394 /* Otherwise test for either case */
4395
4396 else
4397 {
4398 while (p < end_subject)
4399 {
4400 register int pp = *p++;
4401 if (pp == req_char || pp == req_char2) { p--; break; }
4402 }
4403 }
4404
4405 /* If we can't find the required character, break the matching loop */
4406
4407 if (p >= end_subject) break;
4408
4409 /* If we have found the required character, save the point where we
4410 found it, so that we don't search again next time round the loop if
4411 the start hasn't passed this character yet. */
4412
4413 req_char_ptr = p;
4414 }
4415 }
4416
4417 /* When a match occurs, substrings will be set for all internal extractions;
4418 we just need to set up the whole thing as substring 0 before returning. If
4419 there were too many extractions, set the return code to zero. In the case
4420 where we had to get some local store to hold offsets for backreferences, copy
4421 those back references that we can. In this case there need not be overflow
4422 if certain parts of the pattern were not used. */
4423
4424 match_block.start_match = start_match;
4425 if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))
4426 continue;
4427
4428 /* Copy the offset information from temporary store if necessary */
4429
4430 if (using_temporary_offsets)
4431 {
4432 if (offsetcount >= 4)
4433 {
4434 memcpy(offsets + 2, match_block.offset_vector + 2,
4435 (offsetcount - 2) * sizeof(int));
4436 DPRINTF(("Copied offsets from temporary memory\n"));
4437 }
4438 if (match_block.end_offset_top > offsetcount)
4439 match_block.offset_overflow = TRUE;
4440
4441 DPRINTF(("Freeing temporary memory\n"));
4442 (pcre_free)(match_block.offset_vector);
4443 }
4444
4445 rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
4446
4447 if (match_block.offset_end < 2) rc = 0; else
4448 {
4449 offsets[0] = start_match - match_block.start_subject;
4450 offsets[1] = match_block.end_match_ptr - match_block.start_subject;
4451 }
4452
4453 DPRINTF((">>>> returning %d\n", rc));
4454 return rc;
4455 }
4456
4457 /* This "while" is the end of the "do" above */
4458
4459 while (!anchored &&
4460 match_block.errorcode == PCRE_ERROR_NOMATCH &&
4461 start_match++ < end_subject);
4462
4463 if (using_temporary_offsets)
4464 {
4465 DPRINTF(("Freeing temporary memory\n"));
4466 (pcre_free)(match_block.offset_vector);
4467 }
4468
4469 DPRINTF((">>>> returning %d\n", match_block.errorcode));
4470
4471 return match_block.errorcode;
4472 }
4473
4474 /* End of pcre.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12