/[pcre]/code/trunk/pcre.c
ViewVC logotype

Contents of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 15 - (hide annotations) (download)
Sat Feb 24 21:38:25 2007 UTC (7 years, 2 months ago) by nigel
File MIME type: text/plain
File size: 103595 byte(s)
Load pcre-1.06 into code/trunk.

1 nigel 3 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /*
6     This is a library of functions to support regular expressions whose syntax
7     and semantics are as close as possible to those of the Perl 5 language. See
8     the file Tech.Notes for some information on the internals.
9    
10     Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12 nigel 15 Copyright (c) 1998 University of Cambridge
13 nigel 3
14     -----------------------------------------------------------------------------
15     Permission is granted to anyone to use this software for any purpose on any
16     computer system, and to redistribute it freely, subject to the following
17     restrictions:
18    
19     1. This software is distributed in the hope that it will be useful,
20     but WITHOUT ANY WARRANTY; without even the implied warranty of
21     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22    
23     2. The origin of this software must not be misrepresented, either by
24     explicit claim or by omission.
25    
26     3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.
28     -----------------------------------------------------------------------------
29     */
30    
31    
32     /* Define DEBUG to get debugging output on stdout. */
33    
34     /* #define DEBUG */
35    
36 nigel 9 /* Use a macro for debugging printing, 'cause that eliminates the the use
37     of #ifdef inline, and there are *still* stupid compilers about that don't like
38     indented pre-processor statements. I suppose it's only been 10 years... */
39 nigel 3
40 nigel 9 #ifdef DEBUG
41     #define DPRINTF(p) printf p
42     #else
43     #define DPRINTF(p) /*nothing*/
44     #endif
45    
46 nigel 3 /* Include the internals header, which itself includes Standard C headers plus
47     the external pcre header. */
48    
49     #include "internal.h"
50    
51    
52 nigel 15 /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54     #ifdef __cplusplus
55     #define class pcre_class
56     #endif
57    
58    
59 nigel 3 /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61 nigel 15 static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62     static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63 nigel 3
64 nigel 13 /* Text forms of OP_ values and things, for debugging (not all used) */
65 nigel 3
66     #ifdef DEBUG
67 nigel 7 static const char *OP_names[] = {
68     "End", "\\A", "\\B", "\\b", "\\D", "\\d",
69 nigel 3 "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
70     "not",
71     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
72     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74     "*", "*?", "+", "+?", "?", "??", "{", "{",
75 nigel 13 "class", "negclass", "Ref",
76 nigel 3 "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77     "Brazero", "Braminzero", "Bra"
78     };
79     #endif
80    
81     /* Table for handling escaped characters in the range '0'-'z'. Positive returns
82     are simple data values; negative values are for special things like \d and so
83     on. Zero means further processing is needed (for things like \x), or the escape
84     is invalid. */
85    
86 nigel 15 static const short int escapes[] = {
87 nigel 3 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
88     0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
89     '@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */
90     0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
91     0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */
92     0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */
93     '`', 7, -ESC_b, 0, -ESC_d, 27, '\f', 0, /* ` - g */
94     0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */
95     0, 0, '\r', -ESC_s, '\t', 0, 0, -ESC_w, /* p - w */
96     0, 0, 0 /* x - z */
97     };
98    
99     /* Definition to allow mutual recursion */
100    
101 nigel 13 static BOOL
102     compile_regex(int, int *, uschar **, const uschar **, const char **);
103 nigel 3
104     /* Structure for passing "static" information around between the functions
105     doing the matching, so that they are thread-safe. */
106    
107     typedef struct match_data {
108     int errorcode; /* As it says */
109     int *offset_vector; /* Offset vector */
110     int offset_end; /* One past the end */
111     BOOL offset_overflow; /* Set if too many extractions */
112     BOOL caseless; /* Case-independent flag */
113     BOOL runtime_caseless; /* Caseless forced at run time */
114     BOOL multiline; /* Multiline flag */
115     BOOL notbol; /* NOTBOL flag */
116     BOOL noteol; /* NOTEOL flag */
117     BOOL dotall; /* Dot matches any char */
118     BOOL endonly; /* Dollar not before final \n */
119 nigel 7 const uschar *start_subject; /* Start of the subject string */
120     const uschar *end_subject; /* End of the subject string */
121 nigel 3 jmp_buf fail_env; /* Environment for longjump() break out */
122 nigel 7 const uschar *end_match_ptr; /* Subject position at end match */
123 nigel 3 int end_offset_top; /* Highwater mark at end of match */
124     } match_data;
125    
126    
127    
128     /*************************************************
129     * Global variables *
130     *************************************************/
131    
132     /* PCRE is thread-clean and doesn't use any global variables in the normal
133     sense. However, it calls memory allocation and free functions via the two
134     indirections below, which are can be changed by the caller, but are shared
135     between all threads. */
136    
137     void *(*pcre_malloc)(size_t) = malloc;
138     void (*pcre_free)(void *) = free;
139    
140    
141    
142    
143     /*************************************************
144     * Return version string *
145     *************************************************/
146    
147 nigel 7 const char *
148 nigel 3 pcre_version(void)
149     {
150     return PCRE_VERSION;
151     }
152    
153    
154    
155    
156     /*************************************************
157     * Return info about a compiled pattern *
158     *************************************************/
159    
160     /* This function picks potentially useful data out of the private
161     structure.
162    
163     Arguments:
164     external_re points to compiled code
165     optptr where to pass back the options
166     first_char where to pass back the first character,
167     or -1 if multiline and all branches start ^,
168     or -2 otherwise
169    
170     Returns: number of identifying extraction brackets
171     or negative values on error
172     */
173    
174     int
175     pcre_info(const pcre *external_re, int *optptr, int *first_char)
176     {
177 nigel 7 const real_pcre *re = (const real_pcre *)external_re;
178 nigel 3 if (re == NULL) return PCRE_ERROR_NULL;
179     if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
180     if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
181     if (first_char != NULL)
182     *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
183     ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
184     return re->top_bracket;
185     }
186    
187    
188    
189    
190     #ifdef DEBUG
191     /*************************************************
192     * Debugging function to print chars *
193     *************************************************/
194    
195     /* Print a sequence of chars in printable format, stopping at the end of the
196     subject if the requested.
197    
198     Arguments:
199     p points to characters
200     length number to print
201     is_subject TRUE if printing from within md->start_subject
202     md pointer to matching data block, if is_subject is TRUE
203    
204     Returns: nothing
205     */
206    
207 nigel 9 static void
208     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
209 nigel 3 {
210     int c;
211     if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
212     while (length-- > 0)
213     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
214     }
215     #endif
216    
217    
218    
219    
220     /*************************************************
221     * Check subpattern for empty operand *
222     *************************************************/
223    
224     /* This function checks a bracketed subpattern to see if any of the paths
225     through it could match an empty string. This is used to diagnose an error if
226     such a subpattern is followed by a quantifier with an unlimited upper bound.
227    
228     Argument:
229     code points to the opening bracket
230    
231     Returns: TRUE or FALSE
232     */
233    
234     static BOOL
235     could_be_empty(uschar *code)
236     {
237     do {
238     uschar *cc = code + 3;
239    
240     /* Scan along the opcodes for this branch; as soon as we find something
241     that matches a non-empty string, break out and advance to test the next
242     branch. If we get to the end of the branch, return TRUE for the whole
243     sub-expression. */
244    
245     for (;;)
246     {
247     /* Test an embedded subpattern; if it could not be empty, break the
248     loop. Otherwise carry on in the branch. */
249    
250 nigel 5 if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)
251 nigel 3 {
252     if (!could_be_empty(cc)) break;
253     do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
254     cc += 3;
255     }
256    
257     else switch (*cc)
258     {
259     /* Reached end of a branch: the subpattern may match the empty string */
260    
261     case OP_ALT:
262     case OP_KET:
263     case OP_KETRMAX:
264     case OP_KETRMIN:
265     return TRUE;
266    
267     /* Skip over assertive subpatterns */
268    
269     case OP_ASSERT:
270     case OP_ASSERT_NOT:
271     do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
272     cc += 3;
273     break;
274    
275     /* Skip over things that don't match chars */
276    
277     case OP_SOD:
278     case OP_EOD:
279     case OP_CIRC:
280     case OP_DOLL:
281     case OP_BRAZERO:
282     case OP_BRAMINZERO:
283     case OP_NOT_WORD_BOUNDARY:
284     case OP_WORD_BOUNDARY:
285     cc++;
286     break;
287    
288     /* Skip over simple repeats with zero lower bound */
289    
290     case OP_STAR:
291     case OP_MINSTAR:
292     case OP_QUERY:
293     case OP_MINQUERY:
294 nigel 5 case OP_NOTSTAR:
295     case OP_NOTMINSTAR:
296     case OP_NOTQUERY:
297     case OP_NOTMINQUERY:
298 nigel 3 case OP_TYPESTAR:
299     case OP_TYPEMINSTAR:
300     case OP_TYPEQUERY:
301     case OP_TYPEMINQUERY:
302     cc += 2;
303     break;
304    
305     /* Skip over UPTOs (lower bound is zero) */
306    
307     case OP_UPTO:
308     case OP_MINUPTO:
309     case OP_TYPEUPTO:
310     case OP_TYPEMINUPTO:
311     cc += 4;
312     break;
313    
314     /* Check a class or a back reference for a zero minimum */
315    
316     case OP_CLASS:
317 nigel 13 case OP_NEGCLASS:
318 nigel 3 case OP_REF:
319 nigel 5 cc += (*cc == OP_REF)? 2 : 33;
320 nigel 3
321     switch (*cc)
322     {
323     case OP_CRSTAR:
324     case OP_CRMINSTAR:
325     case OP_CRQUERY:
326     case OP_CRMINQUERY:
327     cc++;
328     break;
329    
330     case OP_CRRANGE:
331     case OP_CRMINRANGE:
332     if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;
333     cc += 3;
334     break;
335    
336     default:
337     goto NEXT_BRANCH;
338     }
339     break;
340    
341     /* Anything else matches at least one character */
342    
343     default:
344     goto NEXT_BRANCH;
345     }
346     }
347    
348     NEXT_BRANCH:
349     code += (code[1] << 8) + code[2];
350     }
351     while (*code == OP_ALT);
352    
353     /* No branches match the empty string */
354    
355     return FALSE;
356     }
357    
358    
359    
360     /*************************************************
361     * Handle escapes *
362     *************************************************/
363    
364     /* This function is called when a \ has been encountered. It either returns a
365     positive value for a simple escape such as \n, or a negative value which
366     encodes one of the more complicated things such as \d. On entry, ptr is
367     pointing at the \. On exit, it is on the final character of the escape
368     sequence.
369    
370     Arguments:
371     ptrptr points to the pattern position pointer
372     errorptr points to the pointer to the error message
373     bracount number of previous extracting brackets
374     options the options bits
375     isclass TRUE if inside a character class
376    
377     Returns: zero or positive => a data character
378     negative => a special escape sequence
379     on error, errorptr is set
380     */
381    
382     static int
383 nigel 7 check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
384     int options, BOOL isclass)
385 nigel 3 {
386 nigel 7 const uschar *ptr = *ptrptr;
387 nigel 3 int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */
388     int i;
389    
390     if (c == 0) *errorptr = ERR1;
391    
392     /* Digits or letters may have special meaning; all others are literals. */
393    
394     else if (c < '0' || c > 'z') {}
395    
396     /* Do an initial lookup in a table. A non-zero result is something that can be
397     returned immediately. Otherwise further processing may be required. */
398    
399     else if ((i = escapes[c - '0']) != 0) c = i;
400    
401     /* Escapes that need further processing, or are illegal. */
402    
403     else
404     {
405 nigel 7 const uschar *oldptr;
406 nigel 3 switch (c)
407     {
408     /* The handling of escape sequences consisting of a string of digits
409     starting with one that is not zero is not straightforward. By experiment,
410     the way Perl works seems to be as follows:
411    
412     Outside a character class, the digits are read as a decimal number. If the
413     number is less than 10, or if there are that many previous extracting
414     left brackets, then it is a back reference. Otherwise, up to three octal
415     digits are read to form an escaped byte. Thus \123 is likely to be octal
416     123 (cf \0123, which is octal 012 followed by the literal 3). If the octal
417     value is greater than 377, the least significant 8 bits are taken. Inside a
418     character class, \ followed by a digit is always an octal number. */
419    
420     case '1': case '2': case '3': case '4': case '5':
421     case '6': case '7': case '8': case '9':
422    
423     if (!isclass)
424     {
425     oldptr = ptr;
426     c -= '0';
427     while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)
428     c = c * 10 + *(++ptr) - '0';
429     if (c < 10 || c <= bracount)
430     {
431     c = -(ESC_REF + c);
432     break;
433     }
434     ptr = oldptr; /* Put the pointer back and fall through */
435     }
436    
437     /* Handle an octal number following \. If the first digit is 8 or 9, Perl
438     generates a binary zero byte and treats the digit as a following literal.
439     Thus we have to pull back the pointer by one. */
440    
441     if ((c = *ptr) >= '8')
442     {
443     ptr--;
444     c = 0;
445     break;
446     }
447    
448     /* \0 always starts an octal number, but we may drop through to here with a
449     larger first octal digit */
450    
451     case '0':
452     c -= '0';
453     while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&
454     ptr[1] != '8' && ptr[1] != '9')
455     c = c * 8 + *(++ptr) - '0';
456     break;
457    
458     /* Special escapes not starting with a digit are straightforward */
459    
460     case 'x':
461     c = 0;
462     while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)
463     {
464     ptr++;
465     c = c * 16 + pcre_lcc[*ptr] -
466     (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
467     }
468     break;
469    
470     case 'c':
471     c = *(++ptr);
472     if (c == 0)
473     {
474     *errorptr = ERR2;
475     return 0;
476     }
477    
478     /* A letter is upper-cased; then the 0x40 bit is flipped */
479    
480     if (c >= 'a' && c <= 'z') c = pcre_fcc[c];
481     c ^= 0x40;
482     break;
483    
484     /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
485     other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
486     for Perl compatibility, it is a literal. */
487    
488     default:
489     if ((options & PCRE_EXTRA) != 0) switch(c)
490     {
491     case 'X':
492     c = -ESC_X; /* This could be a lookup if it ever got into Perl */
493     break;
494    
495     default:
496     *errorptr = ERR3;
497     break;
498     }
499     break;
500     }
501     }
502    
503     *ptrptr = ptr;
504     return c;
505     }
506    
507    
508    
509     /*************************************************
510     * Check for counted repeat *
511     *************************************************/
512    
513     /* This function is called when a '{' is encountered in a place where it might
514     start a quantifier. It looks ahead to see if it really is a quantifier or not.
515     It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
516     where the ddds are digits.
517    
518     Arguments:
519     p pointer to the first char after '{'
520    
521     Returns: TRUE or FALSE
522     */
523    
524     static BOOL
525 nigel 7 is_counted_repeat(const uschar *p)
526 nigel 3 {
527     if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
528     while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
529     if (*p == '}') return TRUE;
530    
531     if (*p++ != ',') return FALSE;
532     if (*p == '}') return TRUE;
533    
534     if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
535     while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
536     return (*p == '}');
537     }
538    
539    
540    
541     /*************************************************
542     * Read repeat counts *
543     *************************************************/
544    
545     /* Read an item of the form {n,m} and return the values. This is called only
546     after is_counted_repeat() has confirmed that a repeat-count quantifier exists,
547     so the syntax is guaranteed to be correct, but we need to check the values.
548    
549     Arguments:
550     p pointer to first char after '{'
551     minp pointer to int for min
552     maxp pointer to int for max
553     returned as -1 if no max
554     errorptr points to pointer to error message
555    
556     Returns: pointer to '}' on success;
557     current ptr on error, with errorptr set
558     */
559    
560 nigel 7 static const uschar *
561     read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
562 nigel 3 {
563     int min = 0;
564     int max = -1;
565    
566     while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
567    
568     if (*p == '}') max = min; else
569     {
570     if (*(++p) != '}')
571     {
572     max = 0;
573     while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
574     if (max < min)
575     {
576     *errorptr = ERR4;
577     return p;
578     }
579     }
580     }
581    
582     /* Do paranoid checks, then fill in the required variables, and pass back the
583     pointer to the terminating '}'. */
584    
585     if (min > 65535 || max > 65535)
586     *errorptr = ERR5;
587     else
588     {
589     *minp = min;
590     *maxp = max;
591     }
592     return p;
593     }
594    
595    
596    
597     /*************************************************
598     * Compile one branch *
599     *************************************************/
600    
601     /* Scan the pattern, compiling it into the code vector.
602    
603     Arguments:
604     options the option bits
605     bracket points to number of brackets used
606     code points to the pointer to the current code point
607     ptrptr points to the current pattern pointer
608     errorptr points to pointer to error message
609    
610     Returns: TRUE on success
611     FALSE, with *errorptr set on error
612     */
613    
614     static BOOL
615 nigel 7 compile_branch(int options, int *brackets, uschar **codeptr,
616     const uschar **ptrptr, const char **errorptr)
617 nigel 3 {
618     int repeat_type, op_type;
619     int repeat_min, repeat_max;
620     int bravalue, length;
621     register int c;
622     register uschar *code = *codeptr;
623 nigel 7 const uschar *ptr = *ptrptr;
624     const uschar *oldptr;
625 nigel 3 uschar *previous = NULL;
626     uschar class[32];
627    
628     /* Switch on next character until the end of the branch */
629    
630     for (;; ptr++)
631     {
632     BOOL negate_class;
633     int class_charcount;
634     int class_lastchar;
635    
636     c = *ptr;
637     if ((options & PCRE_EXTENDED) != 0)
638     {
639     if ((pcre_ctypes[c] & ctype_space) != 0) continue;
640     if (c == '#')
641     {
642     while ((c = *(++ptr)) != 0 && c != '\n');
643     continue;
644     }
645     }
646    
647     switch(c)
648     {
649     /* The branch terminates at end of string, |, or ). */
650    
651     case 0:
652     case '|':
653     case ')':
654     *codeptr = code;
655     *ptrptr = ptr;
656     return TRUE;
657    
658     /* Handle single-character metacharacters */
659    
660     case '^':
661     previous = NULL;
662     *code++ = OP_CIRC;
663     break;
664    
665     case '$':
666     previous = NULL;
667     *code++ = OP_DOLL;
668     break;
669    
670     case '.':
671     previous = code;
672     *code++ = OP_ANY;
673     break;
674    
675     /* Character classes. These always build a 32-byte bitmap of the permitted
676     characters, except in the special case where there is only one character.
677     For negated classes, we build the map as usual, then invert it at the end.
678     */
679    
680     case '[':
681     previous = code;
682    
683 nigel 13 /* If the first character is '^', set the negation flag, and use a
684     different opcode. This only matters if caseless matching is specified at
685     runtime. */
686 nigel 3
687     if ((c = *(++ptr)) == '^')
688     {
689     negate_class = TRUE;
690 nigel 13 *code++ = OP_NEGCLASS;
691 nigel 3 c = *(++ptr);
692     }
693 nigel 13 else
694     {
695     negate_class = FALSE;
696     *code++ = OP_CLASS;
697     }
698 nigel 3
699     /* Keep a count of chars so that we can optimize the case of just a single
700     character. */
701    
702     class_charcount = 0;
703     class_lastchar = -1;
704    
705     /* Initialize the 32-char bit map to all zeros. We have to build the
706     map in a temporary bit of store, in case the class contains only 1
707     character, because in that case the compiled code doesn't use the
708     bit map. */
709    
710     memset(class, 0, 32 * sizeof(uschar));
711    
712     /* Process characters until ] is reached. By writing this as a "do" it
713     means that an initial ] is taken as a data character. */
714    
715     do
716     {
717     if (c == 0)
718     {
719     *errorptr = ERR6;
720     goto FAILED;
721     }
722    
723     /* Backslash may introduce a single character, or it may introduce one
724     of the specials, which just set a flag. Escaped items are checked for
725     validity in the pre-compiling pass. The sequence \b is a special case.
726 nigel 7 Inside a class (and only there) it is treated as backspace. Elsewhere
727 nigel 3 it marks a word boundary. Other escapes have preset maps ready to
728     or into the one we are building. We assume they have more than one
729     character in them, so set class_count bigger than one. */
730    
731     if (c == '\\')
732     {
733     c = check_escape(&ptr, errorptr, *brackets, options, TRUE);
734     if (-c == ESC_b) c = '\b';
735     else if (c < 0)
736     {
737     class_charcount = 10;
738     switch (-c)
739     {
740     case ESC_d:
741     for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];
742     continue;
743    
744     case ESC_D:
745     for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];
746     continue;
747    
748     case ESC_w:
749     for (c = 0; c < 32; c++)
750     class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);
751     continue;
752    
753     case ESC_W:
754     for (c = 0; c < 32; c++)
755     class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);
756     continue;
757    
758     case ESC_s:
759     for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];
760     continue;
761    
762     case ESC_S:
763     for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];
764     continue;
765    
766     default:
767     *errorptr = ERR7;
768     goto FAILED;
769     }
770     }
771     /* Fall through if single character */
772     }
773    
774     /* A single character may be followed by '-' to form a range. However,
775     Perl does not permit ']' to be the end of the range. A '-' character
776     here is treated as a literal. */
777    
778     if (ptr[1] == '-' && ptr[2] != ']')
779     {
780     int d;
781     ptr += 2;
782     d = *ptr;
783    
784     if (d == 0)
785     {
786     *errorptr = ERR6;
787     goto FAILED;
788     }
789    
790     /* The second part of a range can be a single-character escape, but
791     not any of the other escapes. */
792    
793     if (d == '\\')
794     {
795     d = check_escape(&ptr, errorptr, *brackets, options, TRUE);
796     if (d < 0)
797     {
798     if (d == -ESC_b) d = '\b'; else
799     {
800     *errorptr = ERR7;
801     goto FAILED;
802     }
803     }
804     }
805    
806     if (d < c)
807     {
808     *errorptr = ERR8;
809     goto FAILED;
810     }
811    
812     for (; c <= d; c++)
813     {
814     class[c/8] |= (1 << (c&7));
815     if ((options & PCRE_CASELESS) != 0)
816     {
817     int uc = pcre_fcc[c]; /* flip case */
818     class[uc/8] |= (1 << (uc&7));
819     }
820     class_charcount++; /* in case a one-char range */
821     class_lastchar = c;
822     }
823     continue; /* Go get the next char in the class */
824     }
825    
826     /* Handle a lone single character - we can get here for a normal
827     non-escape char, or after \ that introduces a single character. */
828    
829     class [c/8] |= (1 << (c&7));
830     if ((options & PCRE_CASELESS) != 0)
831     {
832     c = pcre_fcc[c]; /* flip case */
833     class[c/8] |= (1 << (c&7));
834     }
835     class_charcount++;
836     class_lastchar = c;
837     }
838    
839     /* Loop until ']' reached; the check for end of string happens inside the
840     loop. This "while" is the end of the "do" above. */
841    
842     while ((c = *(++ptr)) != ']');
843    
844     /* If class_charcount is 1 and class_lastchar is not negative, we saw
845     precisely one character. This doesn't need the whole 32-byte bit map.
846     We turn it into a 1-character OP_CHAR if it's positive, or OP_NOT if
847     it's negative. */
848    
849     if (class_charcount == 1 && class_lastchar >= 0)
850     {
851     if (negate_class)
852     {
853     code[-1] = OP_NOT;
854     }
855     else
856     {
857     code[-1] = OP_CHARS;
858     *code++ = 1;
859     }
860     *code++ = class_lastchar;
861     }
862    
863     /* Otherwise, negate the 32-byte map if necessary, and copy it into
864     the code vector. */
865    
866     else
867     {
868     if (negate_class)
869     for (c = 0; c < 32; c++) code[c] = ~class[c];
870     else
871     memcpy(code, class, 32);
872     code += 32;
873     }
874     break;
875    
876     /* Various kinds of repeat */
877    
878     case '{':
879     if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
880     ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);
881     if (*errorptr != NULL) goto FAILED;
882     goto REPEAT;
883    
884     case '*':
885     repeat_min = 0;
886     repeat_max = -1;
887     goto REPEAT;
888    
889     case '+':
890     repeat_min = 1;
891     repeat_max = -1;
892     goto REPEAT;
893    
894     case '?':
895     repeat_min = 0;
896     repeat_max = 1;
897    
898     REPEAT:
899     if (previous == NULL)
900     {
901     *errorptr = ERR9;
902     goto FAILED;
903     }
904    
905     /* If the next character is '?' this is a minimizing repeat. Advance to the
906     next character. */
907    
908     if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;
909    
910     /* If the maximum is zero then the minimum must also be zero; Perl allows
911     this case, so we do too - by simply omitting the item altogether. */
912    
913     if (repeat_max == 0) code = previous;
914    
915     /* If previous was a string of characters, chop off the last one and use it
916     as the subject of the repeat. If there was only one character, we can
917     abolish the previous item altogether. */
918    
919     else if (*previous == OP_CHARS)
920     {
921     int len = previous[1];
922     if (len == 1)
923     {
924     c = previous[2];
925     code = previous;
926     }
927     else
928     {
929     c = previous[len+1];
930     previous[1]--;
931     code--;
932     }
933     op_type = 0; /* Use single-char op codes */
934     goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
935     }
936    
937     /* If previous was a single negated character ([^a] or similar), we use
938     one of the special opcodes, replacing it. The code is shared with single-
939     character repeats by adding a suitable offset into repeat_type. */
940    
941     else if ((int)*previous == OP_NOT)
942     {
943     op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */
944     c = previous[1];
945     code = previous;
946     goto OUTPUT_SINGLE_REPEAT;
947     }
948    
949     /* If previous was a character type match (\d or similar), abolish it and
950     create a suitable repeat item. The code is shared with single-character
951     repeats by adding a suitable offset into repeat_type. */
952    
953     else if ((int)*previous < OP_EOD || *previous == OP_ANY)
954     {
955     op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
956     c = *previous;
957     code = previous;
958    
959     OUTPUT_SINGLE_REPEAT:
960     repeat_type += op_type; /* Combine both values for many cases */
961    
962     /* A minimum of zero is handled either as the special case * or ?, or as
963     an UPTO, with the maximum given. */
964    
965     if (repeat_min == 0)
966     {
967     if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
968     else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
969     else
970     {
971     *code++ = OP_UPTO + repeat_type;
972     *code++ = repeat_max >> 8;
973     *code++ = (repeat_max & 255);
974     }
975     }
976    
977     /* The case {1,} is handled as the special case + */
978    
979     else if (repeat_min == 1 && repeat_max == -1)
980     *code++ = OP_PLUS + repeat_type;
981    
982     /* The case {n,n} is just an EXACT, while the general case {n,m} is
983     handled as an EXACT followed by an UPTO. An EXACT of 1 is optimized. */
984    
985     else
986     {
987     if (repeat_min != 1)
988     {
989     *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
990     *code++ = repeat_min >> 8;
991     *code++ = (repeat_min & 255);
992     }
993    
994     /* If the mininum is 1 and the previous item was a character string,
995     we either have to put back the item that got cancelled if the string
996     length was 1, or add the character back onto the end of a longer
997     string. For a character type nothing need be done; it will just get put
998     back naturally. */
999    
1000     else if (*previous == OP_CHARS)
1001     {
1002     if (code == previous) code += 2; else previous[1]++;
1003     }
1004    
1005 nigel 9 /* If the maximum is unlimited, insert an OP_STAR. */
1006 nigel 3
1007 nigel 9 if (repeat_max < 0)
1008 nigel 3 {
1009     *code++ = c;
1010 nigel 9 *code++ = OP_STAR + repeat_type;
1011     }
1012    
1013     /* Else insert an UPTO if the max is greater than the min. */
1014    
1015     else if (repeat_max != repeat_min)
1016     {
1017     *code++ = c;
1018 nigel 3 repeat_max -= repeat_min;
1019     *code++ = OP_UPTO + repeat_type;
1020     *code++ = repeat_max >> 8;
1021     *code++ = (repeat_max & 255);
1022     }
1023     }
1024    
1025     /* The character or character type itself comes last in all cases. */
1026    
1027     *code++ = c;
1028     }
1029    
1030     /* If previous was a character class or a back reference, we put the repeat
1031     stuff after it. */
1032    
1033 nigel 13 else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1034     *previous == OP_REF)
1035 nigel 3 {
1036     if (repeat_min == 0 && repeat_max == -1)
1037     *code++ = OP_CRSTAR + repeat_type;
1038     else if (repeat_min == 1 && repeat_max == -1)
1039     *code++ = OP_CRPLUS + repeat_type;
1040     else if (repeat_min == 0 && repeat_max == 1)
1041     *code++ = OP_CRQUERY + repeat_type;
1042     else
1043     {
1044     *code++ = OP_CRRANGE + repeat_type;
1045     *code++ = repeat_min >> 8;
1046     *code++ = repeat_min & 255;
1047     if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */
1048     *code++ = repeat_max >> 8;
1049     *code++ = repeat_max & 255;
1050     }
1051     }
1052    
1053     /* If previous was a bracket group, we may have to replicate it in certain
1054     cases. If the maximum repeat count is unlimited, check that the bracket
1055     group cannot match the empty string, and diagnose an error if it can. */
1056    
1057     else if ((int)*previous >= OP_BRA)
1058     {
1059     int i;
1060 nigel 9 int len = code - previous;
1061 nigel 3
1062     if (repeat_max == -1 && could_be_empty(previous))
1063     {
1064     *errorptr = ERR10;
1065     goto FAILED;
1066     }
1067    
1068     /* If the minimum is greater than zero, and the maximum is unlimited or
1069     equal to the minimum, the first copy remains where it is, and is
1070     replicated up to the minimum number of times. This case includes the +
1071     repeat, but of course no replication is needed in that case. */
1072    
1073     if (repeat_min > 0 && (repeat_max == -1 || repeat_max == repeat_min))
1074     {
1075     for (i = 1; i < repeat_min; i++)
1076     {
1077 nigel 9 memcpy(code, previous, len);
1078     code += len;
1079 nigel 3 }
1080     }
1081    
1082     /* If the minimum is zero, stick BRAZERO in front of the first copy.
1083     Then, if there is a fixed upper limit, replicated up to that many times,
1084     sticking BRAZERO in front of all the optional ones. */
1085    
1086     else
1087     {
1088     if (repeat_min == 0)
1089     {
1090 nigel 9 memmove(previous+1, previous, len);
1091 nigel 3 code++;
1092     *previous++ = OP_BRAZERO + repeat_type;
1093     }
1094    
1095     for (i = 1; i < repeat_min; i++)
1096     {
1097 nigel 9 memcpy(code, previous, len);
1098     code += len;
1099 nigel 3 }
1100    
1101     for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1102     {
1103     *code++ = OP_BRAZERO + repeat_type;
1104 nigel 9 memcpy(code, previous, len);
1105     code += len;
1106 nigel 3 }
1107     }
1108    
1109     /* If the maximum is unlimited, set a repeater in the final copy. */
1110    
1111     if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;
1112     }
1113    
1114     /* Else there's some kind of shambles */
1115    
1116     else
1117     {
1118     *errorptr = ERR11;
1119     goto FAILED;
1120     }
1121    
1122     /* In all case we no longer have a previous item. */
1123    
1124     previous = NULL;
1125     break;
1126    
1127    
1128     /* Start of nested bracket sub-expression, or comment or lookahead.
1129     First deal with special things that can come after a bracket; all are
1130     introduced by ?, and the appearance of any of them means that this is not a
1131     referencing group. They were checked for validity in the first pass over
1132     the string, so we don't have to check for syntax errors here. */
1133    
1134     case '(':
1135     previous = code; /* Only real brackets can be repeated */
1136     if (*(++ptr) == '?')
1137     {
1138     bravalue = OP_BRA;
1139    
1140     switch (*(++ptr))
1141     {
1142     case '#':
1143     case 'i':
1144     case 'm':
1145     case 's':
1146     case 'x':
1147     ptr++;
1148     while (*ptr != ')') ptr++;
1149     previous = NULL;
1150     continue;
1151    
1152     case ':': /* Non-extracting bracket */
1153     ptr++;
1154     break;
1155    
1156     case '=': /* Assertions can't be repeated */
1157     bravalue = OP_ASSERT;
1158     ptr++;
1159     previous = NULL;
1160     break;
1161    
1162     case '!':
1163     bravalue = OP_ASSERT_NOT;
1164     ptr++;
1165     previous = NULL;
1166     break;
1167    
1168     case '>': /* "Match once" brackets */
1169     if ((options & PCRE_EXTRA) != 0) /* Not yet standard */
1170     {
1171     bravalue = OP_ONCE;
1172     ptr++;
1173     previous = NULL;
1174     break;
1175     }
1176     /* Else fall through */
1177    
1178     default:
1179     *errorptr = ERR12;
1180     goto FAILED;
1181     }
1182     }
1183    
1184     /* Else we have a referencing group */
1185    
1186     else
1187     {
1188     if (++(*brackets) > EXTRACT_MAX)
1189     {
1190     *errorptr = ERR13;
1191     goto FAILED;
1192     }
1193     bravalue = OP_BRA + *brackets;
1194     }
1195    
1196     /* Process nested bracketed re; at end pointer is on the bracket. We copy
1197     code into a non-register variable in order to be able to pass its address
1198     because some compilers complain otherwise. */
1199    
1200     *code = bravalue;
1201     {
1202     uschar *mcode = code;
1203     if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))
1204     goto FAILED;
1205     code = mcode;
1206     }
1207    
1208     if (*ptr != ')')
1209     {
1210     *errorptr = ERR14;
1211     goto FAILED;
1212     }
1213     break;
1214    
1215     /* Check \ for being a real metacharacter; if not, fall through and handle
1216     it as a data character at the start of a string. Escape items are checked
1217     for validity in the pre-compiling pass. */
1218    
1219     case '\\':
1220     oldptr = ptr;
1221     c = check_escape(&ptr, errorptr, *brackets, options, FALSE);
1222    
1223     /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1224     are arranged to be the negation of the corresponding OP_values. For the
1225     back references, the values are ESC_REF plus the reference number. Only
1226     back references and those types that consume a character may be repeated.
1227     We can test for values between ESC_b and ESC_Z for the latter; this may
1228     have to change if any new ones are ever created. */
1229    
1230     if (c < 0)
1231     {
1232     if (-c >= ESC_REF)
1233     {
1234     int refnum = -c - ESC_REF;
1235     if (*brackets < refnum)
1236     {
1237     *errorptr = ERR15;
1238     goto FAILED;
1239     }
1240     previous = code;
1241     *code++ = OP_REF;
1242     *code++ = refnum;
1243     }
1244     else
1245     {
1246     previous = (-c > ESC_b && -c < ESC_X)? code : NULL;
1247     *code++ = -c;
1248     }
1249     continue;
1250     }
1251    
1252 nigel 7 /* Data character: reset and fall through */
1253 nigel 3
1254     ptr = oldptr;
1255     c = '\\';
1256    
1257     /* Handle a run of data characters until a metacharacter is encountered.
1258     The first character is guaranteed not to be whitespace or # when the
1259     extended flag is set. */
1260    
1261     NORMAL_CHAR:
1262     default:
1263     previous = code;
1264     *code = OP_CHARS;
1265     code += 2;
1266     length = 0;
1267    
1268     do
1269     {
1270     if ((options & PCRE_EXTENDED) != 0)
1271     {
1272     if ((pcre_ctypes[c] & ctype_space) != 0) continue;
1273     if (c == '#')
1274     {
1275     while ((c = *(++ptr)) != 0 && c != '\n');
1276     if (c == 0) break;
1277     continue;
1278     }
1279     }
1280    
1281     /* Backslash may introduce a data char or a metacharacter. Escaped items
1282     are checked for validity in the pre-compiling pass. Stop the string
1283     before a metaitem. */
1284    
1285     if (c == '\\')
1286     {
1287     oldptr = ptr;
1288     c = check_escape(&ptr, errorptr, *brackets, options, FALSE);
1289     if (c < 0) { ptr = oldptr; break; }
1290     }
1291    
1292     /* Ordinary character or single-char escape */
1293    
1294     *code++ = c;
1295     length++;
1296     }
1297    
1298     /* This "while" is the end of the "do" above. */
1299    
1300     while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);
1301    
1302     /* Compute the length and set it in the data vector, and advance to
1303     the next state. */
1304    
1305     previous[1] = length;
1306 nigel 15 if (length < 255) ptr--;
1307 nigel 3 break;
1308     }
1309     } /* end of big loop */
1310    
1311     /* Control never reaches here by falling through, only by a goto for all the
1312     error states. Pass back the position in the pattern so that it can be displayed
1313     to the user for diagnosing the error. */
1314    
1315     FAILED:
1316     *ptrptr = ptr;
1317     return FALSE;
1318     }
1319    
1320    
1321    
1322    
1323     /*************************************************
1324     * Compile sequence of alternatives *
1325     *************************************************/
1326    
1327     /* On entry, ptr is pointing past the bracket character, but on return
1328     it points to the closing bracket, or vertical bar, or end of string.
1329     The code variable is pointing at the byte into which the BRA operator has been
1330     stored.
1331    
1332     Argument:
1333     options the option bits
1334     brackets -> int containing the number of extracting brackets used
1335     codeptr -> the address of the current code pointer
1336     ptrptr -> the address of the current pattern pointer
1337     errorptr -> pointer to error message
1338    
1339     Returns: TRUE on success
1340     */
1341    
1342     static BOOL
1343 nigel 7 compile_regex(int options, int *brackets, uschar **codeptr,
1344     const uschar **ptrptr, const char **errorptr)
1345 nigel 3 {
1346 nigel 7 const uschar *ptr = *ptrptr;
1347 nigel 3 uschar *code = *codeptr;
1348     uschar *start_bracket = code;
1349    
1350     for (;;)
1351     {
1352     int length;
1353     uschar *last_branch = code;
1354    
1355     code += 3;
1356     if (!compile_branch(options, brackets, &code, &ptr, errorptr))
1357     {
1358     *ptrptr = ptr;
1359     return FALSE;
1360     }
1361    
1362     /* Fill in the length of the last branch */
1363    
1364     length = code - last_branch;
1365     last_branch[1] = length >> 8;
1366     last_branch[2] = length & 255;
1367    
1368     /* Reached end of expression, either ')' or end of pattern. Insert a
1369     terminating ket and the length of the whole bracketed item, and return,
1370     leaving the pointer at the terminating char. */
1371    
1372     if (*ptr != '|')
1373     {
1374     length = code - start_bracket;
1375     *code++ = OP_KET;
1376     *code++ = length >> 8;
1377     *code++ = length & 255;
1378     *codeptr = code;
1379     *ptrptr = ptr;
1380     return TRUE;
1381     }
1382    
1383     /* Another branch follows; insert an "or" node and advance the pointer. */
1384    
1385     *code = OP_ALT;
1386     ptr++;
1387     }
1388     /* Control never reaches here */
1389     }
1390    
1391    
1392    
1393     /*************************************************
1394     * Check for anchored expression *
1395     *************************************************/
1396    
1397     /* Try to find out if this is an anchored regular expression. Consider each
1398     alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
1399     all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
1400     it's anchored. However, if this is a multiline pattern, then only OP_SOD
1401     counts, since OP_CIRC can match in the middle.
1402    
1403     A branch is also implicitly anchored if it starts with .* because that will try
1404     the rest of the pattern at all possible matching points, so there is no point
1405     trying them again.
1406    
1407     Argument: points to start of expression (the bracket)
1408     Returns: TRUE or FALSE
1409     */
1410    
1411     static BOOL
1412 nigel 7 is_anchored(register const uschar *code, BOOL multiline)
1413 nigel 3 {
1414     do {
1415     int op = (int)code[3];
1416     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)
1417     { if (!is_anchored(code+3, multiline)) return FALSE; }
1418     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1419     { if (code[4] != OP_ANY) return FALSE; }
1420     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;
1421     code += (code[1] << 8) + code[2];
1422     }
1423     while (*code == OP_ALT);
1424     return TRUE;
1425     }
1426    
1427    
1428    
1429     /*************************************************
1430     * Check for start with \n line expression *
1431     *************************************************/
1432    
1433     /* This is called for multiline expressions to try to find out if every branch
1434     starts with ^ so that "first char" processing can be done to speed things up.
1435    
1436     Argument: points to start of expression (the bracket)
1437     Returns: TRUE or FALSE
1438     */
1439    
1440     static BOOL
1441 nigel 7 is_startline(const uschar *code)
1442 nigel 3 {
1443     do {
1444     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
1445     { if (!is_startline(code+3)) return FALSE; }
1446     else if (code[3] != OP_CIRC) return FALSE;
1447     code += (code[1] << 8) + code[2];
1448     }
1449     while (*code == OP_ALT);
1450     return TRUE;
1451     }
1452    
1453    
1454    
1455     /*************************************************
1456     * Check for fixed first char *
1457     *************************************************/
1458    
1459     /* Try to find out if there is a fixed first character. This is called for
1460     unanchored expressions, as it speeds up their processing quite considerably.
1461     Consider each alternative branch. If they all start with the same char, or with
1462     a bracket all of whose alternatives start with the same char (recurse ad lib),
1463     then we return that char, otherwise -1.
1464    
1465     Argument: points to start of expression (the bracket)
1466     Returns: -1 or the fixed first char
1467     */
1468    
1469     static int
1470     find_firstchar(uschar *code)
1471     {
1472     register int c = -1;
1473     do
1474     {
1475     register int charoffset = 4;
1476    
1477     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
1478     {
1479     register int d;
1480     if ((d = find_firstchar(code+3)) < 0) return -1;
1481     if (c < 0) c = d; else if (c != d) return -1;
1482     }
1483    
1484     else switch(code[3])
1485     {
1486     default:
1487     return -1;
1488    
1489     case OP_EXACT: /* Fall through */
1490     charoffset++;
1491    
1492     case OP_CHARS: /* Fall through */
1493     charoffset++;
1494    
1495     case OP_PLUS:
1496     case OP_MINPLUS:
1497     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;
1498     break;
1499     }
1500     code += (code[1] << 8) + code[2];
1501     }
1502     while (*code == OP_ALT);
1503     return c;
1504     }
1505    
1506    
1507    
1508     /*************************************************
1509     * Compile a Regular Expression *
1510     *************************************************/
1511    
1512     /* This function takes a string and returns a pointer to a block of store
1513     holding a compiled version of the expression.
1514    
1515     Arguments:
1516     pattern the regular expression
1517     options various option bits
1518     errorptr pointer to pointer to error text
1519     erroroffset ptr offset in pattern where error was detected
1520    
1521     Returns: pointer to compiled data block, or NULL on error,
1522     with errorptr and erroroffset set
1523     */
1524    
1525     pcre *
1526 nigel 7 pcre_compile(const char *pattern, int options, const char **errorptr,
1527 nigel 3 int *erroroffset)
1528     {
1529     real_pcre *re;
1530     int spaces = 0;
1531     int length = 3; /* For initial BRA plus length */
1532     int runlength;
1533     int c, size;
1534     int bracount = 0;
1535     int brastack[200];
1536     int top_backref = 0;
1537 nigel 7 unsigned int brastackptr = 0;
1538     uschar *code;
1539     const uschar *ptr;
1540 nigel 3
1541     #ifdef DEBUG
1542     uschar *code_base, *code_end;
1543     #endif
1544    
1545     /* We can't pass back an error message if errorptr is NULL; I guess the best we
1546     can do is just return NULL. */
1547    
1548     if (errorptr == NULL) return NULL;
1549     *errorptr = NULL;
1550    
1551     /* However, we can give a message for this error */
1552    
1553     if (erroroffset == NULL)
1554     {
1555     *errorptr = ERR16;
1556     return NULL;
1557     }
1558     *erroroffset = 0;
1559    
1560     if ((options & ~PUBLIC_OPTIONS) != 0)
1561     {
1562     *errorptr = ERR17;
1563     return NULL;
1564     }
1565    
1566 nigel 9 DPRINTF(("------------------------------------------------------------------\n"));
1567     DPRINTF(("%s\n", pattern));
1568 nigel 3
1569     /* The first thing to do is to make a pass over the pattern to compute the
1570     amount of store required to hold the compiled code. This does not have to be
1571     perfect as long as errors are overestimates. At the same time we can detect any
1572     internal flag settings. Make an attempt to correct for any counted white space
1573     if an "extended" flag setting appears late in the pattern. We can't be so
1574     clever for #-comments. */
1575    
1576 nigel 7 ptr = (const uschar *)(pattern - 1);
1577 nigel 3 while ((c = *(++ptr)) != 0)
1578     {
1579     int min, max;
1580     int class_charcount;
1581    
1582     if ((pcre_ctypes[c] & ctype_space) != 0)
1583     {
1584     if ((options & PCRE_EXTENDED) != 0) continue;
1585     spaces++;
1586     }
1587    
1588     if (c == '#' && (options & PCRE_EXTENDED) != 0)
1589     {
1590     while ((c = *(++ptr)) != 0 && c != '\n');
1591     continue;
1592     }
1593    
1594     switch(c)
1595     {
1596     /* A backslashed item may be an escaped "normal" character or a
1597     character type. For a "normal" character, put the pointers and
1598     character back so that tests for whitespace etc. in the input
1599     are done correctly. */
1600    
1601     case '\\':
1602     {
1603 nigel 7 const uschar *save_ptr = ptr;
1604 nigel 3 c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1605     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1606     if (c >= 0)
1607     {
1608     ptr = save_ptr;
1609     c = '\\';
1610     goto NORMAL_CHAR;
1611     }
1612     }
1613     length++;
1614    
1615     /* A back reference needs an additional char, plus either one or 5
1616     bytes for a repeat. We also need to keep the value of the highest
1617     back reference. */
1618    
1619     if (c <= -ESC_REF)
1620     {
1621     int refnum = -c - ESC_REF;
1622     if (refnum > top_backref) top_backref = refnum;
1623     length++; /* For single back reference */
1624     if (ptr[1] == '{' && is_counted_repeat(ptr+2))
1625     {
1626     ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1627     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1628     if ((min == 0 && (max == 1 || max == -1)) ||
1629     (min == 1 && max == -1))
1630     length++;
1631     else length += 5;
1632     if (ptr[1] == '?') ptr++;
1633     }
1634     }
1635     continue;
1636    
1637     case '^':
1638     case '.':
1639     case '$':
1640     case '*': /* These repeats won't be after brackets; */
1641     case '+': /* those are handled separately */
1642     case '?':
1643     length++;
1644     continue;
1645    
1646     /* This covers the cases of repeats after a single char, metachar, class,
1647     or back reference. */
1648    
1649     case '{':
1650     if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
1651     ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);
1652     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1653     if ((min == 0 && (max == 1 || max == -1)) ||
1654     (min == 1 && max == -1))
1655     length++;
1656     else
1657     {
1658     length--; /* Uncount the original char or metachar */
1659     if (min == 1) length++; else if (min > 0) length += 4;
1660     if (max > 0) length += 4; else length += 2;
1661     }
1662     if (ptr[1] == '?') ptr++;
1663     continue;
1664    
1665     /* An alternation contains an offset to the next branch or ket. */
1666     case '|':
1667     length += 3;
1668     continue;
1669    
1670     /* A character class uses 33 characters. Don't worry about character types
1671     that aren't allowed in classes - they'll get picked up during the compile.
1672     A character class that contains only one character uses 2 or 3 bytes,
1673     depending on whether it is negated or not. Notice this where we can. */
1674    
1675     case '[':
1676     class_charcount = 0;
1677     if (*(++ptr) == '^') ptr++;
1678     do
1679     {
1680     if (*ptr == '\\')
1681     {
1682 nigel 9 int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1683 nigel 3 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1684 nigel 9 if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1685 nigel 3 }
1686     else class_charcount++;
1687     ptr++;
1688     }
1689     while (*ptr != 0 && *ptr != ']');
1690    
1691     /* Repeats for negated single chars are handled by the general code */
1692    
1693     if (class_charcount == 1) length += 3; else
1694     {
1695     length += 33;
1696    
1697     /* A repeat needs either 1 or 5 bytes. */
1698    
1699 nigel 9 if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1700 nigel 3 {
1701     ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1702     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1703     if ((min == 0 && (max == 1 || max == -1)) ||
1704     (min == 1 && max == -1))
1705     length++;
1706     else length += 5;
1707     if (ptr[1] == '?') ptr++;
1708     }
1709     }
1710     continue;
1711    
1712     /* Brackets may be genuine groups or special things */
1713    
1714     case '(':
1715    
1716     /* Handle special forms of bracket, which all start (? */
1717    
1718     if (ptr[1] == '?') switch (c = ptr[2])
1719     {
1720     /* Skip over comments entirely */
1721     case '#':
1722     ptr += 3;
1723     while (*ptr != 0 && *ptr != ')') ptr++;
1724     if (*ptr == 0)
1725     {
1726     *errorptr = ERR18;
1727     goto PCRE_ERROR_RETURN;
1728     }
1729     continue;
1730    
1731     /* Non-referencing groups and lookaheads just move the pointer on, and
1732     then behave like a non-special bracket, except that they don't increment
1733     the count of extracting brackets. */
1734    
1735     case ':':
1736     case '=':
1737     case '!':
1738     ptr += 2;
1739     break;
1740    
1741     /* Ditto for the "once only" bracket, allowed only if the extra bit
1742     is set. */
1743    
1744     case '>':
1745     if ((options & PCRE_EXTRA) != 0)
1746     {
1747     ptr += 2;
1748     break;
1749     }
1750     /* Else fall thourh */
1751    
1752     /* Else loop setting valid options until ) is met. Anything else is an
1753     error. */
1754    
1755     default:
1756     ptr += 2;
1757     for (;; ptr++)
1758     {
1759     if ((c = *ptr) == 'i')
1760     {
1761     options |= PCRE_CASELESS;
1762     continue;
1763     }
1764     else if ((c = *ptr) == 'm')
1765     {
1766     options |= PCRE_MULTILINE;
1767     continue;
1768     }
1769     else if (c == 's')
1770     {
1771     options |= PCRE_DOTALL;
1772     continue;
1773     }
1774     else if (c == 'x')
1775     {
1776     options |= PCRE_EXTENDED;
1777     length -= spaces; /* Already counted spaces */
1778     continue;
1779     }
1780     else if (c == ')') break;
1781    
1782     *errorptr = ERR12;
1783     goto PCRE_ERROR_RETURN;
1784     }
1785     continue; /* End of this bracket handling */
1786     }
1787    
1788     /* Extracting brackets must be counted so we can process escapes in a
1789     Perlish way. */
1790    
1791     else bracount++;
1792    
1793     /* Non-special forms of bracket. Save length for computing whole length
1794     at end if there's a repeat that requires duplication of the group. */
1795    
1796     if (brastackptr >= sizeof(brastack)/sizeof(int))
1797     {
1798     *errorptr = ERR19;
1799     goto PCRE_ERROR_RETURN;
1800     }
1801    
1802     brastack[brastackptr++] = length;
1803     length += 3;
1804     continue;
1805    
1806     /* Handle ket. Look for subsequent max/min; for certain sets of values we
1807 nigel 9 have to replicate this bracket up to that many times. If brastackptr is
1808     0 this is an unmatched bracket which will generate an error, but take care
1809     not to try to access brastack[-1]. */
1810 nigel 3
1811     case ')':
1812     length += 3;
1813     {
1814 nigel 9 int minval = 1;
1815     int maxval = 1;
1816     int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1817 nigel 3
1818     /* Leave ptr at the final char; for read_repeat_counts this happens
1819     automatically; for the others we need an increment. */
1820    
1821     if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1822     {
1823 nigel 9 ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1824 nigel 3 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1825     }
1826 nigel 9 else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1827     else if (c == '+') { maxval = -1; ptr++; }
1828     else if (c == '?') { minval = 0; ptr++; }
1829 nigel 3
1830 nigel 9 /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1831     if there is a limited maximum we have to replicate up to maxval-1 times
1832     and allow for a BRAZERO item before each optional copy, as we also have
1833     to do before the first copy if the minimum is zero. */
1834 nigel 3
1835 nigel 9 if (minval == 0) length++;
1836     else if (minval > 1) length += (minval - 1) * duplength;
1837     if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1838 nigel 3 }
1839     continue;
1840    
1841     /* Non-special character. For a run of such characters the length required
1842     is the number of characters + 2, except that the maximum run length is 255.
1843     We won't get a skipped space or a non-data escape or the start of a #
1844     comment as the first character, so the length can't be zero. */
1845    
1846     NORMAL_CHAR:
1847     default:
1848     length += 2;
1849     runlength = 0;
1850     do
1851     {
1852     if ((pcre_ctypes[c] & ctype_space) != 0)
1853     {
1854     if ((options & PCRE_EXTENDED) != 0) continue;
1855     spaces++;
1856     }
1857    
1858     if (c == '#' && (options & PCRE_EXTENDED) != 0)
1859     {
1860     while ((c = *(++ptr)) != 0 && c != '\n');
1861     continue;
1862     }
1863    
1864     /* Backslash may introduce a data char or a metacharacter; stop the
1865     string before the latter. */
1866    
1867     if (c == '\\')
1868     {
1869 nigel 7 const uschar *saveptr = ptr;
1870 nigel 3 c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1871     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1872     if (c < 0) { ptr = saveptr; break; }
1873     }
1874    
1875     /* Ordinary character or single-char escape */
1876    
1877     runlength++;
1878     }
1879    
1880     /* This "while" is the end of the "do" above. */
1881    
1882     while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);
1883    
1884     ptr--;
1885     length += runlength;
1886     continue;
1887     }
1888     }
1889    
1890     length += 4; /* For final KET and END */
1891    
1892     if (length > 65539)
1893     {
1894     *errorptr = ERR20;
1895     return NULL;
1896     }
1897    
1898     /* Compute the size of data block needed and get it, either from malloc or
1899 nigel 9 externally provided function. We specify "code[0]" in the offsetof() expression
1900     rather than just "code", because it has been reported that one broken compiler
1901     fails on "code" because it is also an independent variable. It should make no
1902     difference to the value of the offsetof(). */
1903 nigel 3
1904 nigel 9 size = length + offsetof(real_pcre, code[0]);
1905 nigel 3 re = (real_pcre *)(pcre_malloc)(size);
1906    
1907     if (re == NULL)
1908     {
1909     *errorptr = ERR21;
1910     return NULL;
1911     }
1912    
1913 nigel 9 /* Put in the magic number and the options. */
1914    
1915 nigel 3 re->magic_number = MAGIC_NUMBER;
1916     re->options = options;
1917    
1918     /* Set up a starting, non-extracting bracket, then compile the expression. On
1919     error, *errorptr will be set non-NULL, so we don't need to look at the result
1920     of the function here. */
1921    
1922 nigel 7 ptr = (const uschar *)pattern;
1923 nigel 3 code = re->code;
1924     *code = OP_BRA;
1925     bracount = 0;
1926     (void)compile_regex(options, &bracount, &code, &ptr, errorptr);
1927     re->top_bracket = bracount;
1928     re->top_backref = top_backref;
1929    
1930     /* If not reached end of pattern on success, there's an excess bracket. */
1931    
1932     if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22;
1933    
1934     /* Fill in the terminating state and check for disastrous overflow, but
1935     if debugging, leave the test till after things are printed out. */
1936    
1937     *code++ = OP_END;
1938    
1939     #ifndef DEBUG
1940     if (code - re->code > length) *errorptr = ERR23;
1941     #endif
1942    
1943     /* Failed to compile */
1944    
1945     if (*errorptr != NULL)
1946     {
1947     (pcre_free)(re);
1948     PCRE_ERROR_RETURN:
1949 nigel 7 *erroroffset = ptr - (const uschar *)pattern;
1950 nigel 3 return NULL;
1951     }
1952    
1953     /* If the anchored option was not passed, set flag if we can determine that it
1954     is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if
1955     we can determine what the first character has to be, because that speeds up
1956     unanchored matches no end. In the case of multiline matches, an alternative is
1957     to set the PCRE_STARTLINE flag if all branches start with ^. */
1958    
1959     if ((options & PCRE_ANCHORED) == 0)
1960     {
1961     if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))
1962     re->options |= PCRE_ANCHORED;
1963     else
1964     {
1965 nigel 9 int ch = find_firstchar(re->code);
1966     if (ch >= 0)
1967 nigel 3 {
1968 nigel 9 re->first_char = ch;
1969 nigel 3 re->options |= PCRE_FIRSTSET;
1970     }
1971     else if (is_startline(re->code))
1972     re->options |= PCRE_STARTLINE;
1973     }
1974     }
1975    
1976     /* Print out the compiled data for debugging */
1977    
1978     #ifdef DEBUG
1979    
1980     printf("Length = %d top_bracket = %d top_backref=%d\n",
1981     length, re->top_bracket, re->top_backref);
1982    
1983     if (re->options != 0)
1984     {
1985     printf("%s%s%s%s%s%s%s\n",
1986     ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
1987     ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
1988     ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
1989     ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
1990     ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
1991     ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
1992     ((re->options & PCRE_EXTRA) != 0)? "extra " : "");
1993     }
1994    
1995     if ((re->options & PCRE_FIRSTSET) != 0)
1996     {
1997     if (isprint(re->first_char)) printf("First char = %c\n", re->first_char);
1998     else printf("First char = \\x%02x\n", re->first_char);
1999     }
2000    
2001     code_end = code;
2002     code_base = code = re->code;
2003    
2004     while (code < code_end)
2005     {
2006     int charlength;
2007    
2008     printf("%3d ", code - code_base);
2009    
2010     if (*code >= OP_BRA)
2011     {
2012     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
2013     code += 2;
2014     }
2015    
2016     else switch(*code)
2017     {
2018     case OP_CHARS:
2019     charlength = *(++code);
2020     printf("%3d ", charlength);
2021     while (charlength-- > 0)
2022     if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
2023     break;
2024    
2025     case OP_KETRMAX:
2026     case OP_KETRMIN:
2027     case OP_ALT:
2028     case OP_KET:
2029     case OP_ASSERT:
2030     case OP_ASSERT_NOT:
2031     case OP_ONCE:
2032     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2033     code += 2;
2034     break;
2035    
2036     case OP_STAR:
2037     case OP_MINSTAR:
2038     case OP_PLUS:
2039     case OP_MINPLUS:
2040     case OP_QUERY:
2041     case OP_MINQUERY:
2042     case OP_TYPESTAR:
2043     case OP_TYPEMINSTAR:
2044     case OP_TYPEPLUS:
2045     case OP_TYPEMINPLUS:
2046     case OP_TYPEQUERY:
2047     case OP_TYPEMINQUERY:
2048     if (*code >= OP_TYPESTAR)
2049     printf(" %s", OP_names[code[1]]);
2050     else if (isprint(c = code[1])) printf(" %c", c);
2051     else printf(" \\x%02x", c);
2052     printf("%s", OP_names[*code++]);
2053     break;
2054    
2055     case OP_EXACT:
2056     case OP_UPTO:
2057     case OP_MINUPTO:
2058     if (isprint(c = code[3])) printf(" %c{", c);
2059     else printf(" \\x%02x{", c);
2060 nigel 11 if (*code != OP_EXACT) printf("0,");
2061 nigel 3 printf("%d}", (code[1] << 8) + code[2]);
2062     if (*code == OP_MINUPTO) printf("?");
2063     code += 3;
2064     break;
2065    
2066     case OP_TYPEEXACT:
2067     case OP_TYPEUPTO:
2068     case OP_TYPEMINUPTO:
2069     printf(" %s{", OP_names[code[3]]);
2070     if (*code != OP_TYPEEXACT) printf(",");
2071     printf("%d}", (code[1] << 8) + code[2]);
2072     if (*code == OP_TYPEMINUPTO) printf("?");
2073     code += 3;
2074     break;
2075    
2076     case OP_NOT:
2077     if (isprint(c = *(++code))) printf(" [^%c]", c);
2078     else printf(" [^\\x%02x]", c);
2079     break;
2080    
2081     case OP_NOTSTAR:
2082     case OP_NOTMINSTAR:
2083     case OP_NOTPLUS:
2084     case OP_NOTMINPLUS:
2085     case OP_NOTQUERY:
2086     case OP_NOTMINQUERY:
2087     if (isprint(c = code[1])) printf(" [^%c]", c);
2088     else printf(" [^\\x%02x]", c);
2089     printf("%s", OP_names[*code++]);
2090     break;
2091    
2092     case OP_NOTEXACT:
2093     case OP_NOTUPTO:
2094     case OP_NOTMINUPTO:
2095     if (isprint(c = code[3])) printf(" [^%c]{", c);
2096     else printf(" [^\\x%02x]{", c);
2097     if (*code != OP_NOTEXACT) printf(",");
2098     printf("%d}", (code[1] << 8) + code[2]);
2099     if (*code == OP_NOTMINUPTO) printf("?");
2100     code += 3;
2101     break;
2102    
2103     case OP_REF:
2104     printf(" \\%d", *(++code));
2105 nigel 9 code ++;
2106     goto CLASS_REF_REPEAT;
2107 nigel 3
2108     case OP_CLASS:
2109 nigel 13 case OP_NEGCLASS:
2110 nigel 3 {
2111     int i, min, max;
2112    
2113 nigel 13 if (*code++ == OP_CLASS) printf(" [");
2114     else printf(" ^[");
2115 nigel 3
2116     for (i = 0; i < 256; i++)
2117     {
2118     if ((code[i/8] & (1 << (i&7))) != 0)
2119     {
2120     int j;
2121     for (j = i+1; j < 256; j++)
2122     if ((code[j/8] & (1 << (j&7))) == 0) break;
2123     if (i == '-' || i == ']') printf("\\");
2124     if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
2125     if (--j > i)
2126     {
2127     printf("-");
2128     if (j == '-' || j == ']') printf("\\");
2129     if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
2130     }
2131     i = j;
2132     }
2133     }
2134     printf("]");
2135     code += 32;
2136    
2137 nigel 9 CLASS_REF_REPEAT:
2138    
2139 nigel 3 switch(*code)
2140     {
2141     case OP_CRSTAR:
2142     case OP_CRMINSTAR:
2143     case OP_CRPLUS:
2144     case OP_CRMINPLUS:
2145     case OP_CRQUERY:
2146     case OP_CRMINQUERY:
2147     printf("%s", OP_names[*code]);
2148     break;
2149    
2150     case OP_CRRANGE:
2151     case OP_CRMINRANGE:
2152     min = (code[1] << 8) + code[2];
2153     max = (code[3] << 8) + code[4];
2154     if (max == 0) printf("{%d,}", min);
2155     else printf("{%d,%d}", min, max);
2156     if (*code == OP_CRMINRANGE) printf("?");
2157     code += 4;
2158     break;
2159    
2160     default:
2161     code--;
2162     }
2163     }
2164     break;
2165    
2166     /* Anything else is just a one-node item */
2167    
2168     default:
2169     printf(" %s", OP_names[*code]);
2170     break;
2171     }
2172    
2173     code++;
2174     printf("\n");
2175     }
2176     printf("------------------------------------------------------------------\n");
2177    
2178     /* This check is done here in the debugging case so that the code that
2179     was compiled can be seen. */
2180    
2181     if (code - re->code > length)
2182     {
2183     *errorptr = ERR23;
2184     (pcre_free)(re);
2185     *erroroffset = ptr - (uschar *)pattern;
2186     return NULL;
2187     }
2188     #endif
2189    
2190     return (pcre *)re;
2191     }
2192    
2193    
2194    
2195     /*************************************************
2196     * Match a character type *
2197     *************************************************/
2198    
2199     /* Not used in all the places it might be as it's sometimes faster
2200     to put the code inline.
2201    
2202     Arguments:
2203     type the character type
2204     c the character
2205     dotall the dotall flag
2206    
2207     Returns: TRUE if character is of the type
2208     */
2209    
2210     static BOOL
2211     match_type(int type, int c, BOOL dotall)
2212     {
2213    
2214     #ifdef DEBUG
2215     if (isprint(c)) printf("matching subject %c against ", c);
2216     else printf("matching subject \\x%02x against ", c);
2217     printf("%s\n", OP_names[type]);
2218     #endif
2219    
2220     switch(type)
2221     {
2222     case OP_ANY: return dotall || c != '\n';
2223     case OP_NOT_DIGIT: return (pcre_ctypes[c] & ctype_digit) == 0;
2224     case OP_DIGIT: return (pcre_ctypes[c] & ctype_digit) != 0;
2225     case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;
2226     case OP_WHITESPACE: return (pcre_ctypes[c] & ctype_space) != 0;
2227     case OP_NOT_WORDCHAR: return (pcre_ctypes[c] & ctype_word) == 0;
2228     case OP_WORDCHAR: return (pcre_ctypes[c] & ctype_word) != 0;
2229     }
2230     return FALSE;
2231     }
2232    
2233    
2234    
2235     /*************************************************
2236     * Match a back-reference *
2237     *************************************************/
2238    
2239     /* If a back reference hasn't been set, the match fails.
2240    
2241     Arguments:
2242     number reference number
2243     eptr points into the subject
2244     length length to be matched
2245     md points to match data block
2246    
2247     Returns: TRUE if matched
2248     */
2249    
2250     static BOOL
2251 nigel 7 match_ref(int number, register const uschar *eptr, int length, match_data *md)
2252 nigel 3 {
2253 nigel 7 const uschar *p = md->start_subject + md->offset_vector[number];
2254 nigel 3
2255     #ifdef DEBUG
2256     if (eptr >= md->end_subject)
2257     printf("matching subject <null>");
2258     else
2259     {
2260     printf("matching subject ");
2261     pchars(eptr, length, TRUE, md);
2262     }
2263     printf(" against backref ");
2264     pchars(p, length, FALSE, md);
2265     printf("\n");
2266     #endif
2267    
2268     /* Always fail if not enough characters left */
2269    
2270     if (length > md->end_subject - p) return FALSE;
2271    
2272     /* Separate the caselesss case for speed */
2273    
2274     if (md->caseless)
2275     { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }
2276     else
2277     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2278    
2279     return TRUE;
2280     }
2281    
2282    
2283    
2284     /*************************************************
2285     * Match from current position *
2286     *************************************************/
2287    
2288     /* On entry ecode points to the first opcode, and eptr to the first character.
2289    
2290     Arguments:
2291     eptr pointer in subject
2292     ecode position in code
2293     offset_top current top pointer
2294     md pointer to "static" info for the match
2295    
2296     Returns: TRUE if matched
2297     */
2298    
2299     static BOOL
2300 nigel 7 match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2301 nigel 3 match_data *md)
2302     {
2303     for (;;)
2304     {
2305     int min, max, ctype;
2306     register int i;
2307     register int c;
2308 nigel 7 BOOL minimize = FALSE;
2309 nigel 3
2310     /* Opening bracket. Check the alternative branches in turn, failing if none
2311     match. We have to set the start offset if required and there is space
2312     in the offset vector so that it is available for subsequent back references
2313     if the bracket matches. However, if the bracket fails, we must put back the
2314     previous value of both offsets in case they were set by a previous copy of
2315     the same bracket. Don't worry about setting the flag for the error case here;
2316     that is handled in the code for KET. */
2317    
2318     if ((int)*ecode >= OP_BRA)
2319     {
2320     int number = (*ecode - OP_BRA) << 1;
2321 nigel 7 int save_offset1 = 0, save_offset2 = 0;
2322 nigel 3
2323 nigel 9 DPRINTF(("start bracket %d\n", number/2));
2324 nigel 3
2325     if (number > 0 && number < md->offset_end)
2326     {
2327     save_offset1 = md->offset_vector[number];
2328     save_offset2 = md->offset_vector[number+1];
2329     md->offset_vector[number] = eptr - md->start_subject;
2330    
2331 nigel 9 DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
2332 nigel 3 }
2333    
2334     /* Recurse for all the alternatives. */
2335    
2336     do
2337     {
2338     if (match(eptr, ecode+3, offset_top, md)) return TRUE;
2339     ecode += (ecode[1] << 8) + ecode[2];
2340     }
2341     while (*ecode == OP_ALT);
2342    
2343 nigel 9 DPRINTF(("bracket %d failed\n", number/2));
2344 nigel 3
2345     if (number > 0 && number < md->offset_end)
2346     {
2347     md->offset_vector[number] = save_offset1;
2348     md->offset_vector[number+1] = save_offset2;
2349     }
2350    
2351     return FALSE;
2352     }
2353    
2354     /* Other types of node can be handled by a switch */
2355    
2356     switch(*ecode)
2357     {
2358     case OP_END:
2359     md->end_match_ptr = eptr; /* Record where we ended */
2360     md->end_offset_top = offset_top; /* and how many extracts were taken */
2361     return TRUE;
2362    
2363     /* The equivalent of Prolog's "cut" - if the rest doesn't match, the
2364     whole thing doesn't match, so we have to get out via a longjmp(). */
2365    
2366     case OP_CUT:
2367     if (match(eptr, ecode+1, offset_top, md)) return TRUE;
2368     longjmp(md->fail_env, 1);
2369    
2370     /* Assertion brackets. Check the alternative branches in turn - the
2371     matching won't pass the KET for an assertion. If any one branch matches,
2372     the assertion is true. */
2373    
2374     case OP_ASSERT:
2375     do
2376     {
2377     if (match(eptr, ecode+3, offset_top, md)) break;
2378     ecode += (ecode[1] << 8) + ecode[2];
2379     }
2380     while (*ecode == OP_ALT);
2381     if (*ecode == OP_KET) return FALSE;
2382    
2383     /* Continue from after the assertion, updating the offsets high water
2384     mark, since extracts may have been taken during the assertion. */
2385    
2386     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2387     ecode += 3;
2388     offset_top = md->end_offset_top;
2389     continue;
2390    
2391     /* Negative assertion: all branches must fail to match */
2392    
2393     case OP_ASSERT_NOT:
2394     do
2395     {
2396     if (match(eptr, ecode+3, offset_top, md)) return FALSE;
2397     ecode += (ecode[1] << 8) + ecode[2];
2398     }
2399     while (*ecode == OP_ALT);
2400     ecode += 3;
2401     continue;
2402    
2403     /* "Once" brackets are like assertion brackets except that after a match,
2404     the point in the subject string is not moved back. Thus there can never be
2405 nigel 5 a move back into the brackets. Check the alternative branches in turn - the
2406 nigel 3 matching won't pass the KET for this kind of subpattern. If any one branch
2407     matches, we carry on, leaving the subject pointer. */
2408    
2409     case OP_ONCE:
2410     do
2411     {
2412     if (match(eptr, ecode+3, offset_top, md)) break;
2413     ecode += (ecode[1] << 8) + ecode[2];
2414     }
2415     while (*ecode == OP_ALT);
2416     if (*ecode == OP_KET) return FALSE;
2417    
2418     /* Continue as from after the assertion, updating the offsets high water
2419     mark, since extracts may have been taken. */
2420    
2421     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2422     ecode += 3;
2423     offset_top = md->end_offset_top;
2424     eptr = md->end_match_ptr;
2425     continue;
2426    
2427     /* An alternation is the end of a branch; scan along to find the end of the
2428     bracketed group and go to there. */
2429    
2430     case OP_ALT:
2431     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2432     break;
2433    
2434     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
2435     that it may occur zero times. It may repeat infinitely, or not at all -
2436     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
2437     repeat limits are compiled as a number of copies, with the optional ones
2438     preceded by BRAZERO or BRAMINZERO. */
2439    
2440     case OP_BRAZERO:
2441     {
2442 nigel 7 const uschar *next = ecode+1;
2443 nigel 3 if (match(eptr, next, offset_top, md)) return TRUE;
2444     do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2445     ecode = next + 3;
2446     }
2447     break;
2448    
2449     case OP_BRAMINZERO:
2450     {
2451 nigel 7 const uschar *next = ecode+1;
2452 nigel 3 do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2453     if (match(eptr, next+3, offset_top, md)) return TRUE;
2454     ecode++;
2455     }
2456     break;;
2457    
2458     /* End of a group, repeated or non-repeating. If we are at the end of
2459     an assertion "group", stop matching and return TRUE, but record the
2460     current high water mark for use by positive assertions. */
2461    
2462     case OP_KET:
2463     case OP_KETRMIN:
2464     case OP_KETRMAX:
2465     {
2466     int number;
2467 nigel 7 const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2468 nigel 3
2469     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2470     {
2471     md->end_match_ptr = eptr; /* For ONCE */
2472     md->end_offset_top = offset_top;
2473     return TRUE;
2474     }
2475    
2476     /* In all other cases we have to check the group number back at the
2477     start and if necessary complete handling an extraction by setting the
2478     final offset and bumping the high water mark. */
2479    
2480     number = (*prev - OP_BRA) << 1;
2481    
2482 nigel 9 DPRINTF(("end bracket %d\n", number/2));
2483 nigel 3
2484     if (number > 0)
2485     {
2486     if (number >= md->offset_end) md->offset_overflow = TRUE; else
2487     {
2488     md->offset_vector[number+1] = eptr - md->start_subject;
2489     if (offset_top <= number) offset_top = number + 2;
2490     }
2491     }
2492    
2493     /* For a non-repeating ket, just advance to the next node and continue at
2494     this level. */
2495    
2496     if (*ecode == OP_KET)
2497     {
2498     ecode += 3;
2499     break;
2500     }
2501    
2502     /* The repeating kets try the rest of the pattern or restart from the
2503     preceding bracket, in the appropriate order. */
2504    
2505     if (*ecode == OP_KETRMIN)
2506     {
2507     if (match(eptr, ecode+3, offset_top, md) ||
2508     match(eptr, prev, offset_top, md)) return TRUE;
2509     }
2510     else /* OP_KETRMAX */
2511     {
2512     if (match(eptr, prev, offset_top, md) ||
2513     match(eptr, ecode+3, offset_top, md)) return TRUE;
2514     }
2515     }
2516     return FALSE;
2517    
2518     /* Start of subject unless notbol, or after internal newline if multiline */
2519    
2520     case OP_CIRC:
2521     if (md->notbol && eptr == md->start_subject) return FALSE;
2522     if (md->multiline)
2523     {
2524     if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
2525     ecode++;
2526     break;
2527     }
2528     /* ... else fall through */
2529    
2530     /* Start of subject assertion */
2531    
2532     case OP_SOD:
2533     if (eptr != md->start_subject) return FALSE;
2534     ecode++;
2535     break;
2536    
2537     /* Assert before internal newline if multiline, or before
2538     a terminating newline unless endonly is set, else end of subject unless
2539     noteol is set. */
2540    
2541     case OP_DOLL:
2542     if (md->noteol && eptr >= md->end_subject) return FALSE;
2543     if (md->multiline)
2544     {
2545     if (eptr < md->end_subject && *eptr != '\n') return FALSE;
2546     ecode++;
2547     break;
2548     }
2549     else if (!md->endonly)
2550     {
2551     if (eptr < md->end_subject - 1 ||
2552     (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
2553     ecode++;
2554     break;
2555     }
2556     /* ... else fall through */
2557    
2558     /* End of subject assertion */
2559    
2560     case OP_EOD:
2561     if (eptr < md->end_subject) return FALSE;
2562     ecode++;
2563     break;
2564    
2565     /* Word boundary assertions */
2566    
2567     case OP_NOT_WORD_BOUNDARY:
2568     case OP_WORD_BOUNDARY:
2569     {
2570     BOOL prev_is_word = (eptr != md->start_subject) &&
2571     ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);
2572     BOOL cur_is_word = (eptr < md->end_subject) &&
2573     ((pcre_ctypes[*eptr] & ctype_word) != 0);
2574     if ((*ecode++ == OP_WORD_BOUNDARY)?
2575     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2576     return FALSE;
2577     }
2578     break;
2579    
2580     /* Match a single character type; inline for speed */
2581    
2582     case OP_ANY:
2583     if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;
2584     if (eptr++ >= md->end_subject) return FALSE;
2585     ecode++;
2586     break;
2587    
2588     case OP_NOT_DIGIT:
2589     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)
2590     return FALSE;
2591     ecode++;
2592     break;
2593    
2594     case OP_DIGIT:
2595     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)
2596     return FALSE;
2597     ecode++;
2598     break;
2599    
2600     case OP_NOT_WHITESPACE:
2601     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)
2602     return FALSE;
2603     ecode++;
2604     break;
2605    
2606     case OP_WHITESPACE:
2607     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)
2608     return FALSE;
2609     ecode++;
2610     break;
2611    
2612     case OP_NOT_WORDCHAR:
2613     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)
2614     return FALSE;
2615     ecode++;
2616     break;
2617    
2618     case OP_WORDCHAR:
2619     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)
2620     return FALSE;
2621     ecode++;
2622     break;
2623    
2624     /* Match a back reference, possibly repeatedly. Look past the end of the
2625     item to see if there is repeat information following. The code is similar
2626     to that for character classes, but repeated for efficiency. Then obey
2627     similar code to character type repeats - written out again for speed.
2628     However, if the referenced string is the empty string, always treat
2629     it as matched, any number of times (otherwise there could be infinite
2630     loops). */
2631    
2632     case OP_REF:
2633     {
2634     int length;
2635     int number = ecode[1] << 1; /* Doubled reference number */
2636     ecode += 2; /* Advance past the item */
2637    
2638     if (number >= offset_top || md->offset_vector[number] < 0)
2639     {
2640     md->errorcode = PCRE_ERROR_BADREF;
2641     return FALSE;
2642     }
2643    
2644     length = md->offset_vector[number+1] - md->offset_vector[number];
2645    
2646     switch (*ecode)
2647     {
2648     case OP_CRSTAR:
2649     case OP_CRMINSTAR:
2650     case OP_CRPLUS:
2651     case OP_CRMINPLUS:
2652     case OP_CRQUERY:
2653     case OP_CRMINQUERY:
2654     c = *ecode++ - OP_CRSTAR;
2655     minimize = (c & 1) != 0;
2656     min = rep_min[c]; /* Pick up values from tables; */
2657     max = rep_max[c]; /* zero for max => infinity */
2658     if (max == 0) max = INT_MAX;
2659     break;
2660    
2661     case OP_CRRANGE:
2662     case OP_CRMINRANGE:
2663     minimize = (*ecode == OP_CRMINRANGE);
2664     min = (ecode[1] << 8) + ecode[2];
2665     max = (ecode[3] << 8) + ecode[4];
2666     if (max == 0) max = INT_MAX;
2667     ecode += 5;
2668     break;
2669    
2670     default: /* No repeat follows */
2671     if (!match_ref(number, eptr, length, md)) return FALSE;
2672     eptr += length;
2673     continue; /* With the main loop */
2674     }
2675    
2676     /* If the length of the reference is zero, just continue with the
2677     main loop. */
2678    
2679     if (length == 0) continue;
2680    
2681     /* First, ensure the minimum number of matches are present. We get back
2682     the length of the reference string explicitly rather than passing the
2683     address of eptr, so that eptr can be a register variable. */
2684    
2685     for (i = 1; i <= min; i++)
2686     {
2687     if (!match_ref(number, eptr, length, md)) return FALSE;
2688     eptr += length;
2689     }
2690    
2691     /* If min = max, continue at the same level without recursion.
2692     They are not both allowed to be zero. */
2693    
2694     if (min == max) continue;
2695    
2696     /* If minimizing, keep trying and advancing the pointer */
2697    
2698     if (minimize)
2699     {
2700     for (i = min;; i++)
2701     {
2702     if (match(eptr, ecode, offset_top, md)) return TRUE;
2703     if (i >= max || !match_ref(number, eptr, length, md))
2704     return FALSE;
2705     eptr += length;
2706     }
2707     /* Control never gets here */
2708     }
2709    
2710     /* If maximizing, find the longest string and work backwards */
2711    
2712     else
2713     {
2714 nigel 7 const uschar *pp = eptr;
2715 nigel 3 for (i = min; i < max; i++)
2716     {
2717     if (!match_ref(number, eptr, length, md)) break;
2718     eptr += length;
2719     }
2720     while (eptr >= pp)
2721     {
2722     if (match(eptr, ecode, offset_top, md)) return TRUE;
2723     eptr -= length;
2724     }
2725     return FALSE;
2726     }
2727     }
2728     /* Control never gets here */
2729    
2730     /* Match a character class, possibly repeatedly. Look past the end of the
2731     item to see if there is repeat information following. Then obey similar
2732     code to character type repeats - written out again for speed. If caseless
2733     matching was set at runtime but not at compile time, we have to check both
2734 nigel 13 versions of a character, and we have to behave differently for positive and
2735     negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2736     treated differently. */
2737 nigel 3
2738     case OP_CLASS:
2739 nigel 13 case OP_NEGCLASS:
2740 nigel 3 {
2741 nigel 13 BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2742 nigel 7 const uschar *data = ecode + 1; /* Save for matching */
2743     ecode += 33; /* Advance past the item */
2744 nigel 3
2745     switch (*ecode)
2746     {
2747     case OP_CRSTAR:
2748     case OP_CRMINSTAR:
2749     case OP_CRPLUS:
2750     case OP_CRMINPLUS:
2751     case OP_CRQUERY:
2752     case OP_CRMINQUERY:
2753     c = *ecode++ - OP_CRSTAR;
2754     minimize = (c & 1) != 0;
2755     min = rep_min[c]; /* Pick up values from tables; */
2756     max = rep_max[c]; /* zero for max => infinity */
2757     if (max == 0) max = INT_MAX;
2758     break;
2759    
2760     case OP_CRRANGE:
2761     case OP_CRMINRANGE:
2762     minimize = (*ecode == OP_CRMINRANGE);
2763     min = (ecode[1] << 8) + ecode[2];
2764     max = (ecode[3] << 8) + ecode[4];
2765     if (max == 0) max = INT_MAX;
2766     ecode += 5;
2767     break;
2768    
2769     default: /* No repeat follows */
2770 nigel 13 min = max = 1;
2771     break;
2772 nigel 3 }
2773    
2774     /* First, ensure the minimum number of matches are present. */
2775    
2776     for (i = 1; i <= min; i++)
2777     {
2778     if (eptr >= md->end_subject) return FALSE;
2779     c = *eptr++;
2780 nigel 13
2781     /* Either not runtime caseless, or it was a positive class. For
2782     runtime caseless, continue if either case is in the map. */
2783    
2784     if (!nasty_case)
2785 nigel 3 {
2786 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2787     if (md->runtime_caseless)
2788     {
2789     c = pcre_fcc[c];
2790     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2791     }
2792     }
2793    
2794     /* Runtime caseless and it was a negative class. Continue only if
2795     both cases are in the map. */
2796    
2797     else
2798     {
2799     if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2800 nigel 3 c = pcre_fcc[c];
2801     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2802     }
2803 nigel 13
2804 nigel 3 return FALSE;
2805     }
2806    
2807     /* If max == min we can continue with the main loop without the
2808     need to recurse. */
2809    
2810     if (min == max) continue;
2811    
2812     /* If minimizing, keep testing the rest of the expression and advancing
2813     the pointer while it matches the class. */
2814    
2815     if (minimize)
2816     {
2817     for (i = min;; i++)
2818     {
2819     if (match(eptr, ecode, offset_top, md)) return TRUE;
2820     if (i >= max || eptr >= md->end_subject) return FALSE;
2821     c = *eptr++;
2822 nigel 13
2823     /* Either not runtime caseless, or it was a positive class. For
2824     runtime caseless, continue if either case is in the map. */
2825    
2826     if (!nasty_case)
2827 nigel 3 {
2828 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2829     if (md->runtime_caseless)
2830     {
2831     c = pcre_fcc[c];
2832     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2833     }
2834     }
2835    
2836     /* Runtime caseless and it was a negative class. Continue only if
2837     both cases are in the map. */
2838    
2839     else
2840     {
2841     if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2842 nigel 3 c = pcre_fcc[c];
2843     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2844     }
2845 nigel 13
2846 nigel 3 return FALSE;
2847     }
2848     /* Control never gets here */
2849     }
2850    
2851     /* If maximizing, find the longest possible run, then work backwards. */
2852    
2853     else
2854     {
2855 nigel 7 const uschar *pp = eptr;
2856 nigel 3 for (i = min; i < max; eptr++, i++)
2857     {
2858     if (eptr >= md->end_subject) break;
2859     c = *eptr;
2860 nigel 13
2861     /* Either not runtime caseless, or it was a positive class. For
2862     runtime caseless, continue if either case is in the map. */
2863    
2864     if (!nasty_case)
2865 nigel 3 {
2866 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2867     if (md->runtime_caseless)
2868     {
2869     c = pcre_fcc[c];
2870     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2871     }
2872     }
2873    
2874     /* Runtime caseless and it was a negative class. Continue only if
2875     both cases are in the map. */
2876    
2877     else
2878     {
2879     if ((data[c/8] & (1 << (c&7))) == 0) break;
2880 nigel 3 c = pcre_fcc[c];
2881     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2882     }
2883 nigel 13
2884 nigel 3 break;
2885     }
2886    
2887     while (eptr >= pp)
2888     if (match(eptr--, ecode, offset_top, md)) return TRUE;
2889     return FALSE;
2890     }
2891     }
2892     /* Control never gets here */
2893    
2894     /* Match a run of characters */
2895    
2896     case OP_CHARS:
2897     {
2898     register int length = ecode[1];
2899     ecode += 2;
2900    
2901 nigel 9 #ifdef DEBUG /* Sigh. Some compilers never learn. */
2902 nigel 3 if (eptr >= md->end_subject)
2903     printf("matching subject <null> against pattern ");
2904     else
2905     {
2906     printf("matching subject ");
2907     pchars(eptr, length, TRUE, md);
2908     printf(" against pattern ");
2909     }
2910     pchars(ecode, length, FALSE, md);
2911     printf("\n");
2912 nigel 9 #endif
2913 nigel 3
2914     if (length > md->end_subject - eptr) return FALSE;
2915     if (md->caseless)
2916     {
2917     while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;
2918     }
2919     else
2920     {
2921     while (length-- > 0) if (*ecode++ != *eptr++) return FALSE;
2922     }
2923     }
2924     break;
2925    
2926     /* Match a single character repeatedly; different opcodes share code. */
2927    
2928     case OP_EXACT:
2929     min = max = (ecode[1] << 8) + ecode[2];
2930     ecode += 3;
2931     goto REPEATCHAR;
2932    
2933     case OP_UPTO:
2934     case OP_MINUPTO:
2935     min = 0;
2936     max = (ecode[1] << 8) + ecode[2];
2937     minimize = *ecode == OP_MINUPTO;
2938     ecode += 3;
2939     goto REPEATCHAR;
2940    
2941     case OP_STAR:
2942     case OP_MINSTAR:
2943     case OP_PLUS:
2944     case OP_MINPLUS:
2945     case OP_QUERY:
2946     case OP_MINQUERY:
2947     c = *ecode++ - OP_STAR;
2948     minimize = (c & 1) != 0;
2949     min = rep_min[c]; /* Pick up values from tables; */
2950     max = rep_max[c]; /* zero for max => infinity */
2951     if (max == 0) max = INT_MAX;
2952    
2953     /* Common code for all repeated single-character matches. We can give
2954     up quickly if there are fewer than the minimum number of characters left in
2955     the subject. */
2956    
2957     REPEATCHAR:
2958     if (min > md->end_subject - eptr) return FALSE;
2959     c = *ecode++;
2960    
2961     /* The code is duplicated for the caseless and caseful cases, for speed,
2962     since matching characters is likely to be quite common. First, ensure the
2963     minimum number of matches are present. If min = max, continue at the same
2964     level without recursing. Otherwise, if minimizing, keep trying the rest of
2965     the expression and advancing one matching character if failing, up to the
2966     maximum. Alternatively, if maximizing, find the maximum number of
2967     characters and work backwards. */
2968    
2969 nigel 9 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2970     max, eptr));
2971 nigel 3
2972     if (md->caseless)
2973     {
2974     c = pcre_lcc[c];
2975     for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;
2976     if (min == max) continue;
2977     if (minimize)
2978     {
2979     for (i = min;; i++)
2980     {
2981     if (match(eptr, ecode, offset_top, md)) return TRUE;
2982     if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])
2983     return FALSE;
2984     }
2985     /* Control never gets here */
2986     }
2987     else
2988     {
2989 nigel 7 const uschar *pp = eptr;
2990 nigel 3 for (i = min; i < max; i++)
2991     {
2992     if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
2993     eptr++;
2994     }
2995     while (eptr >= pp)
2996     if (match(eptr--, ecode, offset_top, md)) return TRUE;
2997     return FALSE;
2998     }
2999     /* Control never gets here */
3000     }
3001    
3002     /* Caseful comparisons */
3003    
3004     else
3005     {
3006     for (i = 1; i <= min; i++) if (c != *eptr++) return FALSE;
3007     if (min == max) continue;
3008     if (minimize)
3009     {
3010     for (i = min;; i++)
3011     {
3012     if (match(eptr, ecode, offset_top, md)) return TRUE;
3013     if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3014     }
3015     /* Control never gets here */
3016     }
3017     else
3018     {
3019 nigel 7 const uschar *pp = eptr;
3020 nigel 3 for (i = min; i < max; i++)
3021     {
3022     if (eptr >= md->end_subject || c != *eptr) break;
3023     eptr++;
3024     }
3025     while (eptr >= pp)
3026     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3027     return FALSE;
3028     }
3029     }
3030     /* Control never gets here */
3031    
3032     /* Match a negated single character */
3033    
3034     case OP_NOT:
3035 nigel 9 if (eptr >= md->end_subject) return FALSE;
3036 nigel 3 ecode++;
3037     if (md->caseless)
3038     {
3039     if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;
3040     }
3041     else
3042     {
3043     if (*ecode++ == *eptr++) return FALSE;
3044     }
3045     break;
3046    
3047     /* Match a negated single character repeatedly. This is almost a repeat of
3048     the code for a repeated single character, but I haven't found a nice way of
3049     commoning these up that doesn't require a test of the positive/negative
3050     option for each character match. Maybe that wouldn't add very much to the
3051     time taken, but character matching *is* what this is all about... */
3052    
3053     case OP_NOTEXACT:
3054     min = max = (ecode[1] << 8) + ecode[2];
3055     ecode += 3;
3056     goto REPEATNOTCHAR;
3057    
3058     case OP_NOTUPTO:
3059     case OP_NOTMINUPTO:
3060     min = 0;
3061     max = (ecode[1] << 8) + ecode[2];
3062     minimize = *ecode == OP_NOTMINUPTO;
3063     ecode += 3;
3064     goto REPEATNOTCHAR;
3065    
3066     case OP_NOTSTAR:
3067     case OP_NOTMINSTAR:
3068     case OP_NOTPLUS:
3069     case OP_NOTMINPLUS:
3070     case OP_NOTQUERY:
3071     case OP_NOTMINQUERY:
3072     c = *ecode++ - OP_NOTSTAR;
3073     minimize = (c & 1) != 0;
3074     min = rep_min[c]; /* Pick up values from tables; */
3075     max = rep_max[c]; /* zero for max => infinity */
3076     if (max == 0) max = INT_MAX;
3077    
3078     /* Common code for all repeated single-character matches. We can give
3079     up quickly if there are fewer than the minimum number of characters left in
3080     the subject. */
3081    
3082     REPEATNOTCHAR:
3083     if (min > md->end_subject - eptr) return FALSE;
3084     c = *ecode++;
3085    
3086     /* The code is duplicated for the caseless and caseful cases, for speed,
3087     since matching characters is likely to be quite common. First, ensure the
3088     minimum number of matches are present. If min = max, continue at the same
3089     level without recursing. Otherwise, if minimizing, keep trying the rest of
3090     the expression and advancing one matching character if failing, up to the
3091     maximum. Alternatively, if maximizing, find the maximum number of
3092     characters and work backwards. */
3093    
3094 nigel 9 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3095     max, eptr));
3096 nigel 3
3097     if (md->caseless)
3098     {
3099     c = pcre_lcc[c];
3100     for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;
3101     if (min == max) continue;
3102     if (minimize)
3103     {
3104     for (i = min;; i++)
3105     {
3106     if (match(eptr, ecode, offset_top, md)) return TRUE;
3107     if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])
3108     return FALSE;
3109     }
3110     /* Control never gets here */
3111     }
3112     else
3113     {
3114 nigel 7 const uschar *pp = eptr;
3115 nigel 3 for (i = min; i < max; i++)
3116     {
3117     if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
3118     eptr++;
3119     }
3120     while (eptr >= pp)
3121     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3122     return FALSE;
3123     }
3124     /* Control never gets here */
3125     }
3126    
3127     /* Caseful comparisons */
3128    
3129     else
3130     {
3131     for (i = 1; i <= min; i++) if (c == *eptr++) return FALSE;
3132     if (min == max) continue;
3133     if (minimize)
3134     {
3135     for (i = min;; i++)
3136     {
3137     if (match(eptr, ecode, offset_top, md)) return TRUE;
3138     if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3139     }
3140     /* Control never gets here */
3141     }
3142     else
3143     {
3144 nigel 7 const uschar *pp = eptr;
3145 nigel 3 for (i = min; i < max; i++)
3146     {
3147     if (eptr >= md->end_subject || c == *eptr) break;
3148     eptr++;
3149     }
3150     while (eptr >= pp)
3151     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3152     return FALSE;
3153     }
3154     }
3155     /* Control never gets here */
3156    
3157     /* Match a single character type repeatedly; several different opcodes
3158     share code. This is very similar to the code for single characters, but we
3159     repeat it in the interests of efficiency. */
3160    
3161     case OP_TYPEEXACT:
3162     min = max = (ecode[1] << 8) + ecode[2];
3163     minimize = TRUE;
3164     ecode += 3;
3165     goto REPEATTYPE;
3166    
3167     case OP_TYPEUPTO:
3168     case OP_TYPEMINUPTO:
3169     min = 0;
3170     max = (ecode[1] << 8) + ecode[2];
3171     minimize = *ecode == OP_TYPEMINUPTO;
3172     ecode += 3;
3173     goto REPEATTYPE;
3174    
3175     case OP_TYPESTAR:
3176     case OP_TYPEMINSTAR:
3177     case OP_TYPEPLUS:
3178     case OP_TYPEMINPLUS:
3179     case OP_TYPEQUERY:
3180     case OP_TYPEMINQUERY:
3181     c = *ecode++ - OP_TYPESTAR;
3182     minimize = (c & 1) != 0;
3183     min = rep_min[c]; /* Pick up values from tables; */
3184     max = rep_max[c]; /* zero for max => infinity */
3185     if (max == 0) max = INT_MAX;
3186    
3187     /* Common code for all repeated single character type matches */
3188    
3189     REPEATTYPE:
3190     ctype = *ecode++; /* Code for the character type */
3191    
3192     /* First, ensure the minimum number of matches are present. Use inline
3193     code for maximizing the speed, and do the type test once at the start
3194     (i.e. keep it out of the loop). Also test that there are at least the
3195     minimum number of characters before we start. */
3196    
3197     if (min > md->end_subject - eptr) return FALSE;
3198     if (min > 0) switch(ctype)
3199     {
3200     case OP_ANY:
3201     if (!md->dotall)
3202     { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3203     else eptr += min;
3204     break;
3205    
3206     case OP_NOT_DIGIT:
3207     for (i = 1; i <= min; i++)
3208     if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3209     break;
3210    
3211     case OP_DIGIT:
3212     for (i = 1; i <= min; i++)
3213     if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3214     break;
3215    
3216     case OP_NOT_WHITESPACE:
3217     for (i = 1; i <= min; i++)
3218     if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3219     break;
3220    
3221     case OP_WHITESPACE:
3222     for (i = 1; i <= min; i++)
3223     if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3224     break;
3225    
3226     case OP_NOT_WORDCHAR:
3227     for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)
3228     return FALSE;
3229     break;
3230    
3231     case OP_WORDCHAR:
3232     for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)
3233     return FALSE;
3234     break;
3235     }
3236    
3237     /* If min = max, continue at the same level without recursing */
3238    
3239     if (min == max) continue;
3240    
3241     /* If minimizing, we have to test the rest of the pattern before each
3242     subsequent match, so inlining isn't much help; just use the function. */
3243    
3244     if (minimize)
3245     {
3246     for (i = min;; i++)
3247     {
3248     if (match(eptr, ecode, offset_top, md)) return TRUE;
3249     if (i >= max || eptr >= md->end_subject ||
3250     !match_type(ctype, *eptr++, md->dotall))
3251     return FALSE;
3252     }
3253     /* Control never gets here */
3254     }
3255    
3256     /* If maximizing it is worth using inline code for speed, doing the type
3257     test once at the start (i.e. keep it out of the loop). */
3258    
3259     else
3260     {
3261 nigel 7 const uschar *pp = eptr;
3262 nigel 3 switch(ctype)
3263     {
3264     case OP_ANY:
3265     if (!md->dotall)
3266     {
3267     for (i = min; i < max; i++)
3268     {
3269     if (eptr >= md->end_subject || *eptr == '\n') break;
3270     eptr++;
3271     }
3272     }
3273     else
3274     {
3275     c = max - min;
3276     if (c > md->end_subject - eptr) c = md->end_subject - eptr;
3277     eptr += c;
3278     }
3279     break;
3280    
3281     case OP_NOT_DIGIT:
3282     for (i = min; i < max; i++)
3283     {
3284     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)
3285     break;
3286     eptr++;
3287     }
3288     break;
3289    
3290     case OP_DIGIT:
3291     for (i = min; i < max; i++)
3292     {
3293     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)
3294     break;
3295     eptr++;
3296     }
3297     break;
3298    
3299     case OP_NOT_WHITESPACE:
3300     for (i = min; i < max; i++)
3301     {
3302     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)
3303     break;
3304     eptr++;
3305     }
3306     break;
3307    
3308     case OP_WHITESPACE:
3309     for (i = min; i < max; i++)
3310     {
3311     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)
3312     break;
3313     eptr++;
3314     }
3315     break;
3316    
3317     case OP_NOT_WORDCHAR:
3318     for (i = min; i < max; i++)
3319     {
3320     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)
3321     break;
3322     eptr++;
3323     }
3324     break;
3325    
3326     case OP_WORDCHAR:
3327     for (i = min; i < max; i++)
3328     {
3329     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)
3330     break;
3331     eptr++;
3332     }
3333     break;
3334     }
3335    
3336     while (eptr >= pp)
3337     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3338     return FALSE;
3339     }
3340     /* Control never gets here */
3341    
3342     /* There's been some horrible disaster. */
3343    
3344     default:
3345 nigel 9 DPRINTF(("Unknown opcode %d\n", *ecode));
3346 nigel 3 md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3347     return FALSE;
3348     }
3349    
3350     /* Do not stick any code in here without much thought; it is assumed
3351     that "continue" in the code above comes out to here to repeat the main
3352     loop. */
3353    
3354     } /* End of main loop */
3355     /* Control never reaches here */
3356     }
3357    
3358    
3359    
3360     /*************************************************
3361 nigel 9 * Segregate setjmp() *
3362     *************************************************/
3363    
3364     /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3365     is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3366     hide it in a separate function. This is called only when PCRE_EXTRA is set,
3367     since it's needed only for the extension \X option, and with any luck, a good
3368     compiler will spot the tail recursion and compile it efficiently.
3369    
3370 nigel 11 Arguments:
3371     eptr pointer in subject
3372     ecode position in code
3373     offset_top current top pointer
3374     md pointer to "static" info for the match
3375    
3376     Returns: TRUE if matched
3377 nigel 9 */
3378    
3379 nigel 11 static BOOL
3380     match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3381     match_data *match_block)
3382 nigel 9 {
3383 nigel 11 return setjmp(match_block->fail_env) == 0 &&
3384     match(eptr, ecode, offset_top, match_block);
3385 nigel 9 }
3386    
3387    
3388    
3389     /*************************************************
3390 nigel 3 * Execute a Regular Expression *
3391     *************************************************/
3392    
3393     /* This function applies a compiled re to a subject string and picks out
3394     portions of the string if it matches. Two elements in the vector are set for
3395     each substring: the offsets to the start and end of the substring.
3396    
3397     Arguments:
3398     external_re points to the compiled expression
3399     external_extra points to "hints" from pcre_study() or is NULL
3400     subject points to the subject string
3401     length length of subject string (may contain binary zeros)
3402     options option bits
3403     offsets points to a vector of ints to be filled in with offsets
3404     offsetcount the number of elements in the vector
3405    
3406     Returns: > 0 => success; value is the number of elements filled in
3407     = 0 => success, but offsets is not big enough
3408     -1 => failed to match
3409     < -1 => some kind of unexpected problem
3410     */
3411    
3412     int
3413     pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3414     const char *subject, int length, int options, int *offsets, int offsetcount)
3415     {
3416 nigel 11 int resetcount, ocount;
3417 nigel 3 int first_char = -1;
3418     match_data match_block;
3419 nigel 7 const uschar *start_bits = NULL;
3420 nigel 9 const uschar *start_match = (const uschar *)subject;
3421 nigel 7 const uschar *end_subject;
3422     const real_pcre *re = (const real_pcre *)external_re;
3423     const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3424 nigel 11 BOOL using_temporary_offsets = FALSE;
3425 nigel 3 BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3426     BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3427    
3428     if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3429    
3430     if (re == NULL || subject == NULL ||
3431     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3432     if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3433    
3434 nigel 7 match_block.start_subject = (const uschar *)subject;
3435 nigel 3 match_block.end_subject = match_block.start_subject + length;
3436     end_subject = match_block.end_subject;
3437    
3438     match_block.caseless = ((re->options | options) & PCRE_CASELESS) != 0;
3439     match_block.runtime_caseless = match_block.caseless &&
3440     (re->options & PCRE_CASELESS) == 0;
3441    
3442     match_block.multiline = ((re->options | options) & PCRE_MULTILINE) != 0;
3443     match_block.dotall = ((re->options | options) & PCRE_DOTALL) != 0;
3444     match_block.endonly = ((re->options | options) & PCRE_DOLLAR_ENDONLY) != 0;
3445    
3446     match_block.notbol = (options & PCRE_NOTBOL) != 0;
3447     match_block.noteol = (options & PCRE_NOTEOL) != 0;
3448    
3449     match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
3450    
3451     /* If the expression has got more back references than the offsets supplied can
3452     hold, we get a temporary bit of working store to use during the matching.
3453 nigel 11 Otherwise, we can use the vector supplied, rounding down its size to a multiple
3454     of 2. */
3455 nigel 3
3456 nigel 11 ocount = offsetcount & (-2);
3457     if (re->top_backref > 0 && re->top_backref >= ocount/2)
3458 nigel 3 {
3459     ocount = re->top_backref * 2 + 2;
3460 nigel 15 match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3461 nigel 3 if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3462 nigel 11 using_temporary_offsets = TRUE;
3463 nigel 9 DPRINTF(("Got memory to hold back references\n"));
3464 nigel 3 }
3465     else match_block.offset_vector = offsets;
3466    
3467     match_block.offset_end = ocount;
3468     match_block.offset_overflow = FALSE;
3469    
3470     /* Compute the minimum number of offsets that we need to reset each time. Doing
3471     this makes a huge difference to execution time when there aren't many brackets
3472     in the pattern. */
3473    
3474     resetcount = 2 + re->top_bracket * 2;
3475     if (resetcount > offsetcount) resetcount = ocount;
3476    
3477     /* If MULTILINE is set at exec time but was not set at compile time, and the
3478     anchored flag is set, we must re-check because a setting provoked by ^ in the
3479     pattern is not right in multi-line mode. Calling is_anchored() again here does
3480     the right check, because multiline is now set. If it now yields FALSE, the
3481     expression must have had ^ starting some of its branches. Check to see if
3482     that is true for *all* branches, and if so, set the startline flag. */
3483    
3484     if (match_block. multiline && anchored && (re->options & PCRE_MULTILINE) == 0 &&
3485     !is_anchored(re->code, match_block.multiline))
3486     {
3487     anchored = FALSE;
3488     if (is_startline(re->code)) startline = TRUE;
3489     }
3490    
3491     /* Set up the first character to match, if available. The first_char value is
3492     never set for an anchored regular expression, but the anchoring may be forced
3493     at run time, so we have to test for anchoring. The first char may be unset for
3494     an unanchored pattern, of course. If there's no first char and the pattern was
3495     studied, the may be a bitmap of possible first characters. However, we can
3496     use this only if the caseless state of the studying was correct. */
3497    
3498     if (!anchored)
3499     {
3500     if ((re->options & PCRE_FIRSTSET) != 0)
3501     {
3502     first_char = re->first_char;
3503     if (match_block.caseless) first_char = pcre_lcc[first_char];
3504     }
3505     else
3506     if (!startline && extra != NULL &&
3507     (extra->options & PCRE_STUDY_MAPPED) != 0 &&
3508     ((extra->options & PCRE_STUDY_CASELESS) != 0) == match_block.caseless)
3509     start_bits = extra->start_bits;
3510     }
3511    
3512     /* Loop for unanchored matches; for anchored regexps the loop runs just once. */
3513    
3514     do
3515     {
3516 nigel 9 int rc;
3517 nigel 3 register int *iptr = match_block.offset_vector;
3518     register int *iend = iptr + resetcount;
3519    
3520     /* Reset the maximum number of extractions we might see. */
3521    
3522     while (iptr < iend) *iptr++ = -1;
3523    
3524     /* Advance to a unique first char if possible */
3525    
3526     if (first_char >= 0)
3527     {
3528     if (match_block.caseless)
3529     while (start_match < end_subject && pcre_lcc[*start_match] != first_char)
3530     start_match++;
3531     else
3532     while (start_match < end_subject && *start_match != first_char)
3533     start_match++;
3534     }
3535    
3536     /* Or to just after \n for a multiline match if possible */
3537    
3538     else if (startline)
3539     {
3540     if (start_match > match_block.start_subject)
3541     {
3542     while (start_match < end_subject && start_match[-1] != '\n')
3543     start_match++;
3544     }
3545     }
3546    
3547     /* Or to a non-unique first char */
3548    
3549     else if (start_bits != NULL)
3550     {
3551     while (start_match < end_subject)
3552     {
3553     register int c = *start_match;
3554     if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
3555     }
3556     }
3557    
3558 nigel 9 #ifdef DEBUG /* Sigh. Some compilers never learn. */
3559 nigel 3 printf(">>>> Match against: ");
3560     pchars(start_match, end_subject - start_match, TRUE, &match_block);
3561     printf("\n");
3562 nigel 9 #endif
3563 nigel 3
3564     /* When a match occurs, substrings will be set for all internal extractions;
3565     we just need to set up the whole thing as substring 0 before returning. If
3566     there were too many extractions, set the return code to zero. In the case
3567     where we had to get some local store to hold offsets for backreferences, copy
3568     those back references that we can. In this case there need not be overflow
3569     if certain parts of the pattern were not used.
3570    
3571     Before starting the match, we have to set up a longjmp() target to enable
3572 nigel 9 the "cut" operation to fail a match completely without backtracking. This
3573     is done in a separate function to avoid compiler warnings. We need not do
3574     it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3575     enabled. */
3576 nigel 3
3577 nigel 11 if ((re->options & PCRE_EXTRA) != 0)
3578     {
3579     if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3580     continue;
3581     }
3582     else if (!match(start_match, re->code, 2, &match_block)) continue;
3583 nigel 9
3584     /* Copy the offset information from temporary store if necessary */
3585    
3586 nigel 11 if (using_temporary_offsets)
3587 nigel 3 {
3588 nigel 9 if (offsetcount >= 4)
3589 nigel 3 {
3590 nigel 9 memcpy(offsets + 2, match_block.offset_vector + 2,
3591     (offsetcount - 2) * sizeof(int));
3592 nigel 11 DPRINTF(("Copied offsets from temporary memory\n"));
3593 nigel 3 }
3594 nigel 9 if (match_block.end_offset_top > offsetcount)
3595     match_block.offset_overflow = TRUE;
3596 nigel 3
3597 nigel 9 DPRINTF(("Freeing temporary memory\n"));
3598     (pcre_free)(match_block.offset_vector);
3599     }
3600 nigel 3
3601 nigel 9 rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
3602 nigel 3
3603 nigel 9 if (match_block.offset_end < 2) rc = 0; else
3604     {
3605     offsets[0] = start_match - match_block.start_subject;
3606     offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3607 nigel 3 }
3608 nigel 9
3609     DPRINTF((">>>> returning %d\n", rc));
3610     return rc;
3611 nigel 3 }
3612     while (!anchored &&
3613     match_block.errorcode == PCRE_ERROR_NOMATCH &&
3614     start_match++ < end_subject);
3615    
3616 nigel 11 if (using_temporary_offsets)
3617     {
3618     DPRINTF(("Freeing temporary memory\n"));
3619     (pcre_free)(match_block.offset_vector);
3620     }
3621    
3622 nigel 9 DPRINTF((">>>> returning %d\n", match_block.errorcode));
3623 nigel 3
3624     return match_block.errorcode;
3625     }
3626    
3627     /* End of pcre.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12