/[pcre]/code/trunk/pcre.c
ViewVC logotype

Contents of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 13 - (hide annotations) (download)
Sat Feb 24 21:38:21 2007 UTC (6 years, 3 months ago) by nigel
File MIME type: text/plain
File size: 103422 byte(s)
Load pcre-1.05 into code/trunk.

1 nigel 3 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /*
6     This is a library of functions to support regular expressions whose syntax
7     and semantics are as close as possible to those of the Perl 5 language. See
8     the file Tech.Notes for some information on the internals.
9    
10     Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12     Copyright (c) 1997 University of Cambridge
13    
14     -----------------------------------------------------------------------------
15     Permission is granted to anyone to use this software for any purpose on any
16     computer system, and to redistribute it freely, subject to the following
17     restrictions:
18    
19     1. This software is distributed in the hope that it will be useful,
20     but WITHOUT ANY WARRANTY; without even the implied warranty of
21     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22    
23     2. The origin of this software must not be misrepresented, either by
24     explicit claim or by omission.
25    
26     3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.
28     -----------------------------------------------------------------------------
29     */
30    
31    
32     /* Define DEBUG to get debugging output on stdout. */
33    
34     /* #define DEBUG */
35    
36 nigel 9 /* Use a macro for debugging printing, 'cause that eliminates the the use
37     of #ifdef inline, and there are *still* stupid compilers about that don't like
38     indented pre-processor statements. I suppose it's only been 10 years... */
39 nigel 3
40 nigel 9 #ifdef DEBUG
41     #define DPRINTF(p) printf p
42     #else
43     #define DPRINTF(p) /*nothing*/
44     #endif
45    
46 nigel 3 /* Include the internals header, which itself includes Standard C headers plus
47     the external pcre header. */
48    
49     #include "internal.h"
50    
51    
52     /* Min and max values for the common repeats; for the maxima, 0 => infinity */
53    
54     static char rep_min[] = { 0, 0, 1, 1, 0, 0 };
55     static char rep_max[] = { 0, 0, 0, 0, 1, 1 };
56    
57 nigel 13 /* Text forms of OP_ values and things, for debugging (not all used) */
58 nigel 3
59     #ifdef DEBUG
60 nigel 7 static const char *OP_names[] = {
61     "End", "\\A", "\\B", "\\b", "\\D", "\\d",
62 nigel 3 "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
63     "not",
64     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
65     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
66     "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
67     "*", "*?", "+", "+?", "?", "??", "{", "{",
68 nigel 13 "class", "negclass", "Ref",
69 nigel 3 "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
70     "Brazero", "Braminzero", "Bra"
71     };
72     #endif
73    
74     /* Table for handling escaped characters in the range '0'-'z'. Positive returns
75     are simple data values; negative values are for special things like \d and so
76     on. Zero means further processing is needed (for things like \x), or the escape
77     is invalid. */
78    
79     static short int escapes[] = {
80     0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */
81     0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */
82     '@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */
83     0, 0, 0, 0, 0, 0, 0, 0, /* H - O */
84     0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */
85     0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */
86     '`', 7, -ESC_b, 0, -ESC_d, 27, '\f', 0, /* ` - g */
87     0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */
88     0, 0, '\r', -ESC_s, '\t', 0, 0, -ESC_w, /* p - w */
89     0, 0, 0 /* x - z */
90     };
91    
92     /* Definition to allow mutual recursion */
93    
94 nigel 13 static BOOL
95     compile_regex(int, int *, uschar **, const uschar **, const char **);
96 nigel 3
97     /* Structure for passing "static" information around between the functions
98     doing the matching, so that they are thread-safe. */
99    
100     typedef struct match_data {
101     int errorcode; /* As it says */
102     int *offset_vector; /* Offset vector */
103     int offset_end; /* One past the end */
104     BOOL offset_overflow; /* Set if too many extractions */
105     BOOL caseless; /* Case-independent flag */
106     BOOL runtime_caseless; /* Caseless forced at run time */
107     BOOL multiline; /* Multiline flag */
108     BOOL notbol; /* NOTBOL flag */
109     BOOL noteol; /* NOTEOL flag */
110     BOOL dotall; /* Dot matches any char */
111     BOOL endonly; /* Dollar not before final \n */
112 nigel 7 const uschar *start_subject; /* Start of the subject string */
113     const uschar *end_subject; /* End of the subject string */
114 nigel 3 jmp_buf fail_env; /* Environment for longjump() break out */
115 nigel 7 const uschar *end_match_ptr; /* Subject position at end match */
116 nigel 3 int end_offset_top; /* Highwater mark at end of match */
117     } match_data;
118    
119    
120    
121     /*************************************************
122     * Global variables *
123     *************************************************/
124    
125     /* PCRE is thread-clean and doesn't use any global variables in the normal
126     sense. However, it calls memory allocation and free functions via the two
127     indirections below, which are can be changed by the caller, but are shared
128     between all threads. */
129    
130     void *(*pcre_malloc)(size_t) = malloc;
131     void (*pcre_free)(void *) = free;
132    
133    
134    
135    
136     /*************************************************
137     * Return version string *
138     *************************************************/
139    
140 nigel 7 const char *
141 nigel 3 pcre_version(void)
142     {
143     return PCRE_VERSION;
144     }
145    
146    
147    
148    
149     /*************************************************
150     * Return info about a compiled pattern *
151     *************************************************/
152    
153     /* This function picks potentially useful data out of the private
154     structure.
155    
156     Arguments:
157     external_re points to compiled code
158     optptr where to pass back the options
159     first_char where to pass back the first character,
160     or -1 if multiline and all branches start ^,
161     or -2 otherwise
162    
163     Returns: number of identifying extraction brackets
164     or negative values on error
165     */
166    
167     int
168     pcre_info(const pcre *external_re, int *optptr, int *first_char)
169     {
170 nigel 7 const real_pcre *re = (const real_pcre *)external_re;
171 nigel 3 if (re == NULL) return PCRE_ERROR_NULL;
172     if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
173     if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
174     if (first_char != NULL)
175     *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
176     ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
177     return re->top_bracket;
178     }
179    
180    
181    
182    
183     #ifdef DEBUG
184     /*************************************************
185     * Debugging function to print chars *
186     *************************************************/
187    
188     /* Print a sequence of chars in printable format, stopping at the end of the
189     subject if the requested.
190    
191     Arguments:
192     p points to characters
193     length number to print
194     is_subject TRUE if printing from within md->start_subject
195     md pointer to matching data block, if is_subject is TRUE
196    
197     Returns: nothing
198     */
199    
200 nigel 9 static void
201     pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
202 nigel 3 {
203     int c;
204     if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
205     while (length-- > 0)
206     if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
207     }
208     #endif
209    
210    
211    
212    
213     /*************************************************
214     * Check subpattern for empty operand *
215     *************************************************/
216    
217     /* This function checks a bracketed subpattern to see if any of the paths
218     through it could match an empty string. This is used to diagnose an error if
219     such a subpattern is followed by a quantifier with an unlimited upper bound.
220    
221     Argument:
222     code points to the opening bracket
223    
224     Returns: TRUE or FALSE
225     */
226    
227     static BOOL
228     could_be_empty(uschar *code)
229     {
230     do {
231     uschar *cc = code + 3;
232    
233     /* Scan along the opcodes for this branch; as soon as we find something
234     that matches a non-empty string, break out and advance to test the next
235     branch. If we get to the end of the branch, return TRUE for the whole
236     sub-expression. */
237    
238     for (;;)
239     {
240     /* Test an embedded subpattern; if it could not be empty, break the
241     loop. Otherwise carry on in the branch. */
242    
243 nigel 5 if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)
244 nigel 3 {
245     if (!could_be_empty(cc)) break;
246     do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
247     cc += 3;
248     }
249    
250     else switch (*cc)
251     {
252     /* Reached end of a branch: the subpattern may match the empty string */
253    
254     case OP_ALT:
255     case OP_KET:
256     case OP_KETRMAX:
257     case OP_KETRMIN:
258     return TRUE;
259    
260     /* Skip over assertive subpatterns */
261    
262     case OP_ASSERT:
263     case OP_ASSERT_NOT:
264     do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);
265     cc += 3;
266     break;
267    
268     /* Skip over things that don't match chars */
269    
270     case OP_SOD:
271     case OP_EOD:
272     case OP_CIRC:
273     case OP_DOLL:
274     case OP_BRAZERO:
275     case OP_BRAMINZERO:
276     case OP_NOT_WORD_BOUNDARY:
277     case OP_WORD_BOUNDARY:
278     cc++;
279     break;
280    
281     /* Skip over simple repeats with zero lower bound */
282    
283     case OP_STAR:
284     case OP_MINSTAR:
285     case OP_QUERY:
286     case OP_MINQUERY:
287 nigel 5 case OP_NOTSTAR:
288     case OP_NOTMINSTAR:
289     case OP_NOTQUERY:
290     case OP_NOTMINQUERY:
291 nigel 3 case OP_TYPESTAR:
292     case OP_TYPEMINSTAR:
293     case OP_TYPEQUERY:
294     case OP_TYPEMINQUERY:
295     cc += 2;
296     break;
297    
298     /* Skip over UPTOs (lower bound is zero) */
299    
300     case OP_UPTO:
301     case OP_MINUPTO:
302     case OP_TYPEUPTO:
303     case OP_TYPEMINUPTO:
304     cc += 4;
305     break;
306    
307     /* Check a class or a back reference for a zero minimum */
308    
309     case OP_CLASS:
310 nigel 13 case OP_NEGCLASS:
311 nigel 3 case OP_REF:
312 nigel 5 cc += (*cc == OP_REF)? 2 : 33;
313 nigel 3
314     switch (*cc)
315     {
316     case OP_CRSTAR:
317     case OP_CRMINSTAR:
318     case OP_CRQUERY:
319     case OP_CRMINQUERY:
320     cc++;
321     break;
322    
323     case OP_CRRANGE:
324     case OP_CRMINRANGE:
325     if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;
326     cc += 3;
327     break;
328    
329     default:
330     goto NEXT_BRANCH;
331     }
332     break;
333    
334     /* Anything else matches at least one character */
335    
336     default:
337     goto NEXT_BRANCH;
338     }
339     }
340    
341     NEXT_BRANCH:
342     code += (code[1] << 8) + code[2];
343     }
344     while (*code == OP_ALT);
345    
346     /* No branches match the empty string */
347    
348     return FALSE;
349     }
350    
351    
352    
353     /*************************************************
354     * Handle escapes *
355     *************************************************/
356    
357     /* This function is called when a \ has been encountered. It either returns a
358     positive value for a simple escape such as \n, or a negative value which
359     encodes one of the more complicated things such as \d. On entry, ptr is
360     pointing at the \. On exit, it is on the final character of the escape
361     sequence.
362    
363     Arguments:
364     ptrptr points to the pattern position pointer
365     errorptr points to the pointer to the error message
366     bracount number of previous extracting brackets
367     options the options bits
368     isclass TRUE if inside a character class
369    
370     Returns: zero or positive => a data character
371     negative => a special escape sequence
372     on error, errorptr is set
373     */
374    
375     static int
376 nigel 7 check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
377     int options, BOOL isclass)
378 nigel 3 {
379 nigel 7 const uschar *ptr = *ptrptr;
380 nigel 3 int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */
381     int i;
382    
383     if (c == 0) *errorptr = ERR1;
384    
385     /* Digits or letters may have special meaning; all others are literals. */
386    
387     else if (c < '0' || c > 'z') {}
388    
389     /* Do an initial lookup in a table. A non-zero result is something that can be
390     returned immediately. Otherwise further processing may be required. */
391    
392     else if ((i = escapes[c - '0']) != 0) c = i;
393    
394     /* Escapes that need further processing, or are illegal. */
395    
396     else
397     {
398 nigel 7 const uschar *oldptr;
399 nigel 3 switch (c)
400     {
401     /* The handling of escape sequences consisting of a string of digits
402     starting with one that is not zero is not straightforward. By experiment,
403     the way Perl works seems to be as follows:
404    
405     Outside a character class, the digits are read as a decimal number. If the
406     number is less than 10, or if there are that many previous extracting
407     left brackets, then it is a back reference. Otherwise, up to three octal
408     digits are read to form an escaped byte. Thus \123 is likely to be octal
409     123 (cf \0123, which is octal 012 followed by the literal 3). If the octal
410     value is greater than 377, the least significant 8 bits are taken. Inside a
411     character class, \ followed by a digit is always an octal number. */
412    
413     case '1': case '2': case '3': case '4': case '5':
414     case '6': case '7': case '8': case '9':
415    
416     if (!isclass)
417     {
418     oldptr = ptr;
419     c -= '0';
420     while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)
421     c = c * 10 + *(++ptr) - '0';
422     if (c < 10 || c <= bracount)
423     {
424     c = -(ESC_REF + c);
425     break;
426     }
427     ptr = oldptr; /* Put the pointer back and fall through */
428     }
429    
430     /* Handle an octal number following \. If the first digit is 8 or 9, Perl
431     generates a binary zero byte and treats the digit as a following literal.
432     Thus we have to pull back the pointer by one. */
433    
434     if ((c = *ptr) >= '8')
435     {
436     ptr--;
437     c = 0;
438     break;
439     }
440    
441     /* \0 always starts an octal number, but we may drop through to here with a
442     larger first octal digit */
443    
444     case '0':
445     c -= '0';
446     while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&
447     ptr[1] != '8' && ptr[1] != '9')
448     c = c * 8 + *(++ptr) - '0';
449     break;
450    
451     /* Special escapes not starting with a digit are straightforward */
452    
453     case 'x':
454     c = 0;
455     while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)
456     {
457     ptr++;
458     c = c * 16 + pcre_lcc[*ptr] -
459     (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
460     }
461     break;
462    
463     case 'c':
464     c = *(++ptr);
465     if (c == 0)
466     {
467     *errorptr = ERR2;
468     return 0;
469     }
470    
471     /* A letter is upper-cased; then the 0x40 bit is flipped */
472    
473     if (c >= 'a' && c <= 'z') c = pcre_fcc[c];
474     c ^= 0x40;
475     break;
476    
477     /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
478     other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
479     for Perl compatibility, it is a literal. */
480    
481     default:
482     if ((options & PCRE_EXTRA) != 0) switch(c)
483     {
484     case 'X':
485     c = -ESC_X; /* This could be a lookup if it ever got into Perl */
486     break;
487    
488     default:
489     *errorptr = ERR3;
490     break;
491     }
492     break;
493     }
494     }
495    
496     *ptrptr = ptr;
497     return c;
498     }
499    
500    
501    
502     /*************************************************
503     * Check for counted repeat *
504     *************************************************/
505    
506     /* This function is called when a '{' is encountered in a place where it might
507     start a quantifier. It looks ahead to see if it really is a quantifier or not.
508     It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}
509     where the ddds are digits.
510    
511     Arguments:
512     p pointer to the first char after '{'
513    
514     Returns: TRUE or FALSE
515     */
516    
517     static BOOL
518 nigel 7 is_counted_repeat(const uschar *p)
519 nigel 3 {
520     if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
521     while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
522     if (*p == '}') return TRUE;
523    
524     if (*p++ != ',') return FALSE;
525     if (*p == '}') return TRUE;
526    
527     if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
528     while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
529     return (*p == '}');
530     }
531    
532    
533    
534     /*************************************************
535     * Read repeat counts *
536     *************************************************/
537    
538     /* Read an item of the form {n,m} and return the values. This is called only
539     after is_counted_repeat() has confirmed that a repeat-count quantifier exists,
540     so the syntax is guaranteed to be correct, but we need to check the values.
541    
542     Arguments:
543     p pointer to first char after '{'
544     minp pointer to int for min
545     maxp pointer to int for max
546     returned as -1 if no max
547     errorptr points to pointer to error message
548    
549     Returns: pointer to '}' on success;
550     current ptr on error, with errorptr set
551     */
552    
553 nigel 7 static const uschar *
554     read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
555 nigel 3 {
556     int min = 0;
557     int max = -1;
558    
559     while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
560    
561     if (*p == '}') max = min; else
562     {
563     if (*(++p) != '}')
564     {
565     max = 0;
566     while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
567     if (max < min)
568     {
569     *errorptr = ERR4;
570     return p;
571     }
572     }
573     }
574    
575     /* Do paranoid checks, then fill in the required variables, and pass back the
576     pointer to the terminating '}'. */
577    
578     if (min > 65535 || max > 65535)
579     *errorptr = ERR5;
580     else
581     {
582     *minp = min;
583     *maxp = max;
584     }
585     return p;
586     }
587    
588    
589    
590     /*************************************************
591     * Compile one branch *
592     *************************************************/
593    
594     /* Scan the pattern, compiling it into the code vector.
595    
596     Arguments:
597     options the option bits
598     bracket points to number of brackets used
599     code points to the pointer to the current code point
600     ptrptr points to the current pattern pointer
601     errorptr points to pointer to error message
602    
603     Returns: TRUE on success
604     FALSE, with *errorptr set on error
605     */
606    
607     static BOOL
608 nigel 7 compile_branch(int options, int *brackets, uschar **codeptr,
609     const uschar **ptrptr, const char **errorptr)
610 nigel 3 {
611     int repeat_type, op_type;
612     int repeat_min, repeat_max;
613     int bravalue, length;
614     register int c;
615     register uschar *code = *codeptr;
616 nigel 7 const uschar *ptr = *ptrptr;
617     const uschar *oldptr;
618 nigel 3 uschar *previous = NULL;
619     uschar class[32];
620    
621     /* Switch on next character until the end of the branch */
622    
623     for (;; ptr++)
624     {
625     BOOL negate_class;
626     int class_charcount;
627     int class_lastchar;
628    
629     c = *ptr;
630     if ((options & PCRE_EXTENDED) != 0)
631     {
632     if ((pcre_ctypes[c] & ctype_space) != 0) continue;
633     if (c == '#')
634     {
635     while ((c = *(++ptr)) != 0 && c != '\n');
636     continue;
637     }
638     }
639    
640     switch(c)
641     {
642     /* The branch terminates at end of string, |, or ). */
643    
644     case 0:
645     case '|':
646     case ')':
647     *codeptr = code;
648     *ptrptr = ptr;
649     return TRUE;
650    
651     /* Handle single-character metacharacters */
652    
653     case '^':
654     previous = NULL;
655     *code++ = OP_CIRC;
656     break;
657    
658     case '$':
659     previous = NULL;
660     *code++ = OP_DOLL;
661     break;
662    
663     case '.':
664     previous = code;
665     *code++ = OP_ANY;
666     break;
667    
668     /* Character classes. These always build a 32-byte bitmap of the permitted
669     characters, except in the special case where there is only one character.
670     For negated classes, we build the map as usual, then invert it at the end.
671     */
672    
673     case '[':
674     previous = code;
675    
676 nigel 13 /* If the first character is '^', set the negation flag, and use a
677     different opcode. This only matters if caseless matching is specified at
678     runtime. */
679 nigel 3
680     if ((c = *(++ptr)) == '^')
681     {
682     negate_class = TRUE;
683 nigel 13 *code++ = OP_NEGCLASS;
684 nigel 3 c = *(++ptr);
685     }
686 nigel 13 else
687     {
688     negate_class = FALSE;
689     *code++ = OP_CLASS;
690     }
691 nigel 3
692     /* Keep a count of chars so that we can optimize the case of just a single
693     character. */
694    
695     class_charcount = 0;
696     class_lastchar = -1;
697    
698     /* Initialize the 32-char bit map to all zeros. We have to build the
699     map in a temporary bit of store, in case the class contains only 1
700     character, because in that case the compiled code doesn't use the
701     bit map. */
702    
703     memset(class, 0, 32 * sizeof(uschar));
704    
705     /* Process characters until ] is reached. By writing this as a "do" it
706     means that an initial ] is taken as a data character. */
707    
708     do
709     {
710     if (c == 0)
711     {
712     *errorptr = ERR6;
713     goto FAILED;
714     }
715    
716     /* Backslash may introduce a single character, or it may introduce one
717     of the specials, which just set a flag. Escaped items are checked for
718     validity in the pre-compiling pass. The sequence \b is a special case.
719 nigel 7 Inside a class (and only there) it is treated as backspace. Elsewhere
720 nigel 3 it marks a word boundary. Other escapes have preset maps ready to
721     or into the one we are building. We assume they have more than one
722     character in them, so set class_count bigger than one. */
723    
724     if (c == '\\')
725     {
726     c = check_escape(&ptr, errorptr, *brackets, options, TRUE);
727     if (-c == ESC_b) c = '\b';
728     else if (c < 0)
729     {
730     class_charcount = 10;
731     switch (-c)
732     {
733     case ESC_d:
734     for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];
735     continue;
736    
737     case ESC_D:
738     for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];
739     continue;
740    
741     case ESC_w:
742     for (c = 0; c < 32; c++)
743     class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);
744     continue;
745    
746     case ESC_W:
747     for (c = 0; c < 32; c++)
748     class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);
749     continue;
750    
751     case ESC_s:
752     for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];
753     continue;
754    
755     case ESC_S:
756     for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];
757     continue;
758    
759     default:
760     *errorptr = ERR7;
761     goto FAILED;
762     }
763     }
764     /* Fall through if single character */
765     }
766    
767     /* A single character may be followed by '-' to form a range. However,
768     Perl does not permit ']' to be the end of the range. A '-' character
769     here is treated as a literal. */
770    
771     if (ptr[1] == '-' && ptr[2] != ']')
772     {
773     int d;
774     ptr += 2;
775     d = *ptr;
776    
777     if (d == 0)
778     {
779     *errorptr = ERR6;
780     goto FAILED;
781     }
782    
783     /* The second part of a range can be a single-character escape, but
784     not any of the other escapes. */
785    
786     if (d == '\\')
787     {
788     d = check_escape(&ptr, errorptr, *brackets, options, TRUE);
789     if (d < 0)
790     {
791     if (d == -ESC_b) d = '\b'; else
792     {
793     *errorptr = ERR7;
794     goto FAILED;
795     }
796     }
797     }
798    
799     if (d < c)
800     {
801     *errorptr = ERR8;
802     goto FAILED;
803     }
804    
805     for (; c <= d; c++)
806     {
807     class[c/8] |= (1 << (c&7));
808     if ((options & PCRE_CASELESS) != 0)
809     {
810     int uc = pcre_fcc[c]; /* flip case */
811     class[uc/8] |= (1 << (uc&7));
812     }
813     class_charcount++; /* in case a one-char range */
814     class_lastchar = c;
815     }
816     continue; /* Go get the next char in the class */
817     }
818    
819     /* Handle a lone single character - we can get here for a normal
820     non-escape char, or after \ that introduces a single character. */
821    
822     class [c/8] |= (1 << (c&7));
823     if ((options & PCRE_CASELESS) != 0)
824     {
825     c = pcre_fcc[c]; /* flip case */
826     class[c/8] |= (1 << (c&7));
827     }
828     class_charcount++;
829     class_lastchar = c;
830     }
831    
832     /* Loop until ']' reached; the check for end of string happens inside the
833     loop. This "while" is the end of the "do" above. */
834    
835     while ((c = *(++ptr)) != ']');
836    
837     /* If class_charcount is 1 and class_lastchar is not negative, we saw
838     precisely one character. This doesn't need the whole 32-byte bit map.
839     We turn it into a 1-character OP_CHAR if it's positive, or OP_NOT if
840     it's negative. */
841    
842     if (class_charcount == 1 && class_lastchar >= 0)
843     {
844     if (negate_class)
845     {
846     code[-1] = OP_NOT;
847     }
848     else
849     {
850     code[-1] = OP_CHARS;
851     *code++ = 1;
852     }
853     *code++ = class_lastchar;
854     }
855    
856     /* Otherwise, negate the 32-byte map if necessary, and copy it into
857     the code vector. */
858    
859     else
860     {
861     if (negate_class)
862     for (c = 0; c < 32; c++) code[c] = ~class[c];
863     else
864     memcpy(code, class, 32);
865     code += 32;
866     }
867     break;
868    
869     /* Various kinds of repeat */
870    
871     case '{':
872     if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
873     ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);
874     if (*errorptr != NULL) goto FAILED;
875     goto REPEAT;
876    
877     case '*':
878     repeat_min = 0;
879     repeat_max = -1;
880     goto REPEAT;
881    
882     case '+':
883     repeat_min = 1;
884     repeat_max = -1;
885     goto REPEAT;
886    
887     case '?':
888     repeat_min = 0;
889     repeat_max = 1;
890    
891     REPEAT:
892     if (previous == NULL)
893     {
894     *errorptr = ERR9;
895     goto FAILED;
896     }
897    
898     /* If the next character is '?' this is a minimizing repeat. Advance to the
899     next character. */
900    
901     if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;
902    
903     /* If the maximum is zero then the minimum must also be zero; Perl allows
904     this case, so we do too - by simply omitting the item altogether. */
905    
906     if (repeat_max == 0) code = previous;
907    
908     /* If previous was a string of characters, chop off the last one and use it
909     as the subject of the repeat. If there was only one character, we can
910     abolish the previous item altogether. */
911    
912     else if (*previous == OP_CHARS)
913     {
914     int len = previous[1];
915     if (len == 1)
916     {
917     c = previous[2];
918     code = previous;
919     }
920     else
921     {
922     c = previous[len+1];
923     previous[1]--;
924     code--;
925     }
926     op_type = 0; /* Use single-char op codes */
927     goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
928     }
929    
930     /* If previous was a single negated character ([^a] or similar), we use
931     one of the special opcodes, replacing it. The code is shared with single-
932     character repeats by adding a suitable offset into repeat_type. */
933    
934     else if ((int)*previous == OP_NOT)
935     {
936     op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */
937     c = previous[1];
938     code = previous;
939     goto OUTPUT_SINGLE_REPEAT;
940     }
941    
942     /* If previous was a character type match (\d or similar), abolish it and
943     create a suitable repeat item. The code is shared with single-character
944     repeats by adding a suitable offset into repeat_type. */
945    
946     else if ((int)*previous < OP_EOD || *previous == OP_ANY)
947     {
948     op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
949     c = *previous;
950     code = previous;
951    
952     OUTPUT_SINGLE_REPEAT:
953     repeat_type += op_type; /* Combine both values for many cases */
954    
955     /* A minimum of zero is handled either as the special case * or ?, or as
956     an UPTO, with the maximum given. */
957    
958     if (repeat_min == 0)
959     {
960     if (repeat_max == -1) *code++ = OP_STAR + repeat_type;
961     else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;
962     else
963     {
964     *code++ = OP_UPTO + repeat_type;
965     *code++ = repeat_max >> 8;
966     *code++ = (repeat_max & 255);
967     }
968     }
969    
970     /* The case {1,} is handled as the special case + */
971    
972     else if (repeat_min == 1 && repeat_max == -1)
973     *code++ = OP_PLUS + repeat_type;
974    
975     /* The case {n,n} is just an EXACT, while the general case {n,m} is
976     handled as an EXACT followed by an UPTO. An EXACT of 1 is optimized. */
977    
978     else
979     {
980     if (repeat_min != 1)
981     {
982     *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */
983     *code++ = repeat_min >> 8;
984     *code++ = (repeat_min & 255);
985     }
986    
987     /* If the mininum is 1 and the previous item was a character string,
988     we either have to put back the item that got cancelled if the string
989     length was 1, or add the character back onto the end of a longer
990     string. For a character type nothing need be done; it will just get put
991     back naturally. */
992    
993     else if (*previous == OP_CHARS)
994     {
995     if (code == previous) code += 2; else previous[1]++;
996     }
997    
998 nigel 9 /* If the maximum is unlimited, insert an OP_STAR. */
999 nigel 3
1000 nigel 9 if (repeat_max < 0)
1001 nigel 3 {
1002     *code++ = c;
1003 nigel 9 *code++ = OP_STAR + repeat_type;
1004     }
1005    
1006     /* Else insert an UPTO if the max is greater than the min. */
1007    
1008     else if (repeat_max != repeat_min)
1009     {
1010     *code++ = c;
1011 nigel 3 repeat_max -= repeat_min;
1012     *code++ = OP_UPTO + repeat_type;
1013     *code++ = repeat_max >> 8;
1014     *code++ = (repeat_max & 255);
1015     }
1016     }
1017    
1018     /* The character or character type itself comes last in all cases. */
1019    
1020     *code++ = c;
1021     }
1022    
1023     /* If previous was a character class or a back reference, we put the repeat
1024     stuff after it. */
1025    
1026 nigel 13 else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1027     *previous == OP_REF)
1028 nigel 3 {
1029     if (repeat_min == 0 && repeat_max == -1)
1030     *code++ = OP_CRSTAR + repeat_type;
1031     else if (repeat_min == 1 && repeat_max == -1)
1032     *code++ = OP_CRPLUS + repeat_type;
1033     else if (repeat_min == 0 && repeat_max == 1)
1034     *code++ = OP_CRQUERY + repeat_type;
1035     else
1036     {
1037     *code++ = OP_CRRANGE + repeat_type;
1038     *code++ = repeat_min >> 8;
1039     *code++ = repeat_min & 255;
1040     if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */
1041     *code++ = repeat_max >> 8;
1042     *code++ = repeat_max & 255;
1043     }
1044     }
1045    
1046     /* If previous was a bracket group, we may have to replicate it in certain
1047     cases. If the maximum repeat count is unlimited, check that the bracket
1048     group cannot match the empty string, and diagnose an error if it can. */
1049    
1050     else if ((int)*previous >= OP_BRA)
1051     {
1052     int i;
1053 nigel 9 int len = code - previous;
1054 nigel 3
1055     if (repeat_max == -1 && could_be_empty(previous))
1056     {
1057     *errorptr = ERR10;
1058     goto FAILED;
1059     }
1060    
1061     /* If the minimum is greater than zero, and the maximum is unlimited or
1062     equal to the minimum, the first copy remains where it is, and is
1063     replicated up to the minimum number of times. This case includes the +
1064     repeat, but of course no replication is needed in that case. */
1065    
1066     if (repeat_min > 0 && (repeat_max == -1 || repeat_max == repeat_min))
1067     {
1068     for (i = 1; i < repeat_min; i++)
1069     {
1070 nigel 9 memcpy(code, previous, len);
1071     code += len;
1072 nigel 3 }
1073     }
1074    
1075     /* If the minimum is zero, stick BRAZERO in front of the first copy.
1076     Then, if there is a fixed upper limit, replicated up to that many times,
1077     sticking BRAZERO in front of all the optional ones. */
1078    
1079     else
1080     {
1081     if (repeat_min == 0)
1082     {
1083 nigel 9 memmove(previous+1, previous, len);
1084 nigel 3 code++;
1085     *previous++ = OP_BRAZERO + repeat_type;
1086     }
1087    
1088     for (i = 1; i < repeat_min; i++)
1089     {
1090 nigel 9 memcpy(code, previous, len);
1091     code += len;
1092 nigel 3 }
1093    
1094     for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1095     {
1096     *code++ = OP_BRAZERO + repeat_type;
1097 nigel 9 memcpy(code, previous, len);
1098     code += len;
1099 nigel 3 }
1100     }
1101    
1102     /* If the maximum is unlimited, set a repeater in the final copy. */
1103    
1104     if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;
1105     }
1106    
1107     /* Else there's some kind of shambles */
1108    
1109     else
1110     {
1111     *errorptr = ERR11;
1112     goto FAILED;
1113     }
1114    
1115     /* In all case we no longer have a previous item. */
1116    
1117     previous = NULL;
1118     break;
1119    
1120    
1121     /* Start of nested bracket sub-expression, or comment or lookahead.
1122     First deal with special things that can come after a bracket; all are
1123     introduced by ?, and the appearance of any of them means that this is not a
1124     referencing group. They were checked for validity in the first pass over
1125     the string, so we don't have to check for syntax errors here. */
1126    
1127     case '(':
1128     previous = code; /* Only real brackets can be repeated */
1129     if (*(++ptr) == '?')
1130     {
1131     bravalue = OP_BRA;
1132    
1133     switch (*(++ptr))
1134     {
1135     case '#':
1136     case 'i':
1137     case 'm':
1138     case 's':
1139     case 'x':
1140     ptr++;
1141     while (*ptr != ')') ptr++;
1142     previous = NULL;
1143     continue;
1144    
1145     case ':': /* Non-extracting bracket */
1146     ptr++;
1147     break;
1148    
1149     case '=': /* Assertions can't be repeated */
1150     bravalue = OP_ASSERT;
1151     ptr++;
1152     previous = NULL;
1153     break;
1154    
1155     case '!':
1156     bravalue = OP_ASSERT_NOT;
1157     ptr++;
1158     previous = NULL;
1159     break;
1160    
1161     case '>': /* "Match once" brackets */
1162     if ((options & PCRE_EXTRA) != 0) /* Not yet standard */
1163     {
1164     bravalue = OP_ONCE;
1165     ptr++;
1166     previous = NULL;
1167     break;
1168     }
1169     /* Else fall through */
1170    
1171     default:
1172     *errorptr = ERR12;
1173     goto FAILED;
1174     }
1175     }
1176    
1177     /* Else we have a referencing group */
1178    
1179     else
1180     {
1181     if (++(*brackets) > EXTRACT_MAX)
1182     {
1183     *errorptr = ERR13;
1184     goto FAILED;
1185     }
1186     bravalue = OP_BRA + *brackets;
1187     }
1188    
1189     /* Process nested bracketed re; at end pointer is on the bracket. We copy
1190     code into a non-register variable in order to be able to pass its address
1191     because some compilers complain otherwise. */
1192    
1193     *code = bravalue;
1194     {
1195     uschar *mcode = code;
1196     if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))
1197     goto FAILED;
1198     code = mcode;
1199     }
1200    
1201     if (*ptr != ')')
1202     {
1203     *errorptr = ERR14;
1204     goto FAILED;
1205     }
1206     break;
1207    
1208     /* Check \ for being a real metacharacter; if not, fall through and handle
1209     it as a data character at the start of a string. Escape items are checked
1210     for validity in the pre-compiling pass. */
1211    
1212     case '\\':
1213     oldptr = ptr;
1214     c = check_escape(&ptr, errorptr, *brackets, options, FALSE);
1215    
1216     /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1217     are arranged to be the negation of the corresponding OP_values. For the
1218     back references, the values are ESC_REF plus the reference number. Only
1219     back references and those types that consume a character may be repeated.
1220     We can test for values between ESC_b and ESC_Z for the latter; this may
1221     have to change if any new ones are ever created. */
1222    
1223     if (c < 0)
1224     {
1225     if (-c >= ESC_REF)
1226     {
1227     int refnum = -c - ESC_REF;
1228     if (*brackets < refnum)
1229     {
1230     *errorptr = ERR15;
1231     goto FAILED;
1232     }
1233     previous = code;
1234     *code++ = OP_REF;
1235     *code++ = refnum;
1236     }
1237     else
1238     {
1239     previous = (-c > ESC_b && -c < ESC_X)? code : NULL;
1240     *code++ = -c;
1241     }
1242     continue;
1243     }
1244    
1245 nigel 7 /* Data character: reset and fall through */
1246 nigel 3
1247     ptr = oldptr;
1248     c = '\\';
1249    
1250     /* Handle a run of data characters until a metacharacter is encountered.
1251     The first character is guaranteed not to be whitespace or # when the
1252     extended flag is set. */
1253    
1254     NORMAL_CHAR:
1255     default:
1256     previous = code;
1257     *code = OP_CHARS;
1258     code += 2;
1259     length = 0;
1260    
1261     do
1262     {
1263     if ((options & PCRE_EXTENDED) != 0)
1264     {
1265     if ((pcre_ctypes[c] & ctype_space) != 0) continue;
1266     if (c == '#')
1267     {
1268     while ((c = *(++ptr)) != 0 && c != '\n');
1269     if (c == 0) break;
1270     continue;
1271     }
1272     }
1273    
1274     /* Backslash may introduce a data char or a metacharacter. Escaped items
1275     are checked for validity in the pre-compiling pass. Stop the string
1276     before a metaitem. */
1277    
1278     if (c == '\\')
1279     {
1280     oldptr = ptr;
1281     c = check_escape(&ptr, errorptr, *brackets, options, FALSE);
1282     if (c < 0) { ptr = oldptr; break; }
1283     }
1284    
1285     /* Ordinary character or single-char escape */
1286    
1287     *code++ = c;
1288     length++;
1289     }
1290    
1291     /* This "while" is the end of the "do" above. */
1292    
1293     while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);
1294    
1295     /* Compute the length and set it in the data vector, and advance to
1296     the next state. */
1297    
1298     previous[1] = length;
1299     ptr--;
1300     break;
1301     }
1302     } /* end of big loop */
1303    
1304     /* Control never reaches here by falling through, only by a goto for all the
1305     error states. Pass back the position in the pattern so that it can be displayed
1306     to the user for diagnosing the error. */
1307    
1308     FAILED:
1309     *ptrptr = ptr;
1310     return FALSE;
1311     }
1312    
1313    
1314    
1315    
1316     /*************************************************
1317     * Compile sequence of alternatives *
1318     *************************************************/
1319    
1320     /* On entry, ptr is pointing past the bracket character, but on return
1321     it points to the closing bracket, or vertical bar, or end of string.
1322     The code variable is pointing at the byte into which the BRA operator has been
1323     stored.
1324    
1325     Argument:
1326     options the option bits
1327     brackets -> int containing the number of extracting brackets used
1328     codeptr -> the address of the current code pointer
1329     ptrptr -> the address of the current pattern pointer
1330     errorptr -> pointer to error message
1331    
1332     Returns: TRUE on success
1333     */
1334    
1335     static BOOL
1336 nigel 7 compile_regex(int options, int *brackets, uschar **codeptr,
1337     const uschar **ptrptr, const char **errorptr)
1338 nigel 3 {
1339 nigel 7 const uschar *ptr = *ptrptr;
1340 nigel 3 uschar *code = *codeptr;
1341     uschar *start_bracket = code;
1342    
1343     for (;;)
1344     {
1345     int length;
1346     uschar *last_branch = code;
1347    
1348     code += 3;
1349     if (!compile_branch(options, brackets, &code, &ptr, errorptr))
1350     {
1351     *ptrptr = ptr;
1352     return FALSE;
1353     }
1354    
1355     /* Fill in the length of the last branch */
1356    
1357     length = code - last_branch;
1358     last_branch[1] = length >> 8;
1359     last_branch[2] = length & 255;
1360    
1361     /* Reached end of expression, either ')' or end of pattern. Insert a
1362     terminating ket and the length of the whole bracketed item, and return,
1363     leaving the pointer at the terminating char. */
1364    
1365     if (*ptr != '|')
1366     {
1367     length = code - start_bracket;
1368     *code++ = OP_KET;
1369     *code++ = length >> 8;
1370     *code++ = length & 255;
1371     *codeptr = code;
1372     *ptrptr = ptr;
1373     return TRUE;
1374     }
1375    
1376     /* Another branch follows; insert an "or" node and advance the pointer. */
1377    
1378     *code = OP_ALT;
1379     ptr++;
1380     }
1381     /* Control never reaches here */
1382     }
1383    
1384    
1385    
1386     /*************************************************
1387     * Check for anchored expression *
1388     *************************************************/
1389    
1390     /* Try to find out if this is an anchored regular expression. Consider each
1391     alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket
1392     all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then
1393     it's anchored. However, if this is a multiline pattern, then only OP_SOD
1394     counts, since OP_CIRC can match in the middle.
1395    
1396     A branch is also implicitly anchored if it starts with .* because that will try
1397     the rest of the pattern at all possible matching points, so there is no point
1398     trying them again.
1399    
1400     Argument: points to start of expression (the bracket)
1401     Returns: TRUE or FALSE
1402     */
1403    
1404     static BOOL
1405 nigel 7 is_anchored(register const uschar *code, BOOL multiline)
1406 nigel 3 {
1407     do {
1408     int op = (int)code[3];
1409     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)
1410     { if (!is_anchored(code+3, multiline)) return FALSE; }
1411     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1412     { if (code[4] != OP_ANY) return FALSE; }
1413     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;
1414     code += (code[1] << 8) + code[2];
1415     }
1416     while (*code == OP_ALT);
1417     return TRUE;
1418     }
1419    
1420    
1421    
1422     /*************************************************
1423     * Check for start with \n line expression *
1424     *************************************************/
1425    
1426     /* This is called for multiline expressions to try to find out if every branch
1427     starts with ^ so that "first char" processing can be done to speed things up.
1428    
1429     Argument: points to start of expression (the bracket)
1430     Returns: TRUE or FALSE
1431     */
1432    
1433     static BOOL
1434 nigel 7 is_startline(const uschar *code)
1435 nigel 3 {
1436     do {
1437     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
1438     { if (!is_startline(code+3)) return FALSE; }
1439     else if (code[3] != OP_CIRC) return FALSE;
1440     code += (code[1] << 8) + code[2];
1441     }
1442     while (*code == OP_ALT);
1443     return TRUE;
1444     }
1445    
1446    
1447    
1448     /*************************************************
1449     * Check for fixed first char *
1450     *************************************************/
1451    
1452     /* Try to find out if there is a fixed first character. This is called for
1453     unanchored expressions, as it speeds up their processing quite considerably.
1454     Consider each alternative branch. If they all start with the same char, or with
1455     a bracket all of whose alternatives start with the same char (recurse ad lib),
1456     then we return that char, otherwise -1.
1457    
1458     Argument: points to start of expression (the bracket)
1459     Returns: -1 or the fixed first char
1460     */
1461    
1462     static int
1463     find_firstchar(uschar *code)
1464     {
1465     register int c = -1;
1466     do
1467     {
1468     register int charoffset = 4;
1469    
1470     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
1471     {
1472     register int d;
1473     if ((d = find_firstchar(code+3)) < 0) return -1;
1474     if (c < 0) c = d; else if (c != d) return -1;
1475     }
1476    
1477     else switch(code[3])
1478     {
1479     default:
1480     return -1;
1481    
1482     case OP_EXACT: /* Fall through */
1483     charoffset++;
1484    
1485     case OP_CHARS: /* Fall through */
1486     charoffset++;
1487    
1488     case OP_PLUS:
1489     case OP_MINPLUS:
1490     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;
1491     break;
1492     }
1493     code += (code[1] << 8) + code[2];
1494     }
1495     while (*code == OP_ALT);
1496     return c;
1497     }
1498    
1499    
1500    
1501     /*************************************************
1502     * Compile a Regular Expression *
1503     *************************************************/
1504    
1505     /* This function takes a string and returns a pointer to a block of store
1506     holding a compiled version of the expression.
1507    
1508     Arguments:
1509     pattern the regular expression
1510     options various option bits
1511     errorptr pointer to pointer to error text
1512     erroroffset ptr offset in pattern where error was detected
1513    
1514     Returns: pointer to compiled data block, or NULL on error,
1515     with errorptr and erroroffset set
1516     */
1517    
1518     pcre *
1519 nigel 7 pcre_compile(const char *pattern, int options, const char **errorptr,
1520 nigel 3 int *erroroffset)
1521     {
1522     real_pcre *re;
1523     int spaces = 0;
1524     int length = 3; /* For initial BRA plus length */
1525     int runlength;
1526     int c, size;
1527     int bracount = 0;
1528     int brastack[200];
1529     int top_backref = 0;
1530 nigel 7 unsigned int brastackptr = 0;
1531     uschar *code;
1532     const uschar *ptr;
1533 nigel 3
1534     #ifdef DEBUG
1535     uschar *code_base, *code_end;
1536     #endif
1537    
1538     /* We can't pass back an error message if errorptr is NULL; I guess the best we
1539     can do is just return NULL. */
1540    
1541     if (errorptr == NULL) return NULL;
1542     *errorptr = NULL;
1543    
1544     /* However, we can give a message for this error */
1545    
1546     if (erroroffset == NULL)
1547     {
1548     *errorptr = ERR16;
1549     return NULL;
1550     }
1551     *erroroffset = 0;
1552    
1553     if ((options & ~PUBLIC_OPTIONS) != 0)
1554     {
1555     *errorptr = ERR17;
1556     return NULL;
1557     }
1558    
1559 nigel 9 DPRINTF(("------------------------------------------------------------------\n"));
1560     DPRINTF(("%s\n", pattern));
1561 nigel 3
1562     /* The first thing to do is to make a pass over the pattern to compute the
1563     amount of store required to hold the compiled code. This does not have to be
1564     perfect as long as errors are overestimates. At the same time we can detect any
1565     internal flag settings. Make an attempt to correct for any counted white space
1566     if an "extended" flag setting appears late in the pattern. We can't be so
1567     clever for #-comments. */
1568    
1569 nigel 7 ptr = (const uschar *)(pattern - 1);
1570 nigel 3 while ((c = *(++ptr)) != 0)
1571     {
1572     int min, max;
1573     int class_charcount;
1574    
1575     if ((pcre_ctypes[c] & ctype_space) != 0)
1576     {
1577     if ((options & PCRE_EXTENDED) != 0) continue;
1578     spaces++;
1579     }
1580    
1581     if (c == '#' && (options & PCRE_EXTENDED) != 0)
1582     {
1583     while ((c = *(++ptr)) != 0 && c != '\n');
1584     continue;
1585     }
1586    
1587     switch(c)
1588     {
1589     /* A backslashed item may be an escaped "normal" character or a
1590     character type. For a "normal" character, put the pointers and
1591     character back so that tests for whitespace etc. in the input
1592     are done correctly. */
1593    
1594     case '\\':
1595     {
1596 nigel 7 const uschar *save_ptr = ptr;
1597 nigel 3 c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1598     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1599     if (c >= 0)
1600     {
1601     ptr = save_ptr;
1602     c = '\\';
1603     goto NORMAL_CHAR;
1604     }
1605     }
1606     length++;
1607    
1608     /* A back reference needs an additional char, plus either one or 5
1609     bytes for a repeat. We also need to keep the value of the highest
1610     back reference. */
1611    
1612     if (c <= -ESC_REF)
1613     {
1614     int refnum = -c - ESC_REF;
1615     if (refnum > top_backref) top_backref = refnum;
1616     length++; /* For single back reference */
1617     if (ptr[1] == '{' && is_counted_repeat(ptr+2))
1618     {
1619     ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1620     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1621     if ((min == 0 && (max == 1 || max == -1)) ||
1622     (min == 1 && max == -1))
1623     length++;
1624     else length += 5;
1625     if (ptr[1] == '?') ptr++;
1626     }
1627     }
1628     continue;
1629    
1630     case '^':
1631     case '.':
1632     case '$':
1633     case '*': /* These repeats won't be after brackets; */
1634     case '+': /* those are handled separately */
1635     case '?':
1636     length++;
1637     continue;
1638    
1639     /* This covers the cases of repeats after a single char, metachar, class,
1640     or back reference. */
1641    
1642     case '{':
1643     if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;
1644     ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);
1645     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1646     if ((min == 0 && (max == 1 || max == -1)) ||
1647     (min == 1 && max == -1))
1648     length++;
1649     else
1650     {
1651     length--; /* Uncount the original char or metachar */
1652     if (min == 1) length++; else if (min > 0) length += 4;
1653     if (max > 0) length += 4; else length += 2;
1654     }
1655     if (ptr[1] == '?') ptr++;
1656     continue;
1657    
1658     /* An alternation contains an offset to the next branch or ket. */
1659     case '|':
1660     length += 3;
1661     continue;
1662    
1663     /* A character class uses 33 characters. Don't worry about character types
1664     that aren't allowed in classes - they'll get picked up during the compile.
1665     A character class that contains only one character uses 2 or 3 bytes,
1666     depending on whether it is negated or not. Notice this where we can. */
1667    
1668     case '[':
1669     class_charcount = 0;
1670     if (*(++ptr) == '^') ptr++;
1671     do
1672     {
1673     if (*ptr == '\\')
1674     {
1675 nigel 9 int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1676 nigel 3 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1677 nigel 9 if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1678 nigel 3 }
1679     else class_charcount++;
1680     ptr++;
1681     }
1682     while (*ptr != 0 && *ptr != ']');
1683    
1684     /* Repeats for negated single chars are handled by the general code */
1685    
1686     if (class_charcount == 1) length += 3; else
1687     {
1688     length += 33;
1689    
1690     /* A repeat needs either 1 or 5 bytes. */
1691    
1692 nigel 9 if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1693 nigel 3 {
1694     ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1695     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1696     if ((min == 0 && (max == 1 || max == -1)) ||
1697     (min == 1 && max == -1))
1698     length++;
1699     else length += 5;
1700     if (ptr[1] == '?') ptr++;
1701     }
1702     }
1703     continue;
1704    
1705     /* Brackets may be genuine groups or special things */
1706    
1707     case '(':
1708    
1709     /* Handle special forms of bracket, which all start (? */
1710    
1711     if (ptr[1] == '?') switch (c = ptr[2])
1712     {
1713     /* Skip over comments entirely */
1714     case '#':
1715     ptr += 3;
1716     while (*ptr != 0 && *ptr != ')') ptr++;
1717     if (*ptr == 0)
1718     {
1719     *errorptr = ERR18;
1720     goto PCRE_ERROR_RETURN;
1721     }
1722     continue;
1723    
1724     /* Non-referencing groups and lookaheads just move the pointer on, and
1725     then behave like a non-special bracket, except that they don't increment
1726     the count of extracting brackets. */
1727    
1728     case ':':
1729     case '=':
1730     case '!':
1731     ptr += 2;
1732     break;
1733    
1734     /* Ditto for the "once only" bracket, allowed only if the extra bit
1735     is set. */
1736    
1737     case '>':
1738     if ((options & PCRE_EXTRA) != 0)
1739     {
1740     ptr += 2;
1741     break;
1742     }
1743     /* Else fall thourh */
1744    
1745     /* Else loop setting valid options until ) is met. Anything else is an
1746     error. */
1747    
1748     default:
1749     ptr += 2;
1750     for (;; ptr++)
1751     {
1752     if ((c = *ptr) == 'i')
1753     {
1754     options |= PCRE_CASELESS;
1755     continue;
1756     }
1757     else if ((c = *ptr) == 'm')
1758     {
1759     options |= PCRE_MULTILINE;
1760     continue;
1761     }
1762     else if (c == 's')
1763     {
1764     options |= PCRE_DOTALL;
1765     continue;
1766     }
1767     else if (c == 'x')
1768     {
1769     options |= PCRE_EXTENDED;
1770     length -= spaces; /* Already counted spaces */
1771     continue;
1772     }
1773     else if (c == ')') break;
1774    
1775     *errorptr = ERR12;
1776     goto PCRE_ERROR_RETURN;
1777     }
1778     continue; /* End of this bracket handling */
1779     }
1780    
1781     /* Extracting brackets must be counted so we can process escapes in a
1782     Perlish way. */
1783    
1784     else bracount++;
1785    
1786     /* Non-special forms of bracket. Save length for computing whole length
1787     at end if there's a repeat that requires duplication of the group. */
1788    
1789     if (brastackptr >= sizeof(brastack)/sizeof(int))
1790     {
1791     *errorptr = ERR19;
1792     goto PCRE_ERROR_RETURN;
1793     }
1794    
1795     brastack[brastackptr++] = length;
1796     length += 3;
1797     continue;
1798    
1799     /* Handle ket. Look for subsequent max/min; for certain sets of values we
1800 nigel 9 have to replicate this bracket up to that many times. If brastackptr is
1801     0 this is an unmatched bracket which will generate an error, but take care
1802     not to try to access brastack[-1]. */
1803 nigel 3
1804     case ')':
1805     length += 3;
1806     {
1807 nigel 9 int minval = 1;
1808     int maxval = 1;
1809     int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1810 nigel 3
1811     /* Leave ptr at the final char; for read_repeat_counts this happens
1812     automatically; for the others we need an increment. */
1813    
1814     if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1815     {
1816 nigel 9 ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1817 nigel 3 if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1818     }
1819 nigel 9 else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1820     else if (c == '+') { maxval = -1; ptr++; }
1821     else if (c == '?') { minval = 0; ptr++; }
1822 nigel 3
1823 nigel 9 /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1824     if there is a limited maximum we have to replicate up to maxval-1 times
1825     and allow for a BRAZERO item before each optional copy, as we also have
1826     to do before the first copy if the minimum is zero. */
1827 nigel 3
1828 nigel 9 if (minval == 0) length++;
1829     else if (minval > 1) length += (minval - 1) * duplength;
1830     if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1831 nigel 3 }
1832     continue;
1833    
1834     /* Non-special character. For a run of such characters the length required
1835     is the number of characters + 2, except that the maximum run length is 255.
1836     We won't get a skipped space or a non-data escape or the start of a #
1837     comment as the first character, so the length can't be zero. */
1838    
1839     NORMAL_CHAR:
1840     default:
1841     length += 2;
1842     runlength = 0;
1843     do
1844     {
1845     if ((pcre_ctypes[c] & ctype_space) != 0)
1846     {
1847     if ((options & PCRE_EXTENDED) != 0) continue;
1848     spaces++;
1849     }
1850    
1851     if (c == '#' && (options & PCRE_EXTENDED) != 0)
1852     {
1853     while ((c = *(++ptr)) != 0 && c != '\n');
1854     continue;
1855     }
1856    
1857     /* Backslash may introduce a data char or a metacharacter; stop the
1858     string before the latter. */
1859    
1860     if (c == '\\')
1861     {
1862 nigel 7 const uschar *saveptr = ptr;
1863 nigel 3 c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1864     if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1865     if (c < 0) { ptr = saveptr; break; }
1866     }
1867    
1868     /* Ordinary character or single-char escape */
1869    
1870     runlength++;
1871     }
1872    
1873     /* This "while" is the end of the "do" above. */
1874    
1875     while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);
1876    
1877     ptr--;
1878     length += runlength;
1879     continue;
1880     }
1881     }
1882    
1883     length += 4; /* For final KET and END */
1884    
1885     if (length > 65539)
1886     {
1887     *errorptr = ERR20;
1888     return NULL;
1889     }
1890    
1891     /* Compute the size of data block needed and get it, either from malloc or
1892 nigel 9 externally provided function. We specify "code[0]" in the offsetof() expression
1893     rather than just "code", because it has been reported that one broken compiler
1894     fails on "code" because it is also an independent variable. It should make no
1895     difference to the value of the offsetof(). */
1896 nigel 3
1897 nigel 9 size = length + offsetof(real_pcre, code[0]);
1898 nigel 3 re = (real_pcre *)(pcre_malloc)(size);
1899    
1900     if (re == NULL)
1901     {
1902     *errorptr = ERR21;
1903     return NULL;
1904     }
1905    
1906 nigel 9 /* Put in the magic number and the options. */
1907    
1908 nigel 3 re->magic_number = MAGIC_NUMBER;
1909     re->options = options;
1910    
1911     /* Set up a starting, non-extracting bracket, then compile the expression. On
1912     error, *errorptr will be set non-NULL, so we don't need to look at the result
1913     of the function here. */
1914    
1915 nigel 7 ptr = (const uschar *)pattern;
1916 nigel 3 code = re->code;
1917     *code = OP_BRA;
1918     bracount = 0;
1919     (void)compile_regex(options, &bracount, &code, &ptr, errorptr);
1920     re->top_bracket = bracount;
1921     re->top_backref = top_backref;
1922    
1923     /* If not reached end of pattern on success, there's an excess bracket. */
1924    
1925     if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22;
1926    
1927     /* Fill in the terminating state and check for disastrous overflow, but
1928     if debugging, leave the test till after things are printed out. */
1929    
1930     *code++ = OP_END;
1931    
1932     #ifndef DEBUG
1933     if (code - re->code > length) *errorptr = ERR23;
1934     #endif
1935    
1936     /* Failed to compile */
1937    
1938     if (*errorptr != NULL)
1939     {
1940     (pcre_free)(re);
1941     PCRE_ERROR_RETURN:
1942 nigel 7 *erroroffset = ptr - (const uschar *)pattern;
1943 nigel 3 return NULL;
1944     }
1945    
1946     /* If the anchored option was not passed, set flag if we can determine that it
1947     is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if
1948     we can determine what the first character has to be, because that speeds up
1949     unanchored matches no end. In the case of multiline matches, an alternative is
1950     to set the PCRE_STARTLINE flag if all branches start with ^. */
1951    
1952     if ((options & PCRE_ANCHORED) == 0)
1953     {
1954     if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))
1955     re->options |= PCRE_ANCHORED;
1956     else
1957     {
1958 nigel 9 int ch = find_firstchar(re->code);
1959     if (ch >= 0)
1960 nigel 3 {
1961 nigel 9 re->first_char = ch;
1962 nigel 3 re->options |= PCRE_FIRSTSET;
1963     }
1964     else if (is_startline(re->code))
1965     re->options |= PCRE_STARTLINE;
1966     }
1967     }
1968    
1969     /* Print out the compiled data for debugging */
1970    
1971     #ifdef DEBUG
1972    
1973     printf("Length = %d top_bracket = %d top_backref=%d\n",
1974     length, re->top_bracket, re->top_backref);
1975    
1976     if (re->options != 0)
1977     {
1978     printf("%s%s%s%s%s%s%s\n",
1979     ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
1980     ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
1981     ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
1982     ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
1983     ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
1984     ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
1985     ((re->options & PCRE_EXTRA) != 0)? "extra " : "");
1986     }
1987    
1988     if ((re->options & PCRE_FIRSTSET) != 0)
1989     {
1990     if (isprint(re->first_char)) printf("First char = %c\n", re->first_char);
1991     else printf("First char = \\x%02x\n", re->first_char);
1992     }
1993    
1994     code_end = code;
1995     code_base = code = re->code;
1996    
1997     while (code < code_end)
1998     {
1999     int charlength;
2000    
2001     printf("%3d ", code - code_base);
2002    
2003     if (*code >= OP_BRA)
2004     {
2005     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
2006     code += 2;
2007     }
2008    
2009     else switch(*code)
2010     {
2011     case OP_CHARS:
2012     charlength = *(++code);
2013     printf("%3d ", charlength);
2014     while (charlength-- > 0)
2015     if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);
2016     break;
2017    
2018     case OP_KETRMAX:
2019     case OP_KETRMIN:
2020     case OP_ALT:
2021     case OP_KET:
2022     case OP_ASSERT:
2023     case OP_ASSERT_NOT:
2024     case OP_ONCE:
2025     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2026     code += 2;
2027     break;
2028    
2029     case OP_STAR:
2030     case OP_MINSTAR:
2031     case OP_PLUS:
2032     case OP_MINPLUS:
2033     case OP_QUERY:
2034     case OP_MINQUERY:
2035     case OP_TYPESTAR:
2036     case OP_TYPEMINSTAR:
2037     case OP_TYPEPLUS:
2038     case OP_TYPEMINPLUS:
2039     case OP_TYPEQUERY:
2040     case OP_TYPEMINQUERY:
2041     if (*code >= OP_TYPESTAR)
2042     printf(" %s", OP_names[code[1]]);
2043     else if (isprint(c = code[1])) printf(" %c", c);
2044     else printf(" \\x%02x", c);
2045     printf("%s", OP_names[*code++]);
2046     break;
2047    
2048     case OP_EXACT:
2049     case OP_UPTO:
2050     case OP_MINUPTO:
2051     if (isprint(c = code[3])) printf(" %c{", c);
2052     else printf(" \\x%02x{", c);
2053 nigel 11 if (*code != OP_EXACT) printf("0,");
2054 nigel 3 printf("%d}", (code[1] << 8) + code[2]);
2055     if (*code == OP_MINUPTO) printf("?");
2056     code += 3;
2057     break;
2058    
2059     case OP_TYPEEXACT:
2060     case OP_TYPEUPTO:
2061     case OP_TYPEMINUPTO:
2062     printf(" %s{", OP_names[code[3]]);
2063     if (*code != OP_TYPEEXACT) printf(",");
2064     printf("%d}", (code[1] << 8) + code[2]);
2065     if (*code == OP_TYPEMINUPTO) printf("?");
2066     code += 3;
2067     break;
2068    
2069     case OP_NOT:
2070     if (isprint(c = *(++code))) printf(" [^%c]", c);
2071     else printf(" [^\\x%02x]", c);
2072     break;
2073    
2074     case OP_NOTSTAR:
2075     case OP_NOTMINSTAR:
2076     case OP_NOTPLUS:
2077     case OP_NOTMINPLUS:
2078     case OP_NOTQUERY:
2079     case OP_NOTMINQUERY:
2080     if (isprint(c = code[1])) printf(" [^%c]", c);
2081     else printf(" [^\\x%02x]", c);
2082     printf("%s", OP_names[*code++]);
2083     break;
2084    
2085     case OP_NOTEXACT:
2086     case OP_NOTUPTO:
2087     case OP_NOTMINUPTO:
2088     if (isprint(c = code[3])) printf(" [^%c]{", c);
2089     else printf(" [^\\x%02x]{", c);
2090     if (*code != OP_NOTEXACT) printf(",");
2091     printf("%d}", (code[1] << 8) + code[2]);
2092     if (*code == OP_NOTMINUPTO) printf("?");
2093     code += 3;
2094     break;
2095    
2096     case OP_REF:
2097     printf(" \\%d", *(++code));
2098 nigel 9 code ++;
2099     goto CLASS_REF_REPEAT;
2100 nigel 3
2101     case OP_CLASS:
2102 nigel 13 case OP_NEGCLASS:
2103 nigel 3 {
2104     int i, min, max;
2105    
2106 nigel 13 if (*code++ == OP_CLASS) printf(" [");
2107     else printf(" ^[");
2108 nigel 3
2109     for (i = 0; i < 256; i++)
2110     {
2111     if ((code[i/8] & (1 << (i&7))) != 0)
2112     {
2113     int j;
2114     for (j = i+1; j < 256; j++)
2115     if ((code[j/8] & (1 << (j&7))) == 0) break;
2116     if (i == '-' || i == ']') printf("\\");
2117     if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);
2118     if (--j > i)
2119     {
2120     printf("-");
2121     if (j == '-' || j == ']') printf("\\");
2122     if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);
2123     }
2124     i = j;
2125     }
2126     }
2127     printf("]");
2128     code += 32;
2129    
2130 nigel 9 CLASS_REF_REPEAT:
2131    
2132 nigel 3 switch(*code)
2133     {
2134     case OP_CRSTAR:
2135     case OP_CRMINSTAR:
2136     case OP_CRPLUS:
2137     case OP_CRMINPLUS:
2138     case OP_CRQUERY:
2139     case OP_CRMINQUERY:
2140     printf("%s", OP_names[*code]);
2141     break;
2142    
2143     case OP_CRRANGE:
2144     case OP_CRMINRANGE:
2145     min = (code[1] << 8) + code[2];
2146     max = (code[3] << 8) + code[4];
2147     if (max == 0) printf("{%d,}", min);
2148     else printf("{%d,%d}", min, max);
2149     if (*code == OP_CRMINRANGE) printf("?");
2150     code += 4;
2151     break;
2152    
2153     default:
2154     code--;
2155     }
2156     }
2157     break;
2158    
2159     /* Anything else is just a one-node item */
2160    
2161     default:
2162     printf(" %s", OP_names[*code]);
2163     break;
2164     }
2165    
2166     code++;
2167     printf("\n");
2168     }
2169     printf("------------------------------------------------------------------\n");
2170    
2171     /* This check is done here in the debugging case so that the code that
2172     was compiled can be seen. */
2173    
2174     if (code - re->code > length)
2175     {
2176     *errorptr = ERR23;
2177     (pcre_free)(re);
2178     *erroroffset = ptr - (uschar *)pattern;
2179     return NULL;
2180     }
2181     #endif
2182    
2183     return (pcre *)re;
2184     }
2185    
2186    
2187    
2188     /*************************************************
2189     * Match a character type *
2190     *************************************************/
2191    
2192     /* Not used in all the places it might be as it's sometimes faster
2193     to put the code inline.
2194    
2195     Arguments:
2196     type the character type
2197     c the character
2198     dotall the dotall flag
2199    
2200     Returns: TRUE if character is of the type
2201     */
2202    
2203     static BOOL
2204     match_type(int type, int c, BOOL dotall)
2205     {
2206    
2207     #ifdef DEBUG
2208     if (isprint(c)) printf("matching subject %c against ", c);
2209     else printf("matching subject \\x%02x against ", c);
2210     printf("%s\n", OP_names[type]);
2211     #endif
2212    
2213     switch(type)
2214     {
2215     case OP_ANY: return dotall || c != '\n';
2216     case OP_NOT_DIGIT: return (pcre_ctypes[c] & ctype_digit) == 0;
2217     case OP_DIGIT: return (pcre_ctypes[c] & ctype_digit) != 0;
2218     case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;
2219     case OP_WHITESPACE: return (pcre_ctypes[c] & ctype_space) != 0;
2220     case OP_NOT_WORDCHAR: return (pcre_ctypes[c] & ctype_word) == 0;
2221     case OP_WORDCHAR: return (pcre_ctypes[c] & ctype_word) != 0;
2222     }
2223     return FALSE;
2224     }
2225    
2226    
2227    
2228     /*************************************************
2229     * Match a back-reference *
2230     *************************************************/
2231    
2232     /* If a back reference hasn't been set, the match fails.
2233    
2234     Arguments:
2235     number reference number
2236     eptr points into the subject
2237     length length to be matched
2238     md points to match data block
2239    
2240     Returns: TRUE if matched
2241     */
2242    
2243     static BOOL
2244 nigel 7 match_ref(int number, register const uschar *eptr, int length, match_data *md)
2245 nigel 3 {
2246 nigel 7 const uschar *p = md->start_subject + md->offset_vector[number];
2247 nigel 3
2248     #ifdef DEBUG
2249     if (eptr >= md->end_subject)
2250     printf("matching subject <null>");
2251     else
2252     {
2253     printf("matching subject ");
2254     pchars(eptr, length, TRUE, md);
2255     }
2256     printf(" against backref ");
2257     pchars(p, length, FALSE, md);
2258     printf("\n");
2259     #endif
2260    
2261     /* Always fail if not enough characters left */
2262    
2263     if (length > md->end_subject - p) return FALSE;
2264    
2265     /* Separate the caselesss case for speed */
2266    
2267     if (md->caseless)
2268     { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }
2269     else
2270     { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2271    
2272     return TRUE;
2273     }
2274    
2275    
2276    
2277     /*************************************************
2278     * Match from current position *
2279     *************************************************/
2280    
2281     /* On entry ecode points to the first opcode, and eptr to the first character.
2282    
2283     Arguments:
2284     eptr pointer in subject
2285     ecode position in code
2286     offset_top current top pointer
2287     md pointer to "static" info for the match
2288    
2289     Returns: TRUE if matched
2290     */
2291    
2292     static BOOL
2293 nigel 7 match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2294 nigel 3 match_data *md)
2295     {
2296     for (;;)
2297     {
2298     int min, max, ctype;
2299     register int i;
2300     register int c;
2301 nigel 7 BOOL minimize = FALSE;
2302 nigel 3
2303     /* Opening bracket. Check the alternative branches in turn, failing if none
2304     match. We have to set the start offset if required and there is space
2305     in the offset vector so that it is available for subsequent back references
2306     if the bracket matches. However, if the bracket fails, we must put back the
2307     previous value of both offsets in case they were set by a previous copy of
2308     the same bracket. Don't worry about setting the flag for the error case here;
2309     that is handled in the code for KET. */
2310    
2311     if ((int)*ecode >= OP_BRA)
2312     {
2313     int number = (*ecode - OP_BRA) << 1;
2314 nigel 7 int save_offset1 = 0, save_offset2 = 0;
2315 nigel 3
2316 nigel 9 DPRINTF(("start bracket %d\n", number/2));
2317 nigel 3
2318     if (number > 0 && number < md->offset_end)
2319     {
2320     save_offset1 = md->offset_vector[number];
2321     save_offset2 = md->offset_vector[number+1];
2322     md->offset_vector[number] = eptr - md->start_subject;
2323    
2324 nigel 9 DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
2325 nigel 3 }
2326    
2327     /* Recurse for all the alternatives. */
2328    
2329     do
2330     {
2331     if (match(eptr, ecode+3, offset_top, md)) return TRUE;
2332     ecode += (ecode[1] << 8) + ecode[2];
2333     }
2334     while (*ecode == OP_ALT);
2335    
2336 nigel 9 DPRINTF(("bracket %d failed\n", number/2));
2337 nigel 3
2338     if (number > 0 && number < md->offset_end)
2339     {
2340     md->offset_vector[number] = save_offset1;
2341     md->offset_vector[number+1] = save_offset2;
2342     }
2343    
2344     return FALSE;
2345     }
2346    
2347     /* Other types of node can be handled by a switch */
2348    
2349     switch(*ecode)
2350     {
2351     case OP_END:
2352     md->end_match_ptr = eptr; /* Record where we ended */
2353     md->end_offset_top = offset_top; /* and how many extracts were taken */
2354     return TRUE;
2355    
2356     /* The equivalent of Prolog's "cut" - if the rest doesn't match, the
2357     whole thing doesn't match, so we have to get out via a longjmp(). */
2358    
2359     case OP_CUT:
2360     if (match(eptr, ecode+1, offset_top, md)) return TRUE;
2361     longjmp(md->fail_env, 1);
2362    
2363     /* Assertion brackets. Check the alternative branches in turn - the
2364     matching won't pass the KET for an assertion. If any one branch matches,
2365     the assertion is true. */
2366    
2367     case OP_ASSERT:
2368     do
2369     {
2370     if (match(eptr, ecode+3, offset_top, md)) break;
2371     ecode += (ecode[1] << 8) + ecode[2];
2372     }
2373     while (*ecode == OP_ALT);
2374     if (*ecode == OP_KET) return FALSE;
2375    
2376     /* Continue from after the assertion, updating the offsets high water
2377     mark, since extracts may have been taken during the assertion. */
2378    
2379     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2380     ecode += 3;
2381     offset_top = md->end_offset_top;
2382     continue;
2383    
2384     /* Negative assertion: all branches must fail to match */
2385    
2386     case OP_ASSERT_NOT:
2387     do
2388     {
2389     if (match(eptr, ecode+3, offset_top, md)) return FALSE;
2390     ecode += (ecode[1] << 8) + ecode[2];
2391     }
2392     while (*ecode == OP_ALT);
2393     ecode += 3;
2394     continue;
2395    
2396     /* "Once" brackets are like assertion brackets except that after a match,
2397     the point in the subject string is not moved back. Thus there can never be
2398 nigel 5 a move back into the brackets. Check the alternative branches in turn - the
2399 nigel 3 matching won't pass the KET for this kind of subpattern. If any one branch
2400     matches, we carry on, leaving the subject pointer. */
2401    
2402     case OP_ONCE:
2403     do
2404     {
2405     if (match(eptr, ecode+3, offset_top, md)) break;
2406     ecode += (ecode[1] << 8) + ecode[2];
2407     }
2408     while (*ecode == OP_ALT);
2409     if (*ecode == OP_KET) return FALSE;
2410    
2411     /* Continue as from after the assertion, updating the offsets high water
2412     mark, since extracts may have been taken. */
2413    
2414     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2415     ecode += 3;
2416     offset_top = md->end_offset_top;
2417     eptr = md->end_match_ptr;
2418     continue;
2419    
2420     /* An alternation is the end of a branch; scan along to find the end of the
2421     bracketed group and go to there. */
2422    
2423     case OP_ALT:
2424     do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2425     break;
2426    
2427     /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
2428     that it may occur zero times. It may repeat infinitely, or not at all -
2429     i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
2430     repeat limits are compiled as a number of copies, with the optional ones
2431     preceded by BRAZERO or BRAMINZERO. */
2432    
2433     case OP_BRAZERO:
2434     {
2435 nigel 7 const uschar *next = ecode+1;
2436 nigel 3 if (match(eptr, next, offset_top, md)) return TRUE;
2437     do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2438     ecode = next + 3;
2439     }
2440     break;
2441    
2442     case OP_BRAMINZERO:
2443     {
2444 nigel 7 const uschar *next = ecode+1;
2445 nigel 3 do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2446     if (match(eptr, next+3, offset_top, md)) return TRUE;
2447     ecode++;
2448     }
2449     break;;
2450    
2451     /* End of a group, repeated or non-repeating. If we are at the end of
2452     an assertion "group", stop matching and return TRUE, but record the
2453     current high water mark for use by positive assertions. */
2454    
2455     case OP_KET:
2456     case OP_KETRMIN:
2457     case OP_KETRMAX:
2458     {
2459     int number;
2460 nigel 7 const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2461 nigel 3
2462     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2463     {
2464     md->end_match_ptr = eptr; /* For ONCE */
2465     md->end_offset_top = offset_top;
2466     return TRUE;
2467     }
2468    
2469     /* In all other cases we have to check the group number back at the
2470     start and if necessary complete handling an extraction by setting the
2471     final offset and bumping the high water mark. */
2472    
2473     number = (*prev - OP_BRA) << 1;
2474    
2475 nigel 9 DPRINTF(("end bracket %d\n", number/2));
2476 nigel 3
2477     if (number > 0)
2478     {
2479     if (number >= md->offset_end) md->offset_overflow = TRUE; else
2480     {
2481     md->offset_vector[number+1] = eptr - md->start_subject;
2482     if (offset_top <= number) offset_top = number + 2;
2483     }
2484     }
2485    
2486     /* For a non-repeating ket, just advance to the next node and continue at
2487     this level. */
2488    
2489     if (*ecode == OP_KET)
2490     {
2491     ecode += 3;
2492     break;
2493     }
2494    
2495     /* The repeating kets try the rest of the pattern or restart from the
2496     preceding bracket, in the appropriate order. */
2497    
2498     if (*ecode == OP_KETRMIN)
2499     {
2500     if (match(eptr, ecode+3, offset_top, md) ||
2501     match(eptr, prev, offset_top, md)) return TRUE;
2502     }
2503     else /* OP_KETRMAX */
2504     {
2505     if (match(eptr, prev, offset_top, md) ||
2506     match(eptr, ecode+3, offset_top, md)) return TRUE;
2507     }
2508     }
2509     return FALSE;
2510    
2511     /* Start of subject unless notbol, or after internal newline if multiline */
2512    
2513     case OP_CIRC:
2514     if (md->notbol && eptr == md->start_subject) return FALSE;
2515     if (md->multiline)
2516     {
2517     if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
2518     ecode++;
2519     break;
2520     }
2521     /* ... else fall through */
2522    
2523     /* Start of subject assertion */
2524    
2525     case OP_SOD:
2526     if (eptr != md->start_subject) return FALSE;
2527     ecode++;
2528     break;
2529    
2530     /* Assert before internal newline if multiline, or before
2531     a terminating newline unless endonly is set, else end of subject unless
2532     noteol is set. */
2533    
2534     case OP_DOLL:
2535     if (md->noteol && eptr >= md->end_subject) return FALSE;
2536     if (md->multiline)
2537     {
2538     if (eptr < md->end_subject && *eptr != '\n') return FALSE;
2539     ecode++;
2540     break;
2541     }
2542     else if (!md->endonly)
2543     {
2544     if (eptr < md->end_subject - 1 ||
2545     (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
2546     ecode++;
2547     break;
2548     }
2549     /* ... else fall through */
2550    
2551     /* End of subject assertion */
2552    
2553     case OP_EOD:
2554     if (eptr < md->end_subject) return FALSE;
2555     ecode++;
2556     break;
2557    
2558     /* Word boundary assertions */
2559    
2560     case OP_NOT_WORD_BOUNDARY:
2561     case OP_WORD_BOUNDARY:
2562     {
2563     BOOL prev_is_word = (eptr != md->start_subject) &&
2564     ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);
2565     BOOL cur_is_word = (eptr < md->end_subject) &&
2566     ((pcre_ctypes[*eptr] & ctype_word) != 0);
2567     if ((*ecode++ == OP_WORD_BOUNDARY)?
2568     cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2569     return FALSE;
2570     }
2571     break;
2572    
2573     /* Match a single character type; inline for speed */
2574    
2575     case OP_ANY:
2576     if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;
2577     if (eptr++ >= md->end_subject) return FALSE;
2578     ecode++;
2579     break;
2580    
2581     case OP_NOT_DIGIT:
2582     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)
2583     return FALSE;
2584     ecode++;
2585     break;
2586    
2587     case OP_DIGIT:
2588     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)
2589     return FALSE;
2590     ecode++;
2591     break;
2592    
2593     case OP_NOT_WHITESPACE:
2594     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)
2595     return FALSE;
2596     ecode++;
2597     break;
2598    
2599     case OP_WHITESPACE:
2600     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)
2601     return FALSE;
2602     ecode++;
2603     break;
2604    
2605     case OP_NOT_WORDCHAR:
2606     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)
2607     return FALSE;
2608     ecode++;
2609     break;
2610    
2611     case OP_WORDCHAR:
2612     if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)
2613     return FALSE;
2614     ecode++;
2615     break;
2616    
2617     /* Match a back reference, possibly repeatedly. Look past the end of the
2618     item to see if there is repeat information following. The code is similar
2619     to that for character classes, but repeated for efficiency. Then obey
2620     similar code to character type repeats - written out again for speed.
2621     However, if the referenced string is the empty string, always treat
2622     it as matched, any number of times (otherwise there could be infinite
2623     loops). */
2624    
2625     case OP_REF:
2626     {
2627     int length;
2628     int number = ecode[1] << 1; /* Doubled reference number */
2629     ecode += 2; /* Advance past the item */
2630    
2631     if (number >= offset_top || md->offset_vector[number] < 0)
2632     {
2633     md->errorcode = PCRE_ERROR_BADREF;
2634     return FALSE;
2635     }
2636    
2637     length = md->offset_vector[number+1] - md->offset_vector[number];
2638    
2639     switch (*ecode)
2640     {
2641     case OP_CRSTAR:
2642     case OP_CRMINSTAR:
2643     case OP_CRPLUS:
2644     case OP_CRMINPLUS:
2645     case OP_CRQUERY:
2646     case OP_CRMINQUERY:
2647     c = *ecode++ - OP_CRSTAR;
2648     minimize = (c & 1) != 0;
2649     min = rep_min[c]; /* Pick up values from tables; */
2650     max = rep_max[c]; /* zero for max => infinity */
2651     if (max == 0) max = INT_MAX;
2652     break;
2653    
2654     case OP_CRRANGE:
2655     case OP_CRMINRANGE:
2656     minimize = (*ecode == OP_CRMINRANGE);
2657     min = (ecode[1] << 8) + ecode[2];
2658     max = (ecode[3] << 8) + ecode[4];
2659     if (max == 0) max = INT_MAX;
2660     ecode += 5;
2661     break;
2662    
2663     default: /* No repeat follows */
2664     if (!match_ref(number, eptr, length, md)) return FALSE;
2665     eptr += length;
2666     continue; /* With the main loop */
2667     }
2668    
2669     /* If the length of the reference is zero, just continue with the
2670     main loop. */
2671    
2672     if (length == 0) continue;
2673    
2674     /* First, ensure the minimum number of matches are present. We get back
2675     the length of the reference string explicitly rather than passing the
2676     address of eptr, so that eptr can be a register variable. */
2677    
2678     for (i = 1; i <= min; i++)
2679     {
2680     if (!match_ref(number, eptr, length, md)) return FALSE;
2681     eptr += length;
2682     }
2683    
2684     /* If min = max, continue at the same level without recursion.
2685     They are not both allowed to be zero. */
2686    
2687     if (min == max) continue;
2688    
2689     /* If minimizing, keep trying and advancing the pointer */
2690    
2691     if (minimize)
2692     {
2693     for (i = min;; i++)
2694     {
2695     if (match(eptr, ecode, offset_top, md)) return TRUE;
2696     if (i >= max || !match_ref(number, eptr, length, md))
2697     return FALSE;
2698     eptr += length;
2699     }
2700     /* Control never gets here */
2701     }
2702    
2703     /* If maximizing, find the longest string and work backwards */
2704    
2705     else
2706     {
2707 nigel 7 const uschar *pp = eptr;
2708 nigel 3 for (i = min; i < max; i++)
2709     {
2710     if (!match_ref(number, eptr, length, md)) break;
2711     eptr += length;
2712     }
2713     while (eptr >= pp)
2714     {
2715     if (match(eptr, ecode, offset_top, md)) return TRUE;
2716     eptr -= length;
2717     }
2718     return FALSE;
2719     }
2720     }
2721     /* Control never gets here */
2722    
2723     /* Match a character class, possibly repeatedly. Look past the end of the
2724     item to see if there is repeat information following. Then obey similar
2725     code to character type repeats - written out again for speed. If caseless
2726     matching was set at runtime but not at compile time, we have to check both
2727 nigel 13 versions of a character, and we have to behave differently for positive and
2728     negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2729     treated differently. */
2730 nigel 3
2731     case OP_CLASS:
2732 nigel 13 case OP_NEGCLASS:
2733 nigel 3 {
2734 nigel 13 BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2735 nigel 7 const uschar *data = ecode + 1; /* Save for matching */
2736     ecode += 33; /* Advance past the item */
2737 nigel 3
2738     switch (*ecode)
2739     {
2740     case OP_CRSTAR:
2741     case OP_CRMINSTAR:
2742     case OP_CRPLUS:
2743     case OP_CRMINPLUS:
2744     case OP_CRQUERY:
2745     case OP_CRMINQUERY:
2746     c = *ecode++ - OP_CRSTAR;
2747     minimize = (c & 1) != 0;
2748     min = rep_min[c]; /* Pick up values from tables; */
2749     max = rep_max[c]; /* zero for max => infinity */
2750     if (max == 0) max = INT_MAX;
2751     break;
2752    
2753     case OP_CRRANGE:
2754     case OP_CRMINRANGE:
2755     minimize = (*ecode == OP_CRMINRANGE);
2756     min = (ecode[1] << 8) + ecode[2];
2757     max = (ecode[3] << 8) + ecode[4];
2758     if (max == 0) max = INT_MAX;
2759     ecode += 5;
2760     break;
2761    
2762     default: /* No repeat follows */
2763 nigel 13 min = max = 1;
2764     break;
2765 nigel 3 }
2766    
2767     /* First, ensure the minimum number of matches are present. */
2768    
2769     for (i = 1; i <= min; i++)
2770     {
2771     if (eptr >= md->end_subject) return FALSE;
2772     c = *eptr++;
2773 nigel 13
2774     /* Either not runtime caseless, or it was a positive class. For
2775     runtime caseless, continue if either case is in the map. */
2776    
2777     if (!nasty_case)
2778 nigel 3 {
2779 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2780     if (md->runtime_caseless)
2781     {
2782     c = pcre_fcc[c];
2783     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2784     }
2785     }
2786    
2787     /* Runtime caseless and it was a negative class. Continue only if
2788     both cases are in the map. */
2789    
2790     else
2791     {
2792     if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2793 nigel 3 c = pcre_fcc[c];
2794     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2795     }
2796 nigel 13
2797 nigel 3 return FALSE;
2798     }
2799    
2800     /* If max == min we can continue with the main loop without the
2801     need to recurse. */
2802    
2803     if (min == max) continue;
2804    
2805     /* If minimizing, keep testing the rest of the expression and advancing
2806     the pointer while it matches the class. */
2807    
2808     if (minimize)
2809     {
2810     for (i = min;; i++)
2811     {
2812     if (match(eptr, ecode, offset_top, md)) return TRUE;
2813     if (i >= max || eptr >= md->end_subject) return FALSE;
2814     c = *eptr++;
2815 nigel 13
2816     /* Either not runtime caseless, or it was a positive class. For
2817     runtime caseless, continue if either case is in the map. */
2818    
2819     if (!nasty_case)
2820 nigel 3 {
2821 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2822     if (md->runtime_caseless)
2823     {
2824     c = pcre_fcc[c];
2825     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2826     }
2827     }
2828    
2829     /* Runtime caseless and it was a negative class. Continue only if
2830     both cases are in the map. */
2831    
2832     else
2833     {
2834     if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2835 nigel 3 c = pcre_fcc[c];
2836     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2837     }
2838 nigel 13
2839 nigel 3 return FALSE;
2840     }
2841     /* Control never gets here */
2842     }
2843    
2844     /* If maximizing, find the longest possible run, then work backwards. */
2845    
2846     else
2847     {
2848 nigel 7 const uschar *pp = eptr;
2849 nigel 3 for (i = min; i < max; eptr++, i++)
2850     {
2851     if (eptr >= md->end_subject) break;
2852     c = *eptr;
2853 nigel 13
2854     /* Either not runtime caseless, or it was a positive class. For
2855     runtime caseless, continue if either case is in the map. */
2856    
2857     if (!nasty_case)
2858 nigel 3 {
2859 nigel 13 if ((data[c/8] & (1 << (c&7))) != 0) continue;
2860     if (md->runtime_caseless)
2861     {
2862     c = pcre_fcc[c];
2863     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2864     }
2865     }
2866    
2867     /* Runtime caseless and it was a negative class. Continue only if
2868     both cases are in the map. */
2869    
2870     else
2871     {
2872     if ((data[c/8] & (1 << (c&7))) == 0) break;
2873 nigel 3 c = pcre_fcc[c];
2874     if ((data[c/8] & (1 << (c&7))) != 0) continue;
2875     }
2876 nigel 13
2877 nigel 3 break;
2878     }
2879    
2880     while (eptr >= pp)
2881     if (match(eptr--, ecode, offset_top, md)) return TRUE;
2882     return FALSE;
2883     }
2884     }
2885     /* Control never gets here */
2886    
2887     /* Match a run of characters */
2888    
2889     case OP_CHARS:
2890     {
2891     register int length = ecode[1];
2892     ecode += 2;
2893    
2894 nigel 9 #ifdef DEBUG /* Sigh. Some compilers never learn. */
2895 nigel 3 if (eptr >= md->end_subject)
2896     printf("matching subject <null> against pattern ");
2897     else
2898     {
2899     printf("matching subject ");
2900     pchars(eptr, length, TRUE, md);
2901     printf(" against pattern ");
2902     }
2903     pchars(ecode, length, FALSE, md);
2904     printf("\n");
2905 nigel 9 #endif
2906 nigel 3
2907     if (length > md->end_subject - eptr) return FALSE;
2908     if (md->caseless)
2909     {
2910     while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;
2911     }
2912     else
2913     {
2914     while (length-- > 0) if (*ecode++ != *eptr++) return FALSE;
2915     }
2916     }
2917     break;
2918    
2919     /* Match a single character repeatedly; different opcodes share code. */
2920    
2921     case OP_EXACT:
2922     min = max = (ecode[1] << 8) + ecode[2];
2923     ecode += 3;
2924     goto REPEATCHAR;
2925    
2926     case OP_UPTO:
2927     case OP_MINUPTO:
2928     min = 0;
2929     max = (ecode[1] << 8) + ecode[2];
2930     minimize = *ecode == OP_MINUPTO;
2931     ecode += 3;
2932     goto REPEATCHAR;
2933    
2934     case OP_STAR:
2935     case OP_MINSTAR:
2936     case OP_PLUS:
2937     case OP_MINPLUS:
2938     case OP_QUERY:
2939     case OP_MINQUERY:
2940     c = *ecode++ - OP_STAR;
2941     minimize = (c & 1) != 0;
2942     min = rep_min[c]; /* Pick up values from tables; */
2943     max = rep_max[c]; /* zero for max => infinity */
2944     if (max == 0) max = INT_MAX;
2945    
2946     /* Common code for all repeated single-character matches. We can give
2947     up quickly if there are fewer than the minimum number of characters left in
2948     the subject. */
2949    
2950     REPEATCHAR:
2951     if (min > md->end_subject - eptr) return FALSE;
2952     c = *ecode++;
2953    
2954     /* The code is duplicated for the caseless and caseful cases, for speed,
2955     since matching characters is likely to be quite common. First, ensure the
2956     minimum number of matches are present. If min = max, continue at the same
2957     level without recursing. Otherwise, if minimizing, keep trying the rest of
2958     the expression and advancing one matching character if failing, up to the
2959     maximum. Alternatively, if maximizing, find the maximum number of
2960     characters and work backwards. */
2961    
2962 nigel 9 DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2963     max, eptr));
2964 nigel 3
2965     if (md->caseless)
2966     {
2967     c = pcre_lcc[c];
2968     for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;
2969     if (min == max) continue;
2970     if (minimize)
2971     {
2972     for (i = min;; i++)
2973     {
2974     if (match(eptr, ecode, offset_top, md)) return TRUE;
2975     if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])
2976     return FALSE;
2977     }
2978     /* Control never gets here */
2979     }
2980     else
2981     {
2982 nigel 7 const uschar *pp = eptr;
2983 nigel 3 for (i = min; i < max; i++)
2984     {
2985     if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
2986     eptr++;
2987     }
2988     while (eptr >= pp)
2989     if (match(eptr--, ecode, offset_top, md)) return TRUE;
2990     return FALSE;
2991     }
2992     /* Control never gets here */
2993     }
2994    
2995     /* Caseful comparisons */
2996    
2997     else
2998     {
2999     for (i = 1; i <= min; i++) if (c != *eptr++) return FALSE;
3000     if (min == max) continue;
3001     if (minimize)
3002     {
3003     for (i = min;; i++)
3004     {
3005     if (match(eptr, ecode, offset_top, md)) return TRUE;
3006     if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3007     }
3008     /* Control never gets here */
3009     }
3010     else
3011     {
3012 nigel 7 const uschar *pp = eptr;
3013 nigel 3 for (i = min; i < max; i++)
3014     {
3015     if (eptr >= md->end_subject || c != *eptr) break;
3016     eptr++;
3017     }
3018     while (eptr >= pp)
3019     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3020     return FALSE;
3021     }
3022     }
3023     /* Control never gets here */
3024    
3025     /* Match a negated single character */
3026    
3027     case OP_NOT:
3028 nigel 9 if (eptr >= md->end_subject) return FALSE;
3029 nigel 3 ecode++;
3030     if (md->caseless)
3031     {
3032     if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;
3033     }
3034     else
3035     {
3036     if (*ecode++ == *eptr++) return FALSE;
3037     }
3038     break;
3039    
3040     /* Match a negated single character repeatedly. This is almost a repeat of
3041     the code for a repeated single character, but I haven't found a nice way of
3042     commoning these up that doesn't require a test of the positive/negative
3043     option for each character match. Maybe that wouldn't add very much to the
3044     time taken, but character matching *is* what this is all about... */
3045    
3046     case OP_NOTEXACT:
3047     min = max = (ecode[1] << 8) + ecode[2];
3048     ecode += 3;
3049     goto REPEATNOTCHAR;
3050    
3051     case OP_NOTUPTO:
3052     case OP_NOTMINUPTO:
3053     min = 0;
3054     max = (ecode[1] << 8) + ecode[2];
3055     minimize = *ecode == OP_NOTMINUPTO;
3056     ecode += 3;
3057     goto REPEATNOTCHAR;
3058    
3059     case OP_NOTSTAR:
3060     case OP_NOTMINSTAR:
3061     case OP_NOTPLUS:
3062     case OP_NOTMINPLUS:
3063     case OP_NOTQUERY:
3064     case OP_NOTMINQUERY:
3065     c = *ecode++ - OP_NOTSTAR;
3066     minimize = (c & 1) != 0;
3067     min = rep_min[c]; /* Pick up values from tables; */
3068     max = rep_max[c]; /* zero for max => infinity */
3069     if (max == 0) max = INT_MAX;
3070    
3071     /* Common code for all repeated single-character matches. We can give
3072     up quickly if there are fewer than the minimum number of characters left in
3073     the subject. */
3074    
3075     REPEATNOTCHAR:
3076     if (min > md->end_subject - eptr) return FALSE;
3077     c = *ecode++;
3078    
3079     /* The code is duplicated for the caseless and caseful cases, for speed,
3080     since matching characters is likely to be quite common. First, ensure the
3081     minimum number of matches are present. If min = max, continue at the same
3082     level without recursing. Otherwise, if minimizing, keep trying the rest of
3083     the expression and advancing one matching character if failing, up to the
3084     maximum. Alternatively, if maximizing, find the maximum number of
3085     characters and work backwards. */
3086    
3087 nigel 9 DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3088     max, eptr));
3089 nigel 3
3090     if (md->caseless)
3091     {
3092     c = pcre_lcc[c];
3093     for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;
3094     if (min == max) continue;
3095     if (minimize)
3096     {
3097     for (i = min;; i++)
3098     {
3099     if (match(eptr, ecode, offset_top, md)) return TRUE;
3100     if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])
3101     return FALSE;
3102     }
3103     /* Control never gets here */
3104     }
3105     else
3106     {
3107 nigel 7 const uschar *pp = eptr;
3108 nigel 3 for (i = min; i < max; i++)
3109     {
3110     if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
3111     eptr++;
3112     }
3113     while (eptr >= pp)
3114     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3115     return FALSE;
3116     }
3117     /* Control never gets here */
3118     }
3119    
3120     /* Caseful comparisons */
3121    
3122     else
3123     {
3124     for (i = 1; i <= min; i++) if (c == *eptr++) return FALSE;
3125     if (min == max) continue;
3126     if (minimize)
3127     {
3128     for (i = min;; i++)
3129     {
3130     if (match(eptr, ecode, offset_top, md)) return TRUE;
3131     if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3132     }
3133     /* Control never gets here */
3134     }
3135     else
3136     {
3137 nigel 7 const uschar *pp = eptr;
3138 nigel 3 for (i = min; i < max; i++)
3139     {
3140     if (eptr >= md->end_subject || c == *eptr) break;
3141     eptr++;
3142     }
3143     while (eptr >= pp)
3144     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3145     return FALSE;
3146     }
3147     }
3148     /* Control never gets here */
3149    
3150     /* Match a single character type repeatedly; several different opcodes
3151     share code. This is very similar to the code for single characters, but we
3152     repeat it in the interests of efficiency. */
3153    
3154     case OP_TYPEEXACT:
3155     min = max = (ecode[1] << 8) + ecode[2];
3156     minimize = TRUE;
3157     ecode += 3;
3158     goto REPEATTYPE;
3159    
3160     case OP_TYPEUPTO:
3161     case OP_TYPEMINUPTO:
3162     min = 0;
3163     max = (ecode[1] << 8) + ecode[2];
3164     minimize = *ecode == OP_TYPEMINUPTO;
3165     ecode += 3;
3166     goto REPEATTYPE;
3167    
3168     case OP_TYPESTAR:
3169     case OP_TYPEMINSTAR:
3170     case OP_TYPEPLUS:
3171     case OP_TYPEMINPLUS:
3172     case OP_TYPEQUERY:
3173     case OP_TYPEMINQUERY:
3174     c = *ecode++ - OP_TYPESTAR;
3175     minimize = (c & 1) != 0;
3176     min = rep_min[c]; /* Pick up values from tables; */
3177     max = rep_max[c]; /* zero for max => infinity */
3178     if (max == 0) max = INT_MAX;
3179    
3180     /* Common code for all repeated single character type matches */
3181    
3182     REPEATTYPE:
3183     ctype = *ecode++; /* Code for the character type */
3184    
3185     /* First, ensure the minimum number of matches are present. Use inline
3186     code for maximizing the speed, and do the type test once at the start
3187     (i.e. keep it out of the loop). Also test that there are at least the
3188     minimum number of characters before we start. */
3189    
3190     if (min > md->end_subject - eptr) return FALSE;
3191     if (min > 0) switch(ctype)
3192     {
3193     case OP_ANY:
3194     if (!md->dotall)
3195     { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3196     else eptr += min;
3197     break;
3198    
3199     case OP_NOT_DIGIT:
3200     for (i = 1; i <= min; i++)
3201     if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3202     break;
3203    
3204     case OP_DIGIT:
3205     for (i = 1; i <= min; i++)
3206     if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3207     break;
3208    
3209     case OP_NOT_WHITESPACE:
3210     for (i = 1; i <= min; i++)
3211     if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3212     break;
3213    
3214     case OP_WHITESPACE:
3215     for (i = 1; i <= min; i++)
3216     if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3217     break;
3218    
3219     case OP_NOT_WORDCHAR:
3220     for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)
3221     return FALSE;
3222     break;
3223    
3224     case OP_WORDCHAR:
3225     for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)
3226     return FALSE;
3227     break;
3228     }
3229    
3230     /* If min = max, continue at the same level without recursing */
3231    
3232     if (min == max) continue;
3233    
3234     /* If minimizing, we have to test the rest of the pattern before each
3235     subsequent match, so inlining isn't much help; just use the function. */
3236    
3237     if (minimize)
3238     {
3239     for (i = min;; i++)
3240     {
3241     if (match(eptr, ecode, offset_top, md)) return TRUE;
3242     if (i >= max || eptr >= md->end_subject ||
3243     !match_type(ctype, *eptr++, md->dotall))
3244     return FALSE;
3245     }
3246     /* Control never gets here */
3247     }
3248    
3249     /* If maximizing it is worth using inline code for speed, doing the type
3250     test once at the start (i.e. keep it out of the loop). */
3251    
3252     else
3253     {
3254 nigel 7 const uschar *pp = eptr;
3255 nigel 3 switch(ctype)
3256     {
3257     case OP_ANY:
3258     if (!md->dotall)
3259     {
3260     for (i = min; i < max; i++)
3261     {
3262     if (eptr >= md->end_subject || *eptr == '\n') break;
3263     eptr++;
3264     }
3265     }
3266     else
3267     {
3268     c = max - min;
3269     if (c > md->end_subject - eptr) c = md->end_subject - eptr;
3270     eptr += c;
3271     }
3272     break;
3273    
3274     case OP_NOT_DIGIT:
3275     for (i = min; i < max; i++)
3276     {
3277     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)
3278     break;
3279     eptr++;
3280     }
3281     break;
3282    
3283     case OP_DIGIT:
3284     for (i = min; i < max; i++)
3285     {
3286     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)
3287     break;
3288     eptr++;
3289     }
3290     break;
3291    
3292     case OP_NOT_WHITESPACE:
3293     for (i = min; i < max; i++)
3294     {
3295     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)
3296     break;
3297     eptr++;
3298     }
3299     break;
3300    
3301     case OP_WHITESPACE:
3302     for (i = min; i < max; i++)
3303     {
3304     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)
3305     break;
3306     eptr++;
3307     }
3308     break;
3309    
3310     case OP_NOT_WORDCHAR:
3311     for (i = min; i < max; i++)
3312     {
3313     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)
3314     break;
3315     eptr++;
3316     }
3317     break;
3318    
3319     case OP_WORDCHAR:
3320     for (i = min; i < max; i++)
3321     {
3322     if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)
3323     break;
3324     eptr++;
3325     }
3326     break;
3327     }
3328    
3329     while (eptr >= pp)
3330     if (match(eptr--, ecode, offset_top, md)) return TRUE;
3331     return FALSE;
3332     }
3333     /* Control never gets here */
3334    
3335     /* There's been some horrible disaster. */
3336    
3337     default:
3338 nigel 9 DPRINTF(("Unknown opcode %d\n", *ecode));
3339 nigel 3 md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3340     return FALSE;
3341     }
3342    
3343     /* Do not stick any code in here without much thought; it is assumed
3344     that "continue" in the code above comes out to here to repeat the main
3345     loop. */
3346    
3347     } /* End of main loop */
3348     /* Control never reaches here */
3349     }
3350    
3351    
3352    
3353     /*************************************************
3354 nigel 9 * Segregate setjmp() *
3355     *************************************************/
3356    
3357     /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3358     is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3359     hide it in a separate function. This is called only when PCRE_EXTRA is set,
3360     since it's needed only for the extension \X option, and with any luck, a good
3361     compiler will spot the tail recursion and compile it efficiently.
3362    
3363 nigel 11 Arguments:
3364     eptr pointer in subject
3365     ecode position in code
3366     offset_top current top pointer
3367     md pointer to "static" info for the match
3368    
3369     Returns: TRUE if matched
3370 nigel 9 */
3371    
3372 nigel 11 static BOOL
3373     match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3374     match_data *match_block)
3375 nigel 9 {
3376 nigel 11 return setjmp(match_block->fail_env) == 0 &&
3377     match(eptr, ecode, offset_top, match_block);
3378 nigel 9 }
3379    
3380    
3381    
3382     /*************************************************
3383 nigel 3 * Execute a Regular Expression *
3384     *************************************************/
3385    
3386     /* This function applies a compiled re to a subject string and picks out
3387     portions of the string if it matches. Two elements in the vector are set for
3388     each substring: the offsets to the start and end of the substring.
3389    
3390     Arguments:
3391     external_re points to the compiled expression
3392     external_extra points to "hints" from pcre_study() or is NULL
3393     subject points to the subject string
3394     length length of subject string (may contain binary zeros)
3395     options option bits
3396     offsets points to a vector of ints to be filled in with offsets
3397     offsetcount the number of elements in the vector
3398    
3399     Returns: > 0 => success; value is the number of elements filled in
3400     = 0 => success, but offsets is not big enough
3401     -1 => failed to match
3402     < -1 => some kind of unexpected problem
3403     */
3404    
3405     int
3406     pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3407     const char *subject, int length, int options, int *offsets, int offsetcount)
3408     {
3409 nigel 11 int resetcount, ocount;
3410 nigel 3 int first_char = -1;
3411     match_data match_block;
3412 nigel 7 const uschar *start_bits = NULL;
3413 nigel 9 const uschar *start_match = (const uschar *)subject;
3414 nigel 7 const uschar *end_subject;
3415     const real_pcre *re = (const real_pcre *)external_re;
3416     const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3417 nigel 11 BOOL using_temporary_offsets = FALSE;
3418 nigel 3 BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3419     BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3420    
3421     if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
3422    
3423     if (re == NULL || subject == NULL ||
3424     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3425     if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3426    
3427 nigel 7 match_block.start_subject = (const uschar *)subject;
3428 nigel 3 match_block.end_subject = match_block.start_subject + length;
3429     end_subject = match_block.end_subject;
3430    
3431     match_block.caseless = ((re->options | options) & PCRE_CASELESS) != 0;
3432     match_block.runtime_caseless = match_block.caseless &&
3433     (re->options & PCRE_CASELESS) == 0;
3434    
3435     match_block.multiline = ((re->options | options) & PCRE_MULTILINE) != 0;
3436     match_block.dotall = ((re->options | options) & PCRE_DOTALL) != 0;
3437     match_block.endonly = ((re->options | options) & PCRE_DOLLAR_ENDONLY) != 0;
3438    
3439     match_block.notbol = (options & PCRE_NOTBOL) != 0;
3440     match_block.noteol = (options & PCRE_NOTEOL) != 0;
3441    
3442     match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
3443    
3444     /* If the expression has got more back references than the offsets supplied can
3445     hold, we get a temporary bit of working store to use during the matching.
3446 nigel 11 Otherwise, we can use the vector supplied, rounding down its size to a multiple
3447     of 2. */
3448 nigel 3
3449 nigel 11 ocount = offsetcount & (-2);
3450     if (re->top_backref > 0 && re->top_backref >= ocount/2)
3451 nigel 3 {
3452     ocount = re->top_backref * 2 + 2;
3453     match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));
3454     if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3455 nigel 11 using_temporary_offsets = TRUE;
3456 nigel 9 DPRINTF(("Got memory to hold back references\n"));
3457 nigel 3 }
3458     else match_block.offset_vector = offsets;
3459    
3460     match_block.offset_end = ocount;
3461     match_block.offset_overflow = FALSE;
3462    
3463     /* Compute the minimum number of offsets that we need to reset each time. Doing
3464     this makes a huge difference to execution time when there aren't many brackets
3465     in the pattern. */
3466    
3467     resetcount = 2 + re->top_bracket * 2;
3468     if (resetcount > offsetcount) resetcount = ocount;
3469    
3470     /* If MULTILINE is set at exec time but was not set at compile time, and the
3471     anchored flag is set, we must re-check because a setting provoked by ^ in the
3472     pattern is not right in multi-line mode. Calling is_anchored() again here does
3473     the right check, because multiline is now set. If it now yields FALSE, the
3474     expression must have had ^ starting some of its branches. Check to see if
3475     that is true for *all* branches, and if so, set the startline flag. */
3476    
3477     if (match_block. multiline && anchored && (re->options & PCRE_MULTILINE) == 0 &&
3478     !is_anchored(re->code, match_block.multiline))
3479     {
3480     anchored = FALSE;
3481     if (is_startline(re->code)) startline = TRUE;
3482     }
3483    
3484     /* Set up the first character to match, if available. The first_char value is
3485     never set for an anchored regular expression, but the anchoring may be forced
3486     at run time, so we have to test for anchoring. The first char may be unset for
3487     an unanchored pattern, of course. If there's no first char and the pattern was
3488     studied, the may be a bitmap of possible first characters. However, we can
3489     use this only if the caseless state of the studying was correct. */
3490    
3491     if (!anchored)
3492     {
3493     if ((re->options & PCRE_FIRSTSET) != 0)
3494     {
3495     first_char = re->first_char;
3496     if (match_block.caseless) first_char = pcre_lcc[first_char];
3497     }
3498     else
3499     if (!startline && extra != NULL &&
3500     (extra->options & PCRE_STUDY_MAPPED) != 0 &&
3501     ((extra->options & PCRE_STUDY_CASELESS) != 0) == match_block.caseless)
3502     start_bits = extra->start_bits;
3503     }
3504    
3505     /* Loop for unanchored matches; for anchored regexps the loop runs just once. */
3506    
3507     do
3508     {
3509 nigel 9 int rc;
3510 nigel 3 register int *iptr = match_block.offset_vector;
3511     register int *iend = iptr + resetcount;
3512    
3513     /* Reset the maximum number of extractions we might see. */
3514    
3515     while (iptr < iend) *iptr++ = -1;
3516    
3517     /* Advance to a unique first char if possible */
3518    
3519     if (first_char >= 0)
3520     {
3521     if (match_block.caseless)
3522     while (start_match < end_subject && pcre_lcc[*start_match] != first_char)
3523     start_match++;
3524     else
3525     while (start_match < end_subject && *start_match != first_char)
3526     start_match++;
3527     }
3528    
3529     /* Or to just after \n for a multiline match if possible */
3530    
3531     else if (startline)
3532     {
3533     if (start_match > match_block.start_subject)
3534     {
3535     while (start_match < end_subject && start_match[-1] != '\n')
3536     start_match++;
3537     }
3538     }
3539    
3540     /* Or to a non-unique first char */
3541    
3542     else if (start_bits != NULL)
3543     {
3544     while (start_match < end_subject)
3545     {
3546     register int c = *start_match;
3547     if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;
3548     }
3549     }
3550    
3551 nigel 9 #ifdef DEBUG /* Sigh. Some compilers never learn. */
3552 nigel 3 printf(">>>> Match against: ");
3553     pchars(start_match, end_subject - start_match, TRUE, &match_block);
3554     printf("\n");
3555 nigel 9 #endif
3556 nigel 3
3557     /* When a match occurs, substrings will be set for all internal extractions;
3558     we just need to set up the whole thing as substring 0 before returning. If
3559     there were too many extractions, set the return code to zero. In the case
3560     where we had to get some local store to hold offsets for backreferences, copy
3561     those back references that we can. In this case there need not be overflow
3562     if certain parts of the pattern were not used.
3563    
3564     Before starting the match, we have to set up a longjmp() target to enable
3565 nigel 9 the "cut" operation to fail a match completely without backtracking. This
3566     is done in a separate function to avoid compiler warnings. We need not do
3567     it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3568     enabled. */
3569 nigel 3
3570 nigel 11 if ((re->options & PCRE_EXTRA) != 0)
3571     {
3572     if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3573     continue;
3574     }
3575     else if (!match(start_match, re->code, 2, &match_block)) continue;
3576 nigel 9
3577     /* Copy the offset information from temporary store if necessary */
3578    
3579 nigel 11 if (using_temporary_offsets)
3580 nigel 3 {
3581 nigel 9 if (offsetcount >= 4)
3582 nigel 3 {
3583 nigel 9 memcpy(offsets + 2, match_block.offset_vector + 2,
3584     (offsetcount - 2) * sizeof(int));
3585 nigel 11 DPRINTF(("Copied offsets from temporary memory\n"));
3586 nigel 3 }
3587 nigel 9 if (match_block.end_offset_top > offsetcount)
3588     match_block.offset_overflow = TRUE;
3589 nigel 3
3590 nigel 9 DPRINTF(("Freeing temporary memory\n"));
3591     (pcre_free)(match_block.offset_vector);
3592     }
3593 nigel 3
3594 nigel 9 rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
3595 nigel 3
3596 nigel 9 if (match_block.offset_end < 2) rc = 0; else
3597     {
3598     offsets[0] = start_match - match_block.start_subject;
3599     offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3600 nigel 3 }
3601 nigel 9
3602     DPRINTF((">>>> returning %d\n", rc));
3603     return rc;
3604 nigel 3 }
3605     while (!anchored &&
3606     match_block.errorcode == PCRE_ERROR_NOMATCH &&
3607     start_match++ < end_subject);
3608    
3609 nigel 11 if (using_temporary_offsets)
3610     {
3611     DPRINTF(("Freeing temporary memory\n"));
3612     (pcre_free)(match_block.offset_vector);
3613     }
3614    
3615 nigel 9 DPRINTF((">>>> returning %d\n", match_block.errorcode));
3616 nigel 3
3617     return match_block.errorcode;
3618     }
3619    
3620     /* End of pcre.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12